From 0c883525308de8c268a23fc9236ac464cb9d3ddd Mon Sep 17 00:00:00 2001 From: wlthr Date: Thu, 29 Jun 2023 19:07:23 +0200 Subject: [PATCH] added sp3mm4amg source code to the repository and created C wrapper routine to be used in the fortran interface --- .vscode/settings.json | 7 + base/serial/impl/sp3mm4amg/COPYING | 15 + .../impl/sp3mm4amg/Sp3MM_CSR_OMP_Multi.c | 66 + .../sp3mm4amg/Sp3MM_CSR_OMP_Num_Generic.c | 249 ++ .../Sp3MM_CSR_OMP_SymbStep_Generic.c | 594 ++++ .../sp3mm4amg/Sp3MM_CSR_OMP_SymbStep_Multi.c | 92 + .../impl/sp3mm4amg/Sp3MM_CSR_OMP_UB_Generic.c | 573 +++ .../serial/impl/sp3mm4amg/commons/ompGetICV.c | 86 + .../commons/ompi_dims_create/mpi.h.in | 3141 +++++++++++++++++ .../ompi_dims_create/ompi_config_minimal.h | 193 + .../ompi_dims_create/ompi_dims_create.c | 288 ++ .../sp3mm4amg/commons/sparseUtilsGeneric.c | 499 +++ .../impl/sp3mm4amg/commons/sparseUtilsMulti.c | 38 + base/serial/impl/sp3mm4amg/commons/utils.c | 502 +++ .../impl/sp3mm4amg/fbind/psb_f_spmm_ub.c | 107 + .../sp3mm4amg/include/Sp3MM_CSR_OMP_Multi.h | 94 + .../include/Sp3MM_CSR_OMP_Num_Generic.h | 95 + .../include/Sp3MM_CSR_OMP_SymbStep_Generic.h | 195 + .../include/Sp3MM_CSR_OMP_SymbStep_Multi.h | 80 + .../include/Sp3MM_CSR_OMP_UB_Generic.h | 95 + .../impl/sp3mm4amg/include/SpMMUtilsGeneric.h | 406 +++ .../impl/sp3mm4amg/include/SpMMUtilsMulti.h | 56 + base/serial/impl/sp3mm4amg/include/config.h | 155 + .../impl/sp3mm4amg/include/inlineExports.h | 49 + .../sp3mm4amg/include/inlineExports_Generic.h | 43 + .../include/linuxK_rbtree_minimalized.h | 335 ++ base/serial/impl/sp3mm4amg/include/macros.h | 118 + .../impl/sp3mm4amg/include/macrosLinuxMock.h | 140 + base/serial/impl/sp3mm4amg/include/mmio.h | 133 + .../impl/sp3mm4amg/include/ompChunksDivide.h | 105 + .../serial/impl/sp3mm4amg/include/ompGetICV.h | 41 + .../sp3mm4amg/include/ompi_config_minimal.h | 193 + base/serial/impl/sp3mm4amg/include/parser.h | 90 + .../impl/sp3mm4amg/include/sparseMatrix.h | 216 ++ .../sp3mm4amg/include/sparseUtilsGeneric.h | 138 + .../impl/sp3mm4amg/include/sparseUtilsMulti.h | 52 + base/serial/impl/sp3mm4amg/include/utils.h | 181 + .../sp3mm4amg/lib/linuxK_rbtree_minimalized.c | 414 +++ base/serial/impl/sp3mm4amg/lib/mmio.c | 511 +++ base/serial/impl/sp3mm4amg/lib/parser.c | 391 ++ cbind/base/psb_objhandle_mod.F90 | 33 +- 41 files changed, 10777 insertions(+), 32 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 base/serial/impl/sp3mm4amg/COPYING create mode 100644 base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_Multi.c create mode 100644 base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_Num_Generic.c create mode 100644 base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_SymbStep_Generic.c create mode 100644 base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_SymbStep_Multi.c create mode 100644 base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_UB_Generic.c create mode 100644 base/serial/impl/sp3mm4amg/commons/ompGetICV.c create mode 100644 base/serial/impl/sp3mm4amg/commons/ompi_dims_create/mpi.h.in create mode 100644 base/serial/impl/sp3mm4amg/commons/ompi_dims_create/ompi_config_minimal.h create mode 100644 base/serial/impl/sp3mm4amg/commons/ompi_dims_create/ompi_dims_create.c create mode 100644 base/serial/impl/sp3mm4amg/commons/sparseUtilsGeneric.c create mode 100644 base/serial/impl/sp3mm4amg/commons/sparseUtilsMulti.c create mode 100644 base/serial/impl/sp3mm4amg/commons/utils.c create mode 100644 base/serial/impl/sp3mm4amg/fbind/psb_f_spmm_ub.c create mode 100644 base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_Multi.h create mode 100644 
base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_Num_Generic.h create mode 100644 base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_SymbStep_Generic.h create mode 100644 base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_SymbStep_Multi.h create mode 100644 base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_UB_Generic.h create mode 100644 base/serial/impl/sp3mm4amg/include/SpMMUtilsGeneric.h create mode 100644 base/serial/impl/sp3mm4amg/include/SpMMUtilsMulti.h create mode 100644 base/serial/impl/sp3mm4amg/include/config.h create mode 100644 base/serial/impl/sp3mm4amg/include/inlineExports.h create mode 100644 base/serial/impl/sp3mm4amg/include/inlineExports_Generic.h create mode 100644 base/serial/impl/sp3mm4amg/include/linuxK_rbtree_minimalized.h create mode 100644 base/serial/impl/sp3mm4amg/include/macros.h create mode 100644 base/serial/impl/sp3mm4amg/include/macrosLinuxMock.h create mode 100644 base/serial/impl/sp3mm4amg/include/mmio.h create mode 100644 base/serial/impl/sp3mm4amg/include/ompChunksDivide.h create mode 100644 base/serial/impl/sp3mm4amg/include/ompGetICV.h create mode 100644 base/serial/impl/sp3mm4amg/include/ompi_config_minimal.h create mode 100644 base/serial/impl/sp3mm4amg/include/parser.h create mode 100644 base/serial/impl/sp3mm4amg/include/sparseMatrix.h create mode 100644 base/serial/impl/sp3mm4amg/include/sparseUtilsGeneric.h create mode 100644 base/serial/impl/sp3mm4amg/include/sparseUtilsMulti.h create mode 100644 base/serial/impl/sp3mm4amg/include/utils.h create mode 100644 base/serial/impl/sp3mm4amg/lib/linuxK_rbtree_minimalized.c create mode 100644 base/serial/impl/sp3mm4amg/lib/mmio.c create mode 100644 base/serial/impl/sp3mm4amg/lib/parser.c diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..b7b048ac --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "fortran.fortls.path": "/usr/bin/fortls", + "files.associations": { + "sp3mm_csr_omp_multi.h": "c", + "sp3mm_csr_omp_ub_generic.h": "c" + } +} \ No newline at end of file diff --git a/base/serial/impl/sp3mm4amg/COPYING b/base/serial/impl/sp3mm4amg/COPYING new file mode 100644 index 00000000..b1ed3a7f --- /dev/null +++ b/base/serial/impl/sp3mm4amg/COPYING @@ -0,0 +1,15 @@ +Copyright Andrea Di Iorio 2022 +This file is part of Sp3MM_4_AMG_OMP_CUDA_C_FORTRAN +Sp3MM_4_AMG_OMP_CUDA_C_FORTRAN is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Sp3MM_4_AMG_OMP_CUDA_C_FORTRAN is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Sp3MM_4_AMG_OMP_CUDA_C_FORTRAN. If not, see . + diff --git a/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_Multi.c b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_Multi.c new file mode 100644 index 00000000..9982caaf --- /dev/null +++ b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_Multi.c @@ -0,0 +1,66 @@ +/* + * Sp3MM_for_AlgebraicMultiGrid + * (C) Copyright 2021-2022 + * Andrea Di Iorio + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +//Get multiple implementation for C-Fortran indexing by re-define & re-include +#include +#include +#include +#include //TODO quick hold few CSR cols partition sizes +#include +//UB version deps +#include "Sp3MM_CSR_OMP_Multi.h" +#include "SpMMUtilsMulti.h" +#include "sparseUtilsMulti.h" +#include "ompChunksDivide.h" +#include "parser.h" +#include "utils.h" +#include "macros.h" +#include "sparseMatrix.h" + +#include "inlineExports.h" + +//Symb version deps +#include "Sp3MM_CSR_OMP_Multi.h" + + +//global vars -> audit +double Start,End,Elapsed,ElapsedInternal; + +#define OFF_F 0 +#include "inlineExports_Generic.h" +#include "Sp3MM_CSR_OMP_UB_Generic.c" +#include "Sp3MM_CSR_OMP_Num_Generic.c" +#undef OFF_F + +#define OFF_F 1 +#include "inlineExports_Generic.h" +#include "Sp3MM_CSR_OMP_UB_Generic.c" +#include "Sp3MM_CSR_OMP_Num_Generic.c" +#undef OFF_F diff --git a/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_Num_Generic.c b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_Num_Generic.c new file mode 100644 index 00000000..fc4306c1 --- /dev/null +++ b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_Num_Generic.c @@ -0,0 +1,249 @@ +/* + * Sp3MM_for_AlgebraicMultiGrid + * (C) Copyright 2021-2022 + * Andrea Di Iorio + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may + * not be used to endorse or promote products derived from this + * software without specific written permission. 
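[Editor's sketch] The double inclusion at the end of Sp3MM_CSR_OMP_Multi.c above is the code-generation mechanism used throughout this patch: OFF_F selects 0-based (C) or 1-based (Fortran) CSR offsets, and the CAT token-pasting macro suffixes every generated symbol with it. A minimal sketch of the presumed pattern follows; CAT is assumed to be the usual two-level paste from macros.h, and firstColOfRow_ is an illustrative name, not part of the patch:

    // generic body, included once per OFF_F value (cf. the #define/#include/#undef blocks above)
    #define _CAT(a,b) a##b
    #define CAT(a,b)  _CAT(a,b)

    idx_t CAT(firstColOfRow_,OFF_F)(spmat *m, idx_t r){
        // strip the Fortran offset (OFF_F==1) or leave C indexes untouched (OFF_F==0)
        return m->JA[ m->IRP[r] - OFF_F ] - OFF_F;
    }
    // #define OFF_F 0 + #include  ->  firstColOfRow_0()   (C indexing)
    // #define OFF_F 1 + #include  ->  firstColOfRow_1()   (Fortran indexing)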
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*#pragma message( "compiling SpMM_CSR_OMP_Generic.c with OFF_F as:" STR(OFF_F) )*/ +#ifndef OFF_F + #error generic implementation requires OFF_F defined +#endif + +#ifndef SP3MM_OMP_SYMB +#define SP3MM_OMP_SYMB + +//allocCSRSpMatSymbStep aux function for IRP set by symb step output +static inline idx_t _setCSR_IRP_1DPartitioing(spmat* m, idx_t* rowSizes){ + idx_t r,cumulSize; + for (r=0,cumulSize=0; rM; cumulSize += rowSizes[r++]) + m->IRP[r] = cumulSize; + DEBUGCHECKS assert(cumulSize == rowSizes[m->M]); + m->IRP[m->M] = cumulSize; + return cumulSize; +} +static inline idx_t _setCSR_IRP_2DPartitioing(spmat* m,idx_t* rowSizes,ushort gridCols){ + idx_t cumulSize=0; + for (idx_t r=0,cumulSizeOld; rM; r++){ + m->IRP[r] = cumulSize; + for (ushort gc=0; gcM*gridCols]); + m->IRP[m->M] = cumulSize; + return cumulSize; +} +/* + * allocate CSR mat @m, setting up correctly IRP and other buffers allocations + * if @gridCols == 1: (1D partitioning of @m) -> @rowsSizes will be an array of row lenghts + * if @gridCols > 1: (2D partitioning of @m) -> @rowsSizes will be a matrix with + * elem i,j = len of j-th colPartition (out of @gridCols) of i-th row + */ +static inline int allocCSRSpMatSymbStep(spmat* m,idx_t* rowSizes,ushort gridCols){ + //setup IRP and get cumul, whole size of @m + if (!gridCols) return EXIT_FAILURE; + idx_t cumulSize; + if (gridCols == 1) cumulSize = _setCSR_IRP_1DPartitioing(m,rowSizes); + /*if (gridCols > 1} -- static analizer miss unsignedness...*/ + else cumulSize = _setCSR_IRP_2DPartitioing(m,rowSizes,gridCols); + m->IRP[m->M] = cumulSize; + m->NZ = cumulSize; + + if (!(m->AS = malloc(rowSizes[m->M] * sizeof(*m->AS))) ){ + ERRPRINT("allocCSRSpMatSymbStep m->AS malloc errd\n"); + return EXIT_FAILURE; + } + if (!(m->JA = malloc(rowSizes[m->M] * sizeof(*m->JA))) ){ + ERRPRINT("allocCSRSpMatSymbStep m->JA malloc errd\n"); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} +#endif +//////////////////// COMPUTE CORE Sp[3]MM SYMB-NUMB PHASE ////////////////////////// +////////Sp3MM as 2 x SpMM +///1D +spmat* CAT(spmmRowByRow_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ + ACC_DENSE *accVects = NULL,*acc; + SPMM_ACC* outAccumul=NULL; + idx_t* rowsSizes = NULL; + ///init AB matrix with SPMM heuristic preallocation + spmat* AB = allocSpMatrix(A->M,B->N); + if (!AB) goto _err; + ///aux structures alloc + if (!(accVects = _initAccVectors(cfg->threadNum,AB->N))){ + ERRPRINT("accVects init failed\n"); + goto _err; + } + ///SYMBOLIC STEP + if (!(rowsSizes = CAT(SpMM_Symb___,OFF_F) (cfg->symbMMRowImplID, A,B))) + goto _err; + if (allocCSRSpMatSymbStep(AB,rowsSizes,1)) goto _err; + + ///NUMERIC STEP + ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (AB->M,AB,cfg); + 
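 /* worked example of the symbolic preallocation used here: with M=3 and the
  * symbolic step returning rowSizes={2,0,3} (total rowSizes[M]=5),
  * allocCSRSpMatSymbStep sets IRP={0,2,2,5} and NZ=5, so each thread in the
  * loop below can sparsifyDirect() its row into AS/JA starting at IRP[r],
  * with no atomic space reservation (assumed from the allocation code above). */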
AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); + #pragma omp parallel for schedule(runtime) private(acc) + for (idx_t r=0; rM; r++){ //row-by-row formulation + acc = accVects + omp_get_thread_num(); + _resetAccVect(acc); //rezero for the next A row + //for each A's row nnz, accumulate scalar vector product nnz.val * B[nnz.col] + /* direct use of sparse scalar vector multiplication + for (idx_t ja=A->IRP[r]-OFF_F,ca,jb,bRowLen; jaIRP[r+1]-OFF_F; ja++){ + ca = A->JA[ja] - OFF_F; + jb = B->IRP[ca] - OFF_F; + bRowLen = B->IRP[ca+1] - B->IRP[ca]; + CAT(scSparseVectMul_,OFF_F)(A->AS[ja], B->AS+jb,B->JA+jb,bRowLen,acc); + } */ + for (ulong ja=A->IRP[r]-OFF_F; jaIRP[r+1]-OFF_F; ja++) //row-by-row formul + CAT(scSparseRowMul_,OFF_F)(A->AS[ja], B, A->JA[ja]-OFF_F, acc); + //direct sparsify: trasform accumulated dense vector to a CSR row + sparsifyDirect(acc,AB,r); //0,NULL);TODO COL PARTITIONING COMMON API + } + #if OFF_F != 0 + C_FortranShiftIdxs(AB); + #endif + AUDIT_INTERNAL_TIMES End=omp_get_wtime(); + DEBUG checkOverallocPercent(rowsSizes,AB); + goto _free; + + _err: + if(AB) freeSpmat(AB); + AB=NULL; //nothing'll be returned + _free: + free(rowsSizes); + if(accVects) freeAccsDense(accVects,cfg->threadNum); + if(outAccumul) freeSpMMAcc(outAccumul); + + return AB; + +} +spmat* CAT(spmmRowByRow1DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ + ACC_DENSE *accVects = NULL,*acc; + SPMM_ACC* outAccumul=NULL; + idx_t* rowsSizes = NULL; + ///init AB matrix with SPMM heuristic preallocation + spmat* AB = allocSpMatrix(A->M,B->N); + if (!AB) goto _err; + ///aux structures alloc + if (!(accVects = _initAccVectors(cfg->threadNum,AB->N))){ + ERRPRINT("accVects init failed\n"); + goto _err; + } + ///SYMBOLIC STEP + if (!(rowsSizes = CAT(SpMM_Symb___,OFF_F) (cfg->symbMMRowImplID, A,B))) + goto _err; + if (allocCSRSpMatSymbStep(AB,rowsSizes,1)) + goto _err; + + ///NUMERIC STEP + //perform Gustavson over rows blocks -> M / @cfg->gridRows + ulong rowBlock = AB->M/cfg->gridRows, rowBlockRem = AB->M%cfg->gridRows; + ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (cfg->gridRows,AB,cfg); + AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); + ulong b,startRow,block; //omp for aux vars + #pragma omp parallel for schedule(runtime) private(acc,startRow,block) + for (b=0; b < cfg->gridRows; b++){ + block = UNIF_REMINDER_DISTRI(b,rowBlock,rowBlockRem); + startRow = UNIF_REMINDER_DISTRI_STARTIDX(b,rowBlock,rowBlockRem); + for (idx_t r=startRow; rIRP[r]-OFF_F,ca,jb,bRowLen; jaIRP[r+1]-OFF_F; ja++){ + ca = A->JA[ja] - OFF_F; + jb = B->IRP[ca] - OFF_F; + bRowLen = B->IRP[ca+1] - B->IRP[ca]; + CAT(scSparseVectMul_,OFF_F)(A->AS[ja], B->AS+jb,B->JA+jb,bRowLen,acc); + } */ + for (ulong ja=A->IRP[r]-OFF_F; jaIRP[r+1]-OFF_F; ja++) //row-by-row formul + CAT(scSparseRowMul_,OFF_F)(A->AS[ja], B, A->JA[ja]-OFF_F, acc); + //direct sparsify: trasform accumulated dense vector to a CSR row + sparsifyDirect(acc,AB,r); //0,NULL);TODO COL PARTITIONING COMMON API + } + } + #if OFF_F != 0 + C_FortranShiftIdxs(AB); + #endif + AUDIT_INTERNAL_TIMES End=omp_get_wtime(); + DEBUG checkOverallocPercent(rowsSizes,AB); + goto _free; + + _err: + if(AB) freeSpmat(AB); + AB=NULL; //nothing'll be returned + _free: + if(rowsSizes) free(rowsSizes); + if(accVects) freeAccsDense(accVects,cfg->threadNum); + if(outAccumul) freeSpMMAcc(outAccumul); + + return AB; + +} +spmat* CAT(spmmRowByRow2DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ + DEBUG printf("spmm\trowBlocks of A ,\tcolBlocks of B\tM=%luxN=%lu\n",A->M,B->N); + idx_t* bColOffsets = NULL; //B 
group columns starting offset for each row + ACC_DENSE *accVectors=NULL,*accV; + SPACC* accRowPart; + spmat* AB = allocSpMatrix(A->M,B->N); + SPMM_ACC* outAccumul=NULL; + idx_t *rowsPartsSizes=NULL, *rowSizes=NULL; //for rows' cols partition, correct len + if (!AB) goto _err; + //2D indexing aux vars + ulong gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols; + ulong _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows; + ulong _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols; + ulong startRow,startCol,rowBlock,colBlock; //data division aux variables + ////get bColOffsets for B column groups + if (!(bColOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols))) + goto _err; + ///TODO TODO + _err: + if (AB) freeSpmat(AB); + AB = NULL; + _free: + free(rowsPartsSizes); + free(bColOffsets); + if (accVectors) freeAccsDense(accVectors,gridSize); + if (outAccumul) freeSpMMAcc(outAccumul); + + return AB; +} +/* TODO +///2D +//PARTITIONS NOT ALLOCATED +spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ +///SP3MM +spmat* CAT(sp3mmRowByRowPair_,OFF_F)(spmat* R,spmat* AC,spmat* P,CONFIG* cfg,SPMM_INTERF spmm){ +////////Sp3MM direct +///1D +spmat* CAT(sp3mmRowByRowMerged_,OFF_F)(spmat* R,spmat* AC,spmat* P,CONFIG* cfg,SPMM_INTERF spmm){ +*/ diff --git a/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_SymbStep_Generic.c b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_SymbStep_Generic.c new file mode 100644 index 00000000..c41242a5 --- /dev/null +++ b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_SymbStep_Generic.c @@ -0,0 +1,594 @@ +/* + * Sp3MM_for_AlgebraicMultiGrid + * (C) Copyright 2021-2022 + * Andrea Di Iorio + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
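[Editor's sketch] The 1D-block kernels above (and the 2D tiling variants) carve the iteration space with UNIF_REMINDER_DISTRI / UNIF_REMINDER_DISTRI_STARTIDX. Their definitions live in macros.h, whose contents are not shown in this excerpt, so the following is an assumption about the semantics their uses imply, not the patch's actual code:

    // uniform partition of n items into p blocks, remainder spread over the first blocks
    #define UNIF_REMINDER_DISTRI(i,block,rem)          ((block) + ((i) < (rem) ? 1 : 0))
    #define UNIF_REMINDER_DISTRI_STARTIDX(i,block,rem) ((block)*(i) + ((i) < (rem) ? (i) : (rem)))
    // e.g. M=10 rows, gridRows=4: rowBlock=2, rowBlockRem=2 -> block sizes {3,3,2,2}, start rows {0,3,6,8}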
+ */ + +/* + * CSR Sp[3]MM Symbolic step implementations + * target: compute the output matrix size and the row lens for preallocation + * direct write out partial results + * See interfaces in respective header + */ + + +/*#pragma message( "compiling Sp3MM_CSR_OMP_Symb_Generic.c with config as:" \ + STR(OFF_F) " - " STR(OUT_IDXS) " - " STR(COL_PARTS) )*/ +#ifndef OFF_F + #error generic implementation requires OFF_F defined +#endif + +///setup aux macros for different signatures implementation via #if arith expr +#pragma push_macro("OUT_IDXS") +#pragma push_macro("_OUT_IDXS") +#pragma push_macro("COL_PARTS") +#pragma push_macro("_COL_PARTS") + +#ifdef OUT_IDXS + #define _OUT_IDXS TRUE +#else + #define _OUT_IDXS FALSE + #define OUT_IDXS _UNDEF +#endif +#ifdef COL_PARTS + #define _COL_PARTS TRUE +#else + #define _COL_PARTS FALSE + #define COL_PARTS _UNDEF +#endif +/// + +//////SpMM - rowByrow +///1row->matrix->outRow +//RBTREE based +/* + * Compute symbolic product of (nnz indexes of) row @aRowJA and matrix @b + * insert nnz indexes of the mul. result row as nodes in a rbtree rooted at @root + * with nodes in @nodes which have to be enough for the mul result row (use an UB) + * Retuns: multiplication result row NNZ number,se CONFIG_MACROS below for more + * + * CONFIG_MACROS: + * if _OUT_IDXS == TRUE return mul.result row nnz idxs in @outIdxs + * ifdef: OUT_IDXS_RBTREE_NODES: nnz indexes returned inplace sorting rbtree + * as nnz indexes(JA) of the mul result row + * else: stop at returning the mul. result row lenght + * if _COL_PARTS == TRUE return the number of nonzero elements in + * in each of the @gridCols column partitions inside @rowColPartsLens + * OFF_F: offset back indexes from fortran + * TODO also output indexes are shifted (see c_b ) + */ +static inline idx_t CAT4(SpMM_Row_Symb_Rbtree,OUT_IDXS,COL_PARTS,OFF_F) + ( + idx_t* aRowJA, idx_t aRowLen, spmat* b,rbRoot* root, rbNode* nodes + #if _OUT_IDXS == TRUE && !defined OUT_IDXS_RBTREE_NODES + ,idx_t* outIdxs + #endif + #if _COL_PARTS == TRUE + ,ushort gridCols,idx_t* rowColPartsLens + #endif + ) +{ + //Compute resulting ab's row non zero indexes and total lenght + idx_t abRowLen = 0; //mul.result row len, return value + for ( idx_t i=0,c_a,inserted; i < aRowLen; i++ ){ //for each entry in a's row + c_a = aRowJA[i]-OFF_F; + //gather diffrent nnz indexes in corresponding b's `c_a`th row + for ( idx_t j = b->IRP[c_a]-OFF_F,c_b; j < b->IRP[c_a+1]-OFF_F; j++ ){ + c_b = b->JA[j]-OFF_F; + //check if c_b is nonzero index for mul.result row + inserted = rbInsertNewKey (root, nodes+abRowLen, c_b); + abRowLen += inserted; //inserted needed just after this + /* LESS EFFICIENT THEN BELOW (here no memory of last colPart) + #if _COL_PARTS == TRUE //keep track of which col partition is c_b in + if (inserted) + rowColPartsLens[ matchingUnifRangeIdx(c_b, b->N, gridCols) ]++; + #endif */ + } + } + #if _OUT_IDXS == T && defined OUT_IDXS_RBTREE_NODES + /* return the non zero indexes of the mul.result row + * sorting inplace the nodes inserted in the rbtree */ + sortRbNode(nodes,abRowLen); + #elif _OUT_IDXS == T || _COL_PARTS == T + uint i=0; + idx_t k; + #if _COL_PARTS == T + //colParts aux vars + idx_t _colBlock = abRowLen / gridCols, _colBlockRem = abRowLen % gridCols; + ushort gc=0; + idx_t gcStartCol = unifRemShareStart(gc,_colBlock,_colBlockRem); + idx_t gcEndCol = unifRemShareEnd(gc,_colBlock,_colBlockRem); + #endif //_COL_PARTS == T + for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){ + k = rb_entry(n,rbNode,rb)->key; + #if 
_OUT_IDXS == T + //return the mul.result nnz index inside the rbNodes + outIdxs[ i++ ] = k; + #endif + #if _COL_PARTS == T + while (k >= gcEndCol ){ //see if the idx is in another col partition + // TODO also = since gcEndCol as k is 0based + gcEndCol = unifRemShareEnd(gc ,_colBlock, _colBlockRem); + gc++; + DEBUGCHECKS{ assert( gc < gridCols ); } + } + rowColPartsLens[gc]++; + #endif //_COL_PARTS == T + } + #endif //_OUT_IDXS == T ... _COL_PARTS == T + /*DEBUGCHECKS{ //TODO PRINT NNZ INDEXES FOR MANUAL (PAINFUL CHECK) + idx_t k; + for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){ + k = rb_entry(n,rbNode,rb)->key; + printf("%lu, ",k); + } + printf("\n"); + }*/ + + return abRowLen; +} + +//SPVECT_IDX_DENSE_MAP based TODO double implementation for trick syntax folding here... +/* + * SPVECT_IDX_DENSE_MAP based, as SpMM_Row_Symb_Rbtree but with idxMap aux idx keeping + * CONFIG_MACROS (new) + * IDX_RMUL_SYMB_RBTREE && ( _OUT_IDXS == T || _COL_PARTS == T ): + * (tmp) symb mult out indexes will be kept via a rbtree + * otherwise directly in the out array appending them and then sorting them + * (potentially same n log n) + */ +static inline idx_t CAT4(SpMM_Row_Symb_IdxMap,OUT_IDXS,COL_PARTS,OFF_F) + ( + idx_t* aRowJA, idx_t aRowLen, spmat* b, SPVECT_IDX_DENSE_MAP* idxsMapAcc + #if _OUT_IDXS == TRUE + ,idx_t* outIdxs + #endif + #if ( _OUT_IDXS == TRUE && IDX_RMUL_SYMB_RBTREE == T ) || _COL_PARTS == T + ,rbRoot* root, rbNode* nodes + #endif // _OUT_IDXS == TRUE + #if _COL_PARTS == TRUE + ,ushort gridCols,idx_t* rowColPartsLens + #endif + ) +{ + //Compute resulting ab's row non zero indexes and total lenght + idx_t abRowLen = 0; //mul.result row len, return value + for ( idx_t i=0,c_a,inserted; i < aRowLen; i++ ){ //for each entry in a's row + c_a = aRowJA[i]-OFF_F; + //gather diffrent nnz indexes in corresponding b's `c_a`th row + for ( idx_t j = b->IRP[c_a]-OFF_F,c_b; j < b->IRP[c_a+1]-OFF_F; j++ ){ + c_b = b->JA[j]-OFF_F; + //check if c_b is nonzero index for mul.result row + inserted = spVect_idx_in(c_b,idxsMapAcc); + #if _OUT_IDXS == T || _COL_PARTS == T //idxs HAS TO be accumulated + if (inserted) + #if IDX_RMUL_SYMB_RBTREE == T || _OUT_IDXS == F //add it in a RBTREE struct + rbInsertNewKey (root, nodes+idxsMapAcc->len, c_b); + #else //append it, then sort + outIdxs[idxsMapAcc->len] = c_b; + #endif //IDX_RMUL_SYMB_RBTREE == T //how accumulated key c_b + #endif //#if _OUT_IDXS == T || _COL_PARTS == T + } + } + abRowLen = idxsMapAcc->len; + //gather idxs or their sparsity struct in output row + #if _OUT_IDXS == T || _COL_PARTS == T + idx_t j = 0,k; + #if _COL_PARTS == T + //colParts aux vars + idx_t _colBlock = abRowLen / gridCols, _colBlockRem = abRowLen % gridCols; + ushort gc = 0; + idx_t gcStartCol = unifRemShareStart(gc,_colBlock,_colBlockRem); + idx_t gcEndCol = unifRemShareEnd(gc,_colBlock,_colBlockRem); + #endif + #if IDX_RMUL_SYMB_RBTREE == T || _OUT_IDXS == F ///idxs recorded in a aux rbtree + for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){ + k = rb_entry(n,rbNode,rb)->key; + #if _OUT_IDXS == T + outIdxs[ j++ ] = k; //record ordered key sotred from aux rbtree + #endif + #else ///idxs recorded in aux append array + sort_idx_t(outIdxs,abRowLen); + for (; j < abRowLen; j++){ + k = outIdxs[j]; //(OSS) already ordered in outIndexes arr + #endif //IDX_RMUL_SYMB_RBTREE == T + #if _COL_PARTS == T + while (k >= gcEndCol ){ //see if the idx is in another col partition + // TODO also = since gcEndCol as k is 0based + gcEndCol = unifRemShareEnd(gc 
,_colBlock, _colBlockRem); + gc++; + DEBUGCHECKS{ assert( gc < gridCols ); } + } + rowColPartsLens[gc]++; + #endif //_COL_PARTS == T + } + #endif //_OUT_IDXS == T ... _COL_PARTS == T + return abRowLen; +} + +//switch among 2 row_symb_XXX implemenetation aux +/* + * SpMM single row symbolic computation + * select one implementation via @implID + * among SpMM_Row_Symb_Rbtree or SpMM_Row_Symb_IdxMap + * args will be forwared accordingly + */ +static inline idx_t CAT4(SpMM_Row_Symb_,OUT_IDXS,COL_PARTS,OFF_F) + ( + ROW_MMSYM_IMPL_MODE implID, idx_t* aRowJA, idx_t aRowLen, spmat* b, + rbRoot* root, rbNode* nodes, SPVECT_IDX_DENSE_MAP* idxsMapAcc + #if _OUT_IDXS == TRUE + ,idx_t* outIdxs + #endif + #if _COL_PARTS == TRUE + ,ushort gridCols,idx_t* rowColPartsLens + #endif + ) +{ + if (implID == RBTREE) { + return CAT4(SpMM_Row_Symb_Rbtree,OUT_IDXS,COL_PARTS,OFF_F) + ( + aRowJA,aRowLen,b,root,nodes + #if _OUT_IDXS == TRUE && !defined OUT_IDXS_RBTREE_NODES + ,outIdxs + #endif + #if _COL_PARTS == TRUE + ,gridCols,rowColPartsLens + #endif + ); + } + else { //IDXMAP + return CAT4(SpMM_Row_Symb_IdxMap,OUT_IDXS,COL_PARTS,OFF_F) + ( + aRowJA,aRowLen,b,idxsMapAcc + #if _OUT_IDXS == TRUE + ,outIdxs + #endif + #if ( _OUT_IDXS == TRUE && IDX_RMUL_SYMB_RBTREE == T ) || _COL_PARTS == T + ,root, nodes + #endif + #if _COL_PARTS == TRUE + ,gridCols, rowColPartsLens + #endif + ); + } +} +///SpMM row-by-row +idx_t* CAT4(SpMM_Symb_,OUT_IDXS,COL_PARTS,OFF_F) + ( + ROW_MMSYM_IMPL_MODE symbRowImplID, spmat* a, spmat* b + #if _OUT_IDXS == TRUE + ,idx_t*** outIdxs + #endif + #if _COL_PARTS == TRUE + ,ushort gridCols, idx_t** rowColPartsLens + #endif + ) +{ + + ///initial allocations + rbRoot* rbRoots = NULL; + rbNode* rbNodes = NULL; + SPVECT_IDX_DENSE_MAP* idxsMapAccs = NULL; + idx_t *rowLens=NULL,*upperBoundedRowsLens=NULL,*upperBoundedSymMat=NULL; + idx_t maxRowLen=0; + int rbTreeUsed = (symbRowImplID == RBTREE || + (IDX_RMUL_SYMB_RBTREE && (_COL_PARTS || _OUT_IDXS)) ); + + if ( !(rowLens = malloc(sizeof(*rowLens) * (a->M+1))) ){ + ERRPRINT("SpMM_Symb_ rowLens malloc errd\n"); + goto _err; + } + if (_OUT_IDXS == TRUE || rbTreeUsed ){ + if (!(upperBoundedRowsLens = CAT(spMMSizeUpperbound_,OFF_F)(a,b))) + goto _err; + } + #if _OUT_IDXS == TRUE + if (!(*outIdxs = malloc(sizeof(**outIdxs) * a->M))){ + ERRPRINT("SpMM_Symb_ outIdxs malloc errd\n"); + goto _err; + } + if (!(upperBoundedSymMat = malloc( + sizeof(*upperBoundedSymMat)*upperBoundedRowsLens[a->M]))){ + ERRPRINT("SpMM_Symb_ upperBoundedSymMat malloc errd\n"); + goto _err; + } + //write rows' start pointer from full matrix JA allocated + for (idx_t i=0,cumul=0; iM; cumul += upperBoundedRowsLens[i++]) + *outIdxs[i] = upperBoundedSymMat + cumul; + #endif //#if _OUT_IDXS == TRUE + #if _COL_PARTS == TRUE + if (!(*rowColPartsLens = malloc(a->M * gridCols * sizeof(**rowColPartsLens)))){ + ERRPRINT("SpMM_Symb_ rowColPartsLens malloc errd\n"); + goto _err; + } + #endif //_COL_PARTS + uint maxThreads = omp_get_max_threads(); //TODO FROM CFG + //index keeping aux struct + //rbtree implementation or idxMap with aux of symbTree for tmp outIdx keeping + if ( rbTreeUsed ){ + maxRowLen = reductionMaxSeq(upperBoundedRowsLens, a->M); + //rbTrees for index keeping + rbRoots = malloc(maxThreads * sizeof(*rbRoots)); + rbNodes = calloc(maxThreads * maxRowLen, sizeof(*rbNodes)); + if( !rbRoots || !rbNodes ){ + ERRPRINT("SpMM_Symb_ threads' aux rbTree mallocs errd\n"); + goto _err; + } + //init roots + for (uint i=0; iN,idxsMapAccs+i)) goto _err; + } + } + ///rows parallel compute + 
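 /* per-thread scratch wiring set up above: thread t owns rbRoots[t] plus the
  * node slab rbNodes[t*maxRowLen ... (t+1)*maxRowLen-1] on the RBTREE path,
  * or idxsMapAccs[t] on the IDXMAP path; after each row only the rLen entries
  * actually used are reset (see the memset / _resetIdxMap calls below). */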
idx_t* aRow; + idx_t aRowLen,rLen,abCumulLen=0; + int tid; + rbRoot* tRoot; rbNode* tNodes; + SPVECT_IDX_DENSE_MAP* tIdxsMapAcc = NULL; + #pragma omp parallel for schedule(static) \ + private(aRow,aRowLen,rLen, tRoot,tNodes,tid) reduction(+:abCumulLen) + for(idx_t r=0; rM; r++){ + aRow = a->JA + a->IRP[r]-OFF_F; + aRowLen = a->IRP[r+1] - a->IRP[r]; + tid = omp_get_thread_num(); + //TODO low overhead pointer airth can be avoided with if (symbRowImplID .. && ) + tIdxsMapAcc = idxsMapAccs + tid; + tRoot = rbRoots + tid; + tNodes = rbNodes + tid * maxRowLen; + + rLen = CAT4(SpMM_Row_Symb_,OUT_IDXS,COL_PARTS,OFF_F) + ( + symbRowImplID, aRow, aRowLen, b, tRoot,tNodes, tIdxsMapAcc + #if _OUT_IDXS == TRUE + ,*outIdxs[r] + #endif + #if _COL_PARTS == TRUE + ,gridCols, (*rowColPartsLens) + IDX2D(r,0,gridCols) + #endif + ); + rowLens[r] = rLen; + abCumulLen += rLen; + ///reset symb idxs keeping aux structs + if (symbRowImplID==RBTREE || (IDX_RMUL_SYMB_RBTREE && (_COL_PARTS || _OUT_IDXS))){ + *tRoot = RB_ROOT_CACHED; + memset(tNodes,0,rLen * sizeof(*tNodes)); + } + if (symbRowImplID == IDXMAP) _resetIdxMap(tIdxsMapAcc); + + } + rowLens[a->M] = abCumulLen; + goto _free; + + _err: + free(rowLens); + #if _OUT_IDXS == T + if (outIdxs) free(*outIdxs); + #endif + #if _COL_PARTS == T + if (rowColPartsLens) free(*rowColPartsLens); + #endif + rowLens = NULL; + _free: + free(upperBoundedRowsLens); + free(upperBoundedSymMat); + free(rbRoots); + free(rbNodes); + if (idxsMapAccs){ + for (uint i=0; ikey; + for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){ + outIdxs[ i++ ] = rb_entry(n,rbNode,rb)->key; + } + #else + /* return the non zero indexes of the mul.result row + * sorting inplace the nodes inserted in the rbtree */ + sortRbNode(nodes,abcRowLen); + #endif + #endif + return abcRowLen; +} + +idx_t* CAT3(Sp3MM_Symb_,OUT_IDXS,OFF_F) + ( + ROW_MMSYM_IMPL_MODE symbMMRowImplID, spmat* a, spmat* b, spmat* c + #if _OUT_IDXS == TRUE + ,idx_t*** outIdxs + #endif + ) +{ + //idxs keeping aux buffs + idx_t* abRowsJATmp = NULL; + rbRoot* rbRoots = NULL; rbNode* rbNodes = NULL; + SPVECT_IDX_DENSE_MAP* idxsMapAccs = NULL; + + idx_t *abUpperBoundedRowsLens = NULL, *upperBoundedSymMat = NULL; + ///initial allocations + idx_t* rowLens = malloc(sizeof(*rowLens) * (a->M +1) ); //to return + if (!rowLens){ + ERRPRINT("SpMM_Symb_ rowLens malloc errd\n"); + goto _err; + } + + #if _OUT_IDXS == TRUE + if (!(*outIdxs = malloc(sizeof(**outIdxs) * a->M))){ + ERRPRINT("SpMM_Symb_ outIdxs malloc errd\n"); + goto _err; + } + if (!(abUpperBoundedRowsLens = CAT(spMMSizeUpperbound_,OFF_F)(a,b))) + goto _err; + /*TODO TODO instead of doing one sym product first to have a correct UB + * use an heuristics here to get output matrix size + */ + idx_t abcUBSize = abUpperBoundedRowsLens[a->M] * SP3MM_UB_HEURISTIC; + if (!(upperBoundedSymMat=malloc(sizeof(*upperBoundedSymMat)*abcUBSize))){ + ERRPRINT("SpMM_Symb_ upperBoundedSymMat malloc errd\n"); + goto _err; + } + //TODO heuristic TO UB rows bounds ... 
require compacting copy + for (idx_t i=0,cumul=0; iM; + cumul += SP3MM_UB_HEURISTIC * abUpperBoundedRowsLens[i++]) + *outIdxs[i] = upperBoundedSymMat + cumul; + #endif //#if _OUT_IDXS == TRUE + //rbTrees for index keeping + uint maxThreads = omp_get_max_threads(); //TODO FROM CFG + idx_t abMaxRowLen = reductionMaxSeq(abUpperBoundedRowsLens, a->M); + #ifdef HEURISTICS_UB + idx_t maxRowLenUB = abMaxRowLen * SP3MM_UB_HEURISTIC; //TODO UB HEURISTC + #else + idx_t maxRowLenUB = c->N; + #endif //HEURISTICS_UB + + if (!(abRowsJATmp = malloc(maxThreads*maxRowLenUB*sizeof(*abRowsJATmp)) ) ){ + ERRPRINT("Sp3MM_Symb_ abRowsJATmp malloc errd\n"); + goto _err; + } + if (symbMMRowImplID == RBTREE || IDX_RMUL_SYMB_RBTREE ){ + rbNodes = malloc(maxThreads * maxRowLenUB * sizeof(*rbNodes)); + rbRoots = malloc(maxThreads * sizeof(*rbRoots)); + if (!rbRoots || !rbNodes ){ + ERRPRINT("Sp3MM_Symb_ rbRoots || rbNodes malloc errd\n"); + goto _err; + } + //init roots + for (uint i=0; iN,idxsMapAccs+i)) goto _err; + } + } + ///rows parallel compute + idx_t* aRow; + idx_t aRowLen,rLen,outCumulLen=0; + //threads local pointers + int tid; + rbRoot* tRoot; + rbNode* tNodes; + SPVECT_IDX_DENSE_MAP* tIdxsMapAcc; + idx_t* tABRowJATmp; + + #pragma omp parallel for schedule(static) \ + private(aRow,aRowLen,rLen, tRoot,tNodes,tid) reduction(+:outCumulLen) + for(idx_t r=0; rM; r++){ + aRow = a->JA + a->IRP[r]-OFF_F; + aRowLen = a->IRP[r+1] - a->IRP[r]; + tid = omp_get_thread_num(); + tRoot = rbRoots + tid; + tNodes = rbNodes + tid * maxRowLenUB; + tIdxsMapAcc = NULL; //TODO + tABRowJATmp = abRowsJATmp + tid * maxRowLenUB; + rLen = CAT3(Sp3MM_Row_Symb_,OUT_IDXS,OFF_F) + (symbMMRowImplID, aRow,aRowLen,b,c,tRoot,tNodes,tIdxsMapAcc,tABRowJATmp, + #if _OUT_IDXS == TRUE + *outIdxs[r] + #endif + ); + outCumulLen += rLen; + rowLens[r] = rLen; + + } + goto _free; + _err: + free(rowLens); + #if _OUT_IDXS == T + if (*outIdxs) free(*outIdxs); + #endif + rowLens = NULL; + _free: + free(abUpperBoundedRowsLens); + free(upperBoundedSymMat); + free(rbRoots); + free(rbNodes); + free(abRowsJATmp); + + return rowLens; +} + +#endif //#if !defined COL_PARTS && defined OUT_IDXS + + +///restore aux macros entry state +//#undef _OUT_ID +//#undef _COL_PARTS +#pragma pop_macro("OUT_IDXS") +#pragma pop_macro("_OUT_IDXS") +#pragma pop_macro("COL_PARTS") +#pragma pop_macro("_COL_PARTS") + + + + diff --git a/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_SymbStep_Multi.c b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_SymbStep_Multi.c new file mode 100644 index 00000000..9305cfbf --- /dev/null +++ b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_SymbStep_Multi.c @@ -0,0 +1,92 @@ +/* + * Sp3MM_for_AlgebraicMultiGrid + * (C) Copyright 2021-2022 + * Andrea Di Iorio + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may + * not be used to endorse or promote products derived from this + * software without specific written permission. 
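[Editor's sketch] Stripped of the CAT4 plumbing and the OUT_IDXS/COL_PARTS variants, every row-symbolic kernel above computes the cardinality of a union of B's rows selected by one row of A. A self-contained sketch of that core, with a dense boolean map standing in for the rbtree/idx-map accumulators (names hypothetical, 0-based CSR assumed):

    #include <stddef.h>

    // length of the symbolic product row: |union of B rows picked by A's row columns|
    static size_t rowSymbLen(const size_t *aJA, size_t aLen,
                             const size_t *bIRP, const size_t *bJA,
                             unsigned char *seen,   // size B->N, all zeros on entry
                             size_t *touched)       // scratch, >= an upper bound of the row length
    {
        size_t len = 0;
        for (size_t i = 0; i < aLen; i++)                          // each nnz column of A's row...
            for (size_t j = bIRP[aJA[i]]; j < bIRP[aJA[i]+1]; j++) // ...selects one row of B
                if (!seen[bJA[j]]) { seen[bJA[j]] = 1; touched[len++] = bJA[j]; }
        for (size_t k = 0; k < len; k++) seen[touched[k]] = 0;     // O(len) reset, like _resetIdxMap
        return len;
    }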
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#include "macros.h" +#include "config.h" +#include "sparseMatrix.h" +#include "linuxK_rbtree_minimalized.h" + +#include "Sp3MM_CSR_OMP_SymbStep_Multi.h" + +#include "sparseUtilsMulti.h" +#include "utils.h" +#include + +//inline exports +//ushort matchingUnifRangeIdx(idx_t idx, idx_t size, ushort rangesN); +idx_t* CAT(spMMSizeUpperbound_,0)(spmat* ,spmat* ); +idx_t* CAT(spMMSizeUpperbound_,1)(spmat* ,spmat* ); +idx_t* CAT(spMMSizeUpperboundColParts_,0)(spmat* ,spmat* ,ushort); +idx_t* CAT(spMMSizeUpperboundColParts_,1)(spmat* ,spmat* ,ushort); + +/* Multi implementation of symbolic product of sparse matrixes, config macros + * OFF_F: C-Fortran spmat indexing + * OUT_IDXS: indexes output + * COL_PARTS: partitioning columns output... + */ + +#define OUT_IDXS_ON OutIdxs_ +#define COL_PARTS_ON ColParts_ +#undef OUT_IDXS +#undef COL_PARTS + +#define OFF_F 0 + ///generate basic versions + #include "Sp3MM_CSR_OMP_SymbStep_Generic.c" + ///generate outIdxs versions + #define OUT_IDXS OUT_IDXS_ON + #include "Sp3MM_CSR_OMP_SymbStep_Generic.c" + #undef OUT_IDXS + ///generate colParts versions + #define COL_PARTS COL_PARTS_ON + #include "Sp3MM_CSR_OMP_SymbStep_Generic.c" + //generate outIdxs AND colParts versions + #define OUT_IDXS OUT_IDXS_ON + #include "Sp3MM_CSR_OMP_SymbStep_Generic.c" + + #undef OUT_IDXS + #undef COL_PARTS +#undef OFF_F +#define OFF_F 1 + ///generate basic versions + #include "Sp3MM_CSR_OMP_SymbStep_Generic.c" + ///generate outIdxs versions + #define OUT_IDXS OUT_IDXS_ON + #include "Sp3MM_CSR_OMP_SymbStep_Generic.c" + #undef OUT_IDXS + ///generate colParts versions + #define COL_PARTS COL_PARTS_ON + #include "Sp3MM_CSR_OMP_SymbStep_Generic.c" + //generate outIdxs AND colParts versions + #define OUT_IDXS OUT_IDXS_ON + #include "Sp3MM_CSR_OMP_SymbStep_Generic.c" + + #undef OUT_IDXS + #undef COL_PARTS +#undef OFF_F diff --git a/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_UB_Generic.c b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_UB_Generic.c new file mode 100644 index 00000000..4cc1cfd4 --- /dev/null +++ b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_UB_Generic.c @@ -0,0 +1,573 @@ +/* + * Sp3MM_for_AlgebraicMultiGrid + * (C) Copyright 2021-2022 + * Andrea Di Iorio + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*#pragma message( "compiling SpMM_CSR_OMP_Generic.c with OFF_F as:" STR(OFF_F) )*/ +#ifndef OFF_F + #error generic implementation requires OFF_F defined +#endif + + + +//////////////////// COMPUTE CORE Sp[3]MM Upperbound ////////////////////////// +spmat* CAT(spmmSerial_,OFF_F)(spmat* A,spmat* B, CONFIG* _cfg){ //serial implementation + spmat* AB = NULL; + ACC_DENSE acc; + if ( allocAccDense(&acc,B->N) ) goto _free; + if (!(AB = allocSpMatrix(A->M,B->N))) goto _free; + for( idx_t r=0; rM; r++ ){ + for (ulong c=A->IRP[r]-OFF_F; cIRP[r+1]-OFF_F; c++) //row-by-row formul + CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, &acc); + sparsifyDirect(&acc,AB,r); //0,NULL);TODO COL PARTITIONING COMMON API + } + _free: + freeAccsDense(&acc,1); + + return AB; +} +////////Sp3MM as 2 x SpMM +///1D +spmat* CAT(spmmRowByRow_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ + DEBUG printf("spmm\trows of A,\tfull B\tM=%lu x N=%lu\n",A->M,B->N); + ///thread aux + ACC_DENSE *accVects = NULL,*acc; + SPMM_ACC* outAccumul=NULL; + idx_t* rowsSizes = NULL; + ///init AB matrix with SPMM heuristic preallocation + spmat* AB = allocSpMatrix(A->M,B->N); + if (!AB) goto _err; + if (!(rowsSizes = CAT(spMMSizeUpperbound_,OFF_F) (A,B))) goto _err; + ///aux structures alloc + if (!(accVects = _initAccVectors(cfg->threadNum,AB->N))){ + ERRPRINT("accVects init failed\n"); + goto _err; + } + if (!(outAccumul = initSpMMAcc(rowsSizes[AB->M],AB->M))) goto _err; + #if SPARSIFY_PRE_PARTITIONING == T + //prepare sparse accumulators with U.Bounded rows[parts] starts + SPACC* accSp; + for( idx_t r=0,rSizeCumul=0; rM; rSizeCumul += rowsSizes[r++]){ + accSp = outAccumul->accs+r; + accSp->JA = outAccumul->JA + rSizeCumul; + accSp->AS = outAccumul->AS + rSizeCumul; + //accSp->len = rowsSizes[r]; + } + #endif + + ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (AB->M,AB,cfg); + AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); + #pragma omp parallel for schedule(runtime) private(acc) + for (ulong r=0; rM; r++){ //row-by-row formulation + //iterate over nz entry index c inside current row r + acc = accVects + omp_get_thread_num(); + /* direct use of sparse scalar vector multiplication + for (idx_t ja=A->IRP[r]-OFF_F,ca,jb,bRowLen; jaIRP[r+1]-OFF_F; ja++){ + ca = A->JA[ja] - OFF_F; + jb = B->IRP[ca] - OFF_F; + bRowLen = B->IRP[ca+1] - B->IRP[ca]; + CAT(scSparseVectMul_,OFF_F)(A->AS[ja],B->AS+jb,B->JA+jb,bRowLen,acc); + }*/ + for (ulong c=A->IRP[r]-OFF_F; cIRP[r+1]-OFF_F; c++) //row-by-row formul + CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, 
A->JA[c]-OFF_F, acc); + //trasform accumulated dense vector to a CSR row + #if SPARSIFY_PRE_PARTITIONING == T + _sparsifyUB(acc,outAccumul->accs+r,0); + #else + sparsifyUBNoPartsBounds(outAccumul,acc,outAccumul->accs + r,0); + #endif + _resetAccVect(acc); //rezero for the next A row + } + ///merge sparse row computed before + if (mergeRows(outAccumul->accs,AB)) goto _err; + #if OFF_F != 0 + C_FortranShiftIdxs(AB); + #endif + AUDIT_INTERNAL_TIMES End=omp_get_wtime(); + DEBUG checkOverallocPercent(rowsSizes,AB); + goto _free; + + _err: + if(AB) freeSpmat(AB); + AB=NULL; //nothing'll be returned + _free: + if(rowsSizes) free(rowsSizes); + if(accVects) freeAccsDense(accVects,cfg->threadNum); + if(outAccumul) freeSpMMAcc(outAccumul); + + return AB; +} + +spmat* CAT(spmmRowByRow1DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ + DEBUG printf("spmm\trowBlocks of A,\tfull B\tM=%lu x N=%lu\n",A->M,B->N); + DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols); + ///thread aux + ACC_DENSE *accVects = NULL,*acc; + SPMM_ACC* outAccumul=NULL; + idx_t* rowsSizes = NULL; + ///init AB matrix with SPMM heuristic preallocation + spmat* AB = allocSpMatrix(A->M,B->N); + if (!AB) goto _err; + if (!(rowsSizes = CAT(spMMSizeUpperbound_,OFF_F)(A,B))) goto _err; + ///aux structures alloc + if (!(accVects = _initAccVectors(cfg->threadNum,AB->N))){ + ERRPRINT("accVects init failed\n"); + goto _err; + } + if (!(outAccumul = initSpMMAcc(rowsSizes[AB->M],AB->M))) goto _err; + #if SPARSIFY_PRE_PARTITIONING == T + //prepare sparse accumulators with U.Bounded rows[parts] starts + SPACC* accSp; + for( idx_t r=0,rSizeCumul=0; rM; rSizeCumul += rowsSizes[r++]){ + accSp = outAccumul->accs+r; + accSp->JA = outAccumul->JA + rSizeCumul; + accSp->AS = outAccumul->AS + rSizeCumul; + } + #endif + + //perform Gustavson over rows blocks -> M / @cfg->gridRows + ulong rowBlock = AB->M/cfg->gridRows, rowBlockRem = AB->M%cfg->gridRows; + ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (cfg->gridRows,AB,cfg); + AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); + ulong b,startRow,block; //omp for aux vars + #pragma omp parallel for schedule(runtime) private(acc,startRow,block) + for (b=0; b < cfg->gridRows; b++){ + block = UNIF_REMINDER_DISTRI(b,rowBlock,rowBlockRem); + startRow= UNIF_REMINDER_DISTRI_STARTIDX(b,rowBlock,rowBlockRem); + acc = accVects + omp_get_thread_num(); + + DEBUGPRINT{ + fflush(NULL); + printf("block %lu\t%lu:%lu(%lu)\n",b,startRow,startRow+block-1,block); + fflush(NULL); + } + //row-by-row formulation in the given row block + for (ulong r=startRow; rIRP[r]-OFF_F; cIRP[r+1]-OFF_F; c++) + CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, acc); + //trasform accumulated dense vector to a CSR row + #if SPARSIFY_PRE_PARTITIONING == T + _sparsifyUB(acc,outAccumul->accs+r,0); + #else + sparsifyUBNoPartsBounds(outAccumul,acc,outAccumul->accs + r,0); + #endif + _resetAccVect(acc); //rezero for the next A row + } + } + ///merge sparse row computed before + if (mergeRows(outAccumul->accs,AB)) goto _err; + #if OFF_F != 0 + C_FortranShiftIdxs(AB); + #endif + AUDIT_INTERNAL_TIMES End=omp_get_wtime(); + DEBUG checkOverallocPercent(rowsSizes,AB); + goto _free; + + _err: + if(AB) freeSpmat(AB); + AB=NULL; //nothing'll be returned + _free: + if(rowsSizes) free(rowsSizes); + if(accVects) freeAccsDense(accVects,cfg->threadNum); + if(outAccumul) freeSpMMAcc(outAccumul); + + return AB; +} + +///2D +//PARTITIONS NOT ALLOCATED +spmat* CAT(spmmRowByRow2DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ + DEBUG 
printf("spmm\trowBlocks of A ,\tcolBlocks of B\tM=%luxN=%lu\n",A->M,B->N); + DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols); + idx_t* bColOffsets = NULL; //B group columns starting offset for each row + ACC_DENSE *accVectors=NULL,*accV; + SPACC* accRowPart; + spmat* AB = allocSpMatrix(A->M,B->N); + SPMM_ACC* outAccumul=NULL; + idx_t* rowsPartsSizes=NULL; + if (!AB) goto _err; + //2D indexing aux vars + ulong gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols; + ulong _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows; + ulong _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols; + ulong startRow,startCol,rowBlock,colBlock; //data division aux variables + ////get bColOffsets for B column groups + if (!(bColOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols))) + goto _err; + #if SPARSIFY_PRE_PARTITIONING == T + uint rowsPartsSizesN = aSubRowsN; + if (!(rowsPartsSizes = CAT(spMMSizeUpperboundColParts_,OFF_F) + (A,B,cfg->gridCols,bColOffsets))) + #else + uint rowsPartsSizesN = AB->M; + if (!(rowsPartsSizes = CAT(spMMSizeUpperbound_,OFF_F)(A,B))) + #endif + goto _err; + + //aux vectors + + ///other AUX struct alloc + if (!(accVectors = _initAccVectors(gridSize,_colBlock+(_colBlockRem?1:0)))){ + ERRPRINT("accVectors calloc failed\n"); + goto _err; + } + if (!(outAccumul = initSpMMAcc(rowsPartsSizes[rowsPartsSizesN],aSubRowsN))) + goto _err; + #if SPARSIFY_PRE_PARTITIONING == T + //prepare sparse accumulators with U.Bounded rows[parts] starts + SPACC* accSp; + for( idx_t i=0,rSizeCumul=0; iaccs+i; + accSp->JA = outAccumul->JA + rSizeCumul; + accSp->AS = outAccumul->AS + rSizeCumul; + } + //memset(outAccumul->AS,0,sizeof(double)*rowsSizes[AB->M]);memset(outAccumul->JA,0,sizeof(idx_t)*rowsSizes[AB->M]); + #endif + + ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (gridSize,AB,cfg); + AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); + ulong tileID,t_i,t_j; //for aux vars + ulong bPartLen,bPartID,bPartOffset;//B partition acces aux vars + #pragma omp parallel for schedule(runtime) \ + private(accV,accRowPart,rowBlock,colBlock,startRow,startCol,\ + bPartLen,bPartID,bPartOffset,t_i,t_j) + for (tileID = 0; tileID < gridSize; tileID++){ + ///get iteration's indexing variables + //tile index in the 2D grid of AB computation TODO OMP HOW TO PARALLELIZE 2 FOR + t_i = tileID/cfg->gridCols; //i-th row block + t_j = tileID%cfg->gridCols; //j-th col block + //get tile row-cols group FAIR sizes + rowBlock = UNIF_REMINDER_DISTRI(t_i,_rowBlock,_rowBlockRem); + startRow = UNIF_REMINDER_DISTRI_STARTIDX(t_i,_rowBlock,_rowBlockRem); + startCol = UNIF_REMINDER_DISTRI_STARTIDX(t_j,_colBlock,_colBlockRem); + + accV = accVectors + tileID; + + DEBUGPRINT{ + fflush(NULL); + colBlock = UNIF_REMINDER_DISTRI(t_j,_colBlock,_colBlockRem); + printf("rowBlock [%lu\t%lu:%lu(%lu)]\t",t_i,startRow,startRow+rowBlock-1,rowBlock); + printf("colBlock [%lu\t%lu:%lu(%lu)]\n",t_j,startCol,startCol+colBlock-1,colBlock); + fflush(NULL); + } + ///AB[t_i][t_j] block compute + for (ulong r=startRow; rIRP[r]-OFF_F,c; jIRP[r+1]-OFF_F; j++){ + //get start of B[A->JA[j]][:colBlock:] + c = A->JA[j]-OFF_F; // col of nnz in A[r][:] <-> target B row + bPartID = IDX2D(c,t_j,cfg->gridCols); + bPartOffset = bColOffsets[ bPartID ]; + bPartLen = bColOffsets[ bPartID + 1 ] - bPartOffset; + + CAT(scSparseVectMulPart_,OFF_F)(A->AS[j],B->AS+bPartOffset, + B->JA+bPartOffset,bPartLen,startCol,accV); + } + + accRowPart = outAccumul->accs + IDX2D(r,t_j,cfg->gridCols); + #if 
SPARSIFY_PRE_PARTITIONING == T + _sparsifyUB(accV,accRowPart,startCol); + #else + sparsifyUBNoPartsBounds(outAccumul,accV,accRowPart,startCol); + #endif + _resetAccVect(accV); + } + } + if (mergeRowsPartitions(outAccumul->accs,AB,cfg)) goto _err; + #if OFF_F != 0 + C_FortranShiftIdxs(AB); + #endif + AUDIT_INTERNAL_TIMES End=omp_get_wtime(); + DEBUG + CAT(checkOverallocRowPartsPercent_, OFF_F)(rowsPartsSizes, AB, cfg->gridCols, bColOffsets); + goto _free; + + _err: + if (AB) freeSpmat(AB); + AB = NULL; + _free: + free(rowsPartsSizes); + free(bColOffsets); + if (accVectors) freeAccsDense(accVectors,gridSize); + if (outAccumul) freeSpMMAcc(outAccumul); + + return AB; + +} + +spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ + DEBUG printf("spmm\trowBlocks of A,\tcolBlocks (allcd) of B\tM=%luxN=%lu\n",A->M,B->N); + DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols); + spmat *AB = NULL, *colPartsB = NULL, *colPart; + idx_t* rowsPartsSizes=NULL; + //aux vectors + SPMM_ACC* outAccumul=NULL; + ACC_DENSE *accVectors=NULL,*accV; + SPACC* accRowPart; + ulong startRow,startCol,rowBlock,colBlock; //data division aux variables + //2D indexing aux vars + idx_t gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols; + idx_t* bColOffsets = NULL; + + if (!(AB = allocSpMatrix(A->M,B->N))) goto _err; + ulong _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows; + ulong _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols; + + ////B cols partition in CSRs + //if (!(colPartsB = CAT(colsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols))) goto _err; + if (!(colPartsB = CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F) + (B, cfg->gridCols, &bColOffsets))) + goto _err; + #if SPARSIFY_PRE_PARTITIONING == T + uint rowsPartsSizesN = aSubRowsN; + if (!(rowsPartsSizes = CAT(spMMSizeUpperboundColParts_,OFF_F) + (A, B, cfg->gridCols, bColOffsets))) + #else + uint rowsPartsSizesN = AB->M; + if (!(rowsPartsSizes = CAT(spMMSizeUpperbound_,OFF_F)(A,B))) + #endif + goto _err; + ///other AUX struct alloc + if (!(accVectors = _initAccVectors(gridSize,_colBlock+(_colBlockRem?1:0)))){ + ERRPRINT("accVectors calloc failed\n"); + goto _err; + } + if (!(outAccumul = initSpMMAcc(rowsPartsSizes[rowsPartsSizesN],aSubRowsN))) + goto _err; + #if SPARSIFY_PRE_PARTITIONING == T + //prepare sparse accumulators with U.Bounded rows[parts] starts + SPACC* accSp; + for( idx_t i=0,rLenCumul=0; iaccs+i; + accSp->JA = outAccumul->JA + rLenCumul; + accSp->AS = outAccumul->AS + rLenCumul; + } + #endif + + ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (gridSize,AB,cfg); + AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); + ulong tileID,t_i,t_j; //for aux vars + #pragma omp parallel for schedule(runtime) \ + private(accV,accRowPart,colPart,rowBlock,colBlock,startRow,startCol,t_i,t_j) + for (tileID = 0; tileID < gridSize; tileID++){ + ///get iteration's indexing variables + //tile index in the 2D grid of AB computation TODO OMP HOW TO PARALLELIZE 2 FOR + t_i = tileID/cfg->gridCols; //i-th row block + t_j = tileID%cfg->gridCols; //j-th col block + //get tile row-cols group FAIR sizes + rowBlock = UNIF_REMINDER_DISTRI(t_i,_rowBlock,_rowBlockRem); + colBlock = UNIF_REMINDER_DISTRI(t_j,_colBlock,_colBlockRem); + startRow = UNIF_REMINDER_DISTRI_STARTIDX(t_i,_rowBlock,_rowBlockRem); + startCol = UNIF_REMINDER_DISTRI_STARTIDX(t_j,_colBlock,_colBlockRem); + + colPart = colPartsB + t_j; + accV = accVectors + tileID; + + DEBUGPRINT{ + fflush(NULL); + printf("rowBlock 
[%lu\t%lu:%lu(%lu)]\t", + t_i,startRow,startRow+rowBlock-1,rowBlock); + printf("colBlock [%lu\t%lu:%lu(%lu)]\n", + t_j,startCol,startCol+colBlock-1,colBlock); + fflush(NULL); + } + ///AB[t_i][t_j] block compute + for (ulong r=startRow; rIRP[r]-OFF_F,c,bRowStart,bRowLen; jIRP[r+1]-OFF_F; j++){ + //get start of B[A->JA[j]][:colBlock:] + c = A->JA[j]-OFF_F; // column of nnz entry in A[r][:] <-> target B row + bRowStart = colPart->IRP[c]; + #ifdef ROWLENS + bRowLen = colPart->RL[c]; + #else + bRowLen = colPart->IRP[c+1] - bRowStart; + #endif + CAT(scSparseVectMulPart_,OFF_F)(A->AS[j], + colPart->AS+bRowStart,colPart->JA+bRowStart, + bRowLen,startCol,accV); + } + + accRowPart = outAccumul->accs + IDX2D(r,t_j,cfg->gridCols); + #if SPARSIFY_PRE_PARTITIONING == T + _sparsifyUB(accV,accRowPart,startCol); + #else + sparsifyUBNoPartsBounds(outAccumul,accV,accRowPart,startCol); + #endif + _resetAccVect(accV); + } + } + if (mergeRowsPartitions(outAccumul->accs,AB,cfg)) goto _err; + #if OFF_F != 0 + C_FortranShiftIdxs(AB); + #endif + AUDIT_INTERNAL_TIMES End=omp_get_wtime(); + DEBUG + CAT(checkOverallocRowPartsPercent_, OFF_F)(rowsPartsSizes,AB,cfg->gridCols,bColOffsets); + goto _free; + + _err: + ERRPRINT("spmmRowByRow2DBlocksAllocated failed\n"); + if (AB) freeSpmat(AB); + AB = NULL; + _free: + if (colPartsB){ + for (ulong i=0; igridCols; i++) + freeSpmatInternal(colPartsB+i); + free(colPartsB); + } + free(rowsPartsSizes); + free(bColOffsets); + if (accVectors) freeAccsDense(accVectors,gridSize); + if (outAccumul) freeSpMMAcc(outAccumul); + + return AB; + +} +///SP3MM +spmat* CAT(sp3mmRowByRowPair_,OFF_F)(spmat* R,spmat* AC,spmat* P, + CONFIG* cfg,SPMM_INTERF spmm){ + + double end,start,elapsed,partial,flops; + spmat *RAC = NULL, *out = NULL; + + if (!spmm){ + //TODO runtime on sizes decide witch spmm implementation to use if not given + spmm = &CAT(spmmRowByRow2DBlocks_,OFF_F); + } + /* TODO + alloc dense aux vector, reusable over 3 product + TODO arrays sovrallocati per poter essere riusati nelle 2 SpMM + ulong auxVectSize = MAX(R->N,AC->N); + auxVectSize = MAX(auxVectSize,P->N); + */ + + start = omp_get_wtime(); + /// triple product as a pair of spmm + if (!(RAC = spmm(R,AC,cfg))) goto _free; + AUDIT_INTERNAL_TIMES partial = End - Start; + if (!(out = spmm(RAC,P,cfg))) goto _free; + // + end = omp_get_wtime(); + ElapsedInternal = End - Start + partial; + VERBOSE { + elapsed = end - start; + flops = (2 * R->NZ * P->NZ * AC->NZ) / (elapsed); + printf("elapsed %le - flops %le",elapsed,flops); + AUDIT_INTERNAL_TIMES + printf("\tinternalTime: %le",ElapsedInternal); + printf("\n"); + } + _free: + zeroSpmat(RAC); + freeSpmat(RAC); + + return out; +} + +////////Sp3MM direct +///1D +spmat* CAT(sp3mmRowByRowMerged_,OFF_F)(spmat* R,spmat* AC,spmat* P,CONFIG* cfg, + SPMM_INTERF spmm){ + + ulong* rowSizes = NULL; + SPMM_ACC* outAccumul=NULL; + ACC_DENSE *accVectorsR_AC=NULL,*accVectorsRAC_P=NULL,*accRAC,*accRACP; + + ///init AB matrix with SPMM heuristic preallocation + spmat* out = allocSpMatrix(R->M,P->N); + if (!out) goto _err; + /*TODO 3MM VERSION COMPUTE OUT ALLOC : + -> \forall RAC.row -> hashmap{col=True}->(AVL||RBTHREE); upperBound std col RAC.rows.cols in hashmap || SYM_bis + * NB: UP per RACP => NN note dimensioni righe precise => stesso approccio riservazione spazio di spmm ( fetch_and_add ) + * SYM_BIS ==> note dimensioni righe => + * 1) pre riservazione spazio per righe -> cache allignement per threads + -(sc. 
+////////Sp3MM direct
+///1D
+spmat* CAT(sp3mmRowByRowMerged_,OFF_F)(spmat* R,spmat* AC,spmat* P,CONFIG* cfg,
+	SPMM_INTERF spmm){
+
+	ulong* rowSizes = NULL;
+	SPMM_ACC* outAccumul=NULL;
+	ACC_DENSE *accVectorsR_AC=NULL,*accVectorsRAC_P=NULL,*accRAC,*accRACP;
+
+	///init output matrix with SPMM heuristic preallocation
+	spmat* out = allocSpMatrix(R->M,P->N);
+	if (!out)	goto _err;
+	/*TODO 3MM VERSION COMPUTE OUT ALLOC :
+		-> \forall RAC.row -> hashmap{col=True}->(AVL||RBTREE); upper bound each col of RAC.rows.cols in the hashmap || SYM_bis
+	 * NB: UB for RACP => exact row sizes NOT known => same space reservation approach as spmm ( fetch_and_add )
+	 * SYM_BIS ==> row sizes known =>
+	 *	1) pre-reserve row space -> per-thread cache alignment
+	 *		-(sched. static:  each block of rows aligned to the cache block following the previous rows' block)
+	 *		-(sched. dynamic: every row aligned to a cache block (NO OVERLAPS!) -> huge overhead ?)
+	 *	2) pre-reserve the rows' space directly in the out CSR
+	 *		-> probable cache block overlaps; but saves the cost of a P.M memcpy
+	*/
+	if (!(rowSizes = CAT(spMMSizeUpperbound_,OFF_F)(R,AC)))	goto _err;	///TODO TOO LOOSE UB...INTEGRATE RBTREE FOR SYM->PRECISE
+	///aux structures alloc
+	if (!(outAccumul = initSpMMAcc(rowSizes[R->M],P->M)))	goto _err;	//TODO size estimated with RAC mat
+	if (!(accVectorsR_AC = _initAccVectors(cfg->threadNum,AC->N))){	//TODO LESS || REUSE
+		ERRPRINT("accVectorsR_AC init failed\n");
+		goto _err;
+	}
+	if (!(accVectorsRAC_P = _initAccVectors(cfg->threadNum,R->N))){	//TODO LESS || REUSE
+		ERRPRINT("accVectorsRAC_P init failed\n");
+		goto _err;
+	}
+
+	ulong c;
+	((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (R->M,R,cfg);
+	AUDIT_INTERNAL_TIMES	Start=omp_get_wtime();
+	#pragma omp parallel for schedule(runtime) private(accRAC,accRACP,c)
+	for (ulong r=0; r<R->M; r++){	//row-by-row formulation
+		//iterate over nz entry index c inside current row r
+		accRAC  = accVectorsR_AC  + omp_get_thread_num();
+		accRACP = accVectorsRAC_P + omp_get_thread_num();
+		//computing (tmp) R*AC r-th row
+		for (idx_t j=R->IRP[r]-OFF_F; j<R->IRP[r+1]-OFF_F; j++)
+			CAT(scSparseRowMul_,OFF_F)(R->AS[j], AC, R->JA[j]-OFF_F, accRAC);
+		//forward the computed row
+		for (idx_t j=0; j<accRAC->nnzIdxMap.len; j++){
+			c = accRAC->nnzIdx[j];
+			CAT(scSparseRowMul_,OFF_F)(accRAC->v[c],P,c,accRACP);
+		}
+		//transform the accumulated dense vector into a CSR row TODO in UB buff
+		sparsifyUBNoPartsBounds(outAccumul,accRACP,outAccumul->accs+r,0);
+		_resetAccVect(accRAC);
+		_resetAccVect(accRACP);
+	}
+	///merge sparse row computed before
+	if (mergeRows(outAccumul->accs,out))	goto _err;
+	#if OFF_F != 0
+	C_FortranShiftIdxs(out);
+	#endif
+	AUDIT_INTERNAL_TIMES{
+		End=omp_get_wtime();
+		ElapsedInternal = End-Start;
+	}
+	DEBUG	checkOverallocPercent(rowSizes,out);
+	goto _free;
+
+	_err:
+	if(out)	freeSpmat(out);
+	out = NULL;
+	_free:
+	if(rowSizes)		free(rowSizes);
+	if(accVectorsR_AC)	freeAccsDense(accVectorsR_AC,cfg->threadNum);
+	if(accVectorsRAC_P)	freeAccsDense(accVectorsRAC_P,cfg->threadNum);
+	if(outAccumul)		freeSpMMAcc(outAccumul);
+
+	return out;
+}
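These kernels cycle a per-thread dense accumulator: scatter each scaled sparse row into a dense value array while recording first-touched columns, then sparsify only the touched entries into a CSR row and reset. A self-contained sketch of that accumulate/sparsify cycle follows (not part of the patch; dense_acc, acc_scatter, acc_sparsify are hypothetical names, while the patch's ACC_DENSE and nnzIdxMap carry the same information):

/* Hypothetical dense accumulator: a value per column, a touched flag per
 * column, and the list of columns hit so far. */
typedef struct { double *v; char *touched; long *nnzIdx; long len; } dense_acc;

/* accumulate scale * sparse_row(js,as,len) into acc */
static void acc_scatter(dense_acc *acc, double scale,
                        const long *js, const double *as, long len){
	for (long k = 0; k < len; k++){
		long c = js[k];
		if (!acc->touched[c]){            /* first touch of column c */
			acc->touched[c] = 1;
			acc->nnzIdx[acc->len++] = c;
		}
		acc->v[c] += scale * as[k];
	}
}

/* compact touched entries into CSR-row arrays, resetting acc for the next row */
static long acc_sparsify(dense_acc *acc, long *ja, double *as){
	long nnz = acc->len;
	for (long k = 0; k < nnz; k++){
		long c = acc->nnzIdx[k];
		ja[k] = c;  as[k] = acc->v[c];
		acc->v[c] = 0.0;  acc->touched[c] = 0;
	}
	acc->len = 0;
	return nnz;
}

The separate touched flags (rather than testing v[c] == 0) keep the column list exact even when partial sums cancel to zero mid-accumulation.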
diff --git a/base/serial/impl/sp3mm4amg/commons/ompGetICV.c b/base/serial/impl/sp3mm4amg/commons/ompGetICV.c
new file mode 100644
index 00000000..1df55dde
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/commons/ompGetICV.c
@@ -0,0 +1,86 @@
+/*
+ * Sp3MM_for_AlgebraicMultiGrid
+ * (C) Copyright 2021-2022
+ *     Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions, and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
+ *    not be used to endorse or promote products derived from this
+ *    software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <config.h>
+
+char* SCHEDULES[]={"OMP_SCHED_STATIC","OMP_SCHED_DYNAMIC","OMP_SCHED_GUIDED","OMP_SCHED_AUTO"};
+void ompGetRuntimeSchedule(int* kind_chunk_monotonic){
+	/*
+	 * export OMP_SCHEDULE="[modifier:]kind[, chunk]"
+	 * typedef enum omp_sched_t {
+	 *	//schedule kinds
+	 *	omp_sched_static	= 0x1,
+	 *	omp_sched_dynamic	= 0x2,
+	 *	omp_sched_guided	= 0x3,
+	 *	omp_sched_auto		= 0x4,
+	 *	//schedule modifier	//TODO API>=5.0
+	 *	omp_sched_monotonic	= 0x80000000u	//TODO OMP >=5
+	 * } omp_sched_t;
+	 */
+	omp_sched_t k,kind; int chunk_size,monotonic=0;
+	omp_get_schedule(&kind,&chunk_size);
+	k=kind; //[monotonic OFF]
+	#if _OPENMP >= 201811	//OMP_SCHED_SCHEDULE modifier from 5.0
+	monotonic = omp_sched_monotonic & kind;
+	if(monotonic)	k = kind - omp_sched_monotonic;
+	#endif
+	printf("omp sched gather:\tkind:%s\tomp chunkSize:%d\tmonotonic:%s\tfairChunkFolding:%d\n",
+		SCHEDULES[k-1],chunk_size,monotonic?"Y":"N",FAIR_CHUNKS_FOLDING);
+	if (kind_chunk_monotonic){
+		kind_chunk_monotonic[0] = k;
+		kind_chunk_monotonic[1] = chunk_size;
+		kind_chunk_monotonic[2] = monotonic;
+	}
+}
+
+float ompVersionMacroMap(){
+	switch ( _OPENMP ){
+		case 200505: return 2.5;
+		case 200805: return 3.0;
+		case 201107: return 3.1;
+		case 201307: return 4.0;
+		case 201511: return 4.5;
+		case 201811: return 5.0;
+		case 202011: return 5.1;
+		default:     return 0;	//unmapped _OPENMP date macro
+	}
+}
+//WRAPPER to print all ICV vars
+#ifdef OMP_GET_ICV_MAIN
+int main(){
+#else
+void ompGetAllICV(){
+#endif
+	printf("export OMP_DISPLAY_ENV=VERBOSE for full ICV details\n");
+	printf("omp MAX THREADS USABLE\t%d\n",omp_get_max_threads());
+	ompGetRuntimeSchedule(NULL);
+	printf("omp API version:\t %1.1f\n",ompVersionMacroMap());
+}
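ompGetRuntimeSchedule decodes omp_get_schedule()'s kind by masking off the OpenMP 5.0 monotonic modifier bit before indexing the kind table. A minimal standalone probe of the same ICV follows (not part of the patch; it assumes only the standard omp_get_schedule / omp_sched_monotonic API):

/* Standalone sketch: read the runtime schedule ICV and strip the
 * OpenMP 5.0 monotonic modifier bit when present. Build with -fopenmp. */
#include <omp.h>
#include <stdio.h>

int main(void){
	omp_sched_t kind; int chunk;
	omp_get_schedule(&kind, &chunk);      /* honors the OMP_SCHEDULE env var */
	int monotonic = 0;
#if _OPENMP >= 201811                     /* modifier exists from OpenMP 5.0 */
	monotonic = (kind & omp_sched_monotonic) != 0;
	kind = (omp_sched_t)(kind & ~omp_sched_monotonic);
#endif
	printf("kind=%d chunk=%d monotonic=%d\n", (int)kind, chunk, monotonic);
	return 0;
}

Run it e.g. as OMP_SCHEDULE="monotonic:dynamic,4" ./a.out to see the modifier reported alongside the base kind.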
diff --git a/base/serial/impl/sp3mm4amg/commons/ompi_dims_create/mpi.h.in b/base/serial/impl/sp3mm4amg/commons/ompi_dims_create/mpi.h.in
new file mode 100644
index 00000000..76154a5b
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/commons/ompi_dims_create/mpi.h.in
@@ -0,0 +1,3141 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2021 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2007-2021 Cisco Systems, Inc.  All rights reserved
+ * Copyright (c) 2008-2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright (c) 2009-2012 Oak Rigde National Laboratory.  All rights reserved.
+ * Copyright (c) 2011-2020 Sandia National Laboratories. All rights reserved.
+ * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights
+ *                         reserved.
+ * Copyright (c) 2011-2013 INRIA.  All rights reserved.
+ * Copyright (c) 2015      University of Houston. All rights reserved.
+ * Copyright (c) 2015-2021 Research Organization for Information Science
+ *                         and Technology (RIST).  All rights reserved.
+ * Copyright (c) 2017-2019 IBM Corporation.  All rights reserved.
+ * Copyright (c) 2018      FUJITSU LIMITED.  All rights reserved.
+ * Copyright (c) 2021-2022 Google, LLC. All rights reserved.
+ * Copyright (c) 2021-2022 Amazon.com, Inc. or its affiliates.  All Rights
+ *                         reserved.
+ * Copyright (c) 2021      Bull S.A.S. All rights reserved.
+ * Copyright (c) 2018      Triad National Security, LLC. All rights
+ * Copyright (c) 2018-2021 Triad National Security, LLC. All rights
+ *                         reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef OMPI_MPI_H
+#define OMPI_MPI_H
+
+/* The comment below (and the ending partner) are for building fat
+   distributions on platforms that support it.  Please do not remove */
+
+/* @OMPI_BEGIN_CONFIGURE_SECTION@ */
+
+#ifndef OMPI_CONFIG_H
+
+/* Only include these if OMPI_CONFIG_H isn't defined (meaning if
+   ompi_config.h hasn't already been included).  Otherwise, we'll
+   duplicate all those symbols.  OMPI coding standards say that
+   ompi_config.h must be included before all other files, so this
+   should be good enough */
+
+/* The compiler id which OMPI was built with */
+#undef OPAL_BUILD_PLATFORM_COMPILER_FAMILYID
+
+/* The compiler version which OMPI was built with */
+#undef OPAL_BUILD_PLATFORM_COMPILER_VERSION
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef OPAL_STDC_HEADERS
+
+/* Whether your compiler has __attribute__ deprecated or not */
+#undef OPAL_HAVE_ATTRIBUTE_DEPRECATED
+
+/* Whether your compiler has __attribute__ deprecated with the optional argument */
+#undef OPAL_HAVE_ATTRIBUTE_DEPRECATED_ARGUMENT
+
+/* Whether you compiler has __attribute__ error or not */
+#undef OPAL_HAVE_ATTRIBUTE_ERROR
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#undef OPAL_HAVE_SYS_TIME_H
+
+/* Define to 1 if you have the <sys/synch.h> header file. */
+#undef OPAL_HAVE_SYS_SYNCH_H
+
+/* Define to 1 if the system has the type `long long'. */
+#undef OPAL_HAVE_LONG_LONG
+
+/* The size of a `bool', as computed by sizeof. */
+#undef OPAL_SIZEOF_BOOL
+
+/* The size of a `int', as computed by sizeof. */
+#undef OPAL_SIZEOF_INT
+
+/* The size of a `void*', as computed by sizeof.
*/ +#undef OPAL_SIZEOF_VOID_P + +/* Maximum length of datarep string (default is 128) */ +#undef OPAL_MAX_DATAREP_STRING + +/* Maximum length of error strings (default is 256) */ +#undef OPAL_MAX_ERROR_STRING + +/* Maximum length of info keys (default is 36) */ +#undef OPAL_MAX_INFO_KEY + +/* Maximum length of info vals (default is 256) */ +#undef OPAL_MAX_INFO_VAL + +/* Maximum length of object names (default is 64) */ +#undef OPAL_MAX_OBJECT_NAME + +/* Maximum length of port names (default is 1024) */ +#undef OPAL_MAX_PORT_NAME + +/* Maximum length of processor names (default is 256) */ +#undef OPAL_MAX_PROCESSOR_NAME + +/* Maximum length of processor names (default is 1024) */ +#undef OPAL_MAX_PSET_NAME_LEN + +/* Maximum length of from group tag (default is 256) */ +#undef OPAL_MAX_STRINGTAG_LEN + +/* The number or Fortran INTEGER in MPI Status */ +#undef OMPI_FORTRAN_STATUS_SIZE + +/* Whether we have FORTRAN LOGICAL*1 or not */ +#undef OMPI_HAVE_FORTRAN_LOGICAL1 + +/* Whether we have FORTRAN LOGICAL*2 or not */ +#undef OMPI_HAVE_FORTRAN_LOGICAL2 + +/* Whether we have FORTRAN LOGICAL*4 or not */ +#undef OMPI_HAVE_FORTRAN_LOGICAL4 + +/* Whether we have FORTRAN LOGICAL*8 or not */ +#undef OMPI_HAVE_FORTRAN_LOGICAL8 + +/* Whether we have FORTRAN INTEGER*1 or not */ +#undef OMPI_HAVE_FORTRAN_INTEGER1 + +/* Whether we have FORTRAN INTEGER*16 or not */ +#undef OMPI_HAVE_FORTRAN_INTEGER16 + +/* Whether we have FORTRAN INTEGER*2 or not */ +#undef OMPI_HAVE_FORTRAN_INTEGER2 + +/* Whether we have FORTRAN INTEGER*4 or not */ +#undef OMPI_HAVE_FORTRAN_INTEGER4 + +/* Whether we have FORTRAN INTEGER*8 or not */ +#undef OMPI_HAVE_FORTRAN_INTEGER8 + +/* Whether we have FORTRAN REAL*16 or not */ +#undef OMPI_HAVE_FORTRAN_REAL16 + +/* Whether we have FORTRAN REAL*2 or not */ +#undef OMPI_HAVE_FORTRAN_REAL2 + +/* Whether we have FORTRAN REAL*4 or not */ +#undef OMPI_HAVE_FORTRAN_REAL4 + +/* Whether we have FORTRAN REAL*8 or not */ +#undef OMPI_HAVE_FORTRAN_REAL8 + +/* Whether in include MPI-1 compatibility */ +#undef OMPI_ENABLE_MPI1_COMPAT + +/* Whether we have float _Complex or not */ +#undef HAVE_FLOAT__COMPLEX + +/* Whether we have double _Complex or not */ +#undef HAVE_DOUBLE__COMPLEX + +/* Whether we have long double _Complex or not */ +#undef HAVE_LONG_DOUBLE__COMPLEX + +/* Type of MPI_Aint */ +#undef OMPI_MPI_AINT_TYPE + +/* Type of MPI_Offset */ +#undef OMPI_MPI_OFFSET_TYPE + +/* MPI datatype corresponding to MPI_Offset */ +#undef OMPI_OFFSET_DATATYPE + +/* Size of the MPI_Offset corresponding type */ +#undef OMPI_MPI_OFFSET_SIZE + +/* Type of MPI_Count */ +#undef OMPI_MPI_COUNT_TYPE + +/* type to use for ptrdiff_t, if it does not exist, set to ptrdiff_t if it does exist */ +#undef ptrdiff_t + +/* Whether OMPI was built with parameter checking or not */ +#undef OMPI_PARAM_CHECK + +/* Enable warnings in wrong usage (e.g. 
deprecated) in user-level code */
+#ifndef OMPI_WANT_MPI_INTERFACE_WARNING
+#undef OMPI_WANT_MPI_INTERFACE_WARNING
+#endif
+
+/* Major, minor, and release version of Open MPI */
+#undef OMPI_MAJOR_VERSION
+#undef OMPI_MINOR_VERSION
+#undef OMPI_RELEASE_VERSION
+
+/* A type that allows us to have sentinel type values that are still
+   valid */
+#undef ompi_fortran_bogus_type_t
+
+/* C type corresponding to FORTRAN INTEGER */
+#undef ompi_fortran_integer_t
+
+/* Whether C compiler supports -fvisibility */
+#undef OPAL_C_HAVE_VISIBILITY
+
+#ifndef OMPI_DECLSPEC
+#  if defined(WIN32) || defined(_WIN32)
+#    if defined(OMPI_IMPORTS)
+#      define OMPI_DECLSPEC __declspec(dllimport)
+#    else
+#      define OMPI_DECLSPEC
+#    endif  /* defined(OMPI_IMPORTS) */
+#  else
+#    if OPAL_C_HAVE_VISIBILITY == 1
+#      define OMPI_DECLSPEC __attribute__((visibility("default")))
+#    else
+#      define OMPI_DECLSPEC
+#    endif
+#  endif
+#endif
+
+#ifndef MPI_Fint
+/* MPI_Fint is the same as ompi_fortran_INTEGER_t */
+#define MPI_Fint ompi_fortran_integer_t
+#endif
+
+#endif /* #ifndef OMPI_CONFIG_H */
+
+/* @OMPI_END_CONFIGURE_SECTION@ */
+
+/* include for ptrdiff_t */
+#ifdef OPAL_STDC_HEADERS
+#include <stddef.h>
+#endif
+
+#ifndef OMPI_BUILDING
+#define OMPI_BUILDING 0
+#endif
+
+
+/*
+ * Just in case you need it.  :-)
+ */
+#define OPEN_MPI 1
+
+/*
+ * MPI version
+ */
+#define MPI_VERSION 3
+#define MPI_SUBVERSION 1
+
+
+/*
+ * Do we want MPI interface deprecated function warnings?  This is
+ * only relevant if we're not building Open MPI (i.e., we're compiling an
+ * MPI application).
+ */
+#if !OMPI_BUILDING
+   /*
+    * Figure out which compiler is being invoked (in order to compare if
+    * it was different than what OMPI was built with).  Do some preprocessor
+    * hacks to eliminate warnings in the portable_platform.h file.
+    */
+#  ifndef SIZEOF_VOID_P
+#    define CLEANUP_SIZEOF_VOID_P 1
+#    define SIZEOF_VOID_P OPAL_SIZEOF_VOID_P
+#  else
+#    define CLEANUP_SIZEOF_VOID_P 0
+#  endif
+
+#  ifndef _PORTABLE_PLATFORM_H
+#    define _PORTABLE_PLATFORM_H 0
+#  endif
+#  ifndef PLATFORM_HEADER_VERSION
+#    define PLATFORM_HEADER_VERSION 0
+#  endif
+
+#  include "mpi_portable_platform.h"
+
+#if CLEANUP_SIZEOF_VOID_P
+#undef SIZEOF_VOID_P
+#endif
+
+   /*
+    * If we're currently using the same compiler that was used to
+    * build Open MPI, enable compile-time warning of user-level code
+    * (e.g. usage of deprecated functions).
+    */
+#  if (OPAL_BUILD_PLATFORM_COMPILER_FAMILYID == PLATFORM_COMPILER_FAMILYID) && \
+      (OPAL_BUILD_PLATFORM_COMPILER_VERSION == PLATFORM_COMPILER_VERSION)
+
+#    if OMPI_WANT_MPI_INTERFACE_WARNING
+#      if OPAL_HAVE_ATTRIBUTE_DEPRECATED
+#        if OPAL_HAVE_ATTRIBUTE_DEPRECATED_ARGUMENT
+#          define __mpi_interface_deprecated__(msg) __attribute__((__deprecated__(msg)))
+#        else
+#          define __mpi_interface_deprecated__(msg) __attribute__((__deprecated__))
+#        endif
+#      endif
+#    endif
+
+     /* For MPI removed APIs, there is no generally portable way to cause
+      * the C compiler to error with a nice message, on the _usage_ of
+      * one of these symbols.  We've gone with tiered approach:
+      *
+      * If the user configured with --enable-mpi1-compatibility,
+      * just emit a compiletime warning (via the deprecation function
+      * attribute) that they're using an MPI1 removed function.
+      *
+      * Otherwise, we'd like to issue a fatal error directing the
+      * user that they've used an MPI1 removed function.
If the + * user's compiler supports C11 _Static_assert() or + * C++11 static_assert(), we #define the MPI routines to + * instead be a call to an assert with an appropriate message + * suggesting the new MPI3 equivalent. + * + * Otherwise, if the user's compiler supports the error function + * attribute, define the MPI routines with that error attribute. + * This is supported by most modern GNU compilers. + * + * Finally if the compiler doesn't support any of those, just + * Don't declare those MPI routines at all in mpi.h + * + * Don't do MACRO magic for building Profiling library as it + * interferes with the above. + */ +# if defined(OMPI_OMIT_MPI1_COMPAT_DECLS) + /* The user set OMPI_OMIT_MPI1_COMPAT_DECLS, do what he commands */ +# elif OMPI_ENABLE_MPI1_COMPAT +# define OMPI_OMIT_MPI1_COMPAT_DECLS 0 +# define OMPI_REMOVED_USE_STATIC_ASSERT 0 +# define __mpi_interface_removed__(func, newfunc) __mpi_interface_deprecated__(#func " was removed in MPI-3.0. Use " #newfunc " instead. continuing...") +# elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + /* This is the C11 (or later) case, which uses + _Static_assert() */ +# define OMPI_OMIT_MPI1_COMPAT_DECLS 1 +# define OMPI_REMOVED_USE_STATIC_ASSERT 1 +/* This macro definition may show up in compiler output. So we both + * outdent it back to column 0 and give it a user-friendly name to + * help users grok what we are trying to tell them here. + */ +#define THIS_SYMBOL_WAS_REMOVED_IN_MPI30(symbol, new_symbol) 0; _Static_assert(0, #symbol " was removed in MPI-3.0. Use " #new_symbol " instead.") +#define THIS_FUNCTION_WAS_REMOVED_IN_MPI30(func, newfunc) _Static_assert(0, #func " was removed in MPI-3.0. Use " #newfunc " instead.") +# elif defined(__cplusplus) && (__cplusplus >= 201103L) + /* This is the C++11 (or later) case, which uses + static_assert() */ +# define OMPI_OMIT_MPI1_COMPAT_DECLS 1 +# define OMPI_REMOVED_USE_STATIC_ASSERT 1 +/* This macro definition may show up in compiler output. So we both + * outdent it back to column 0 and give it a user-friendly name to + * help users grok what we are trying to tell them here. + */ +#define THIS_SYMBOL_WAS_REMOVED_IN_MPI30(symbol, new_symbol) 0; static_assert(0, #symbol " was removed in MPI-3.0. Use " #new_symbol " instead.") +#define THIS_FUNCTION_WAS_REMOVED_IN_MPI30(func, newfunc) static_assert(0, #func " was removed in MPI-3.0. Use " #newfunc " instead.") +# elif OPAL_HAVE_ATTRIBUTE_ERROR +# define OMPI_OMIT_MPI1_COMPAT_DECLS 0 +# define OMPI_REMOVED_USE_STATIC_ASSERT 0 +# define __mpi_interface_removed__(func, newfunc) __attribute__((__error__(#func " was removed in MPI-3.0. Use " #newfunc " instead."))) +# else +# define OMPI_OMIT_MPI1_COMPAT_DECLS 1 +# define OMPI_REMOVED_USE_STATIC_ASSERT 0 +# endif +# endif +#endif + +/* + * If we didn't define __mpi_interface_deprecated__ above, then we + * don't want it, so define it to empty (can't use #undef in the logic + * above because autoconf will comment it out). 
+ */
+#if !defined(__mpi_interface_deprecated__)
+#  define __mpi_interface_deprecated__(msg)
+#endif
+
+#if !defined(__mpi_interface_removed__)
+#  define __mpi_interface_removed__(A,B)
+#endif
+
+#if !defined(THIS_SYMBOL_WAS_REMOVED_IN_MPI30)
+#  define THIS_SYMBOL_WAS_REMOVED_IN_MPI30(symbol, newsymbol)
+#endif
+#if !defined(THIS_FUNCTION_WAS_REMOVED_IN_MPI30)
+#  define THIS_FUNCTION_WAS_REMOVED_IN_MPI30(func, newfunc)
+#endif
+
+#if !defined(OMPI_REMOVED_USE_STATIC_ASSERT)
+#  define OMPI_REMOVED_USE_STATIC_ASSERT 0
+#endif
+
+#if !defined(OMPI_OMIT_MPI1_COMPAT_DECLS)
+#  define OMPI_OMIT_MPI1_COMPAT_DECLS (!(OMPI_ENABLE_MPI1_COMPAT || OMPI_BUILDING))
+#endif
+
+/*
+ * To accommodate programs written for MPI implementations that use a
+ * straight ROMIO import
+ */
+#if !OMPI_BUILDING
+#define MPIO_Request MPI_Request
+#define MPIO_Test MPI_Test
+#define MPIO_Wait MPI_Wait
+#endif
+
+/*
+ * When initializing global pointers to Open MPI internally-defined
+ * structs, some compilers warn about type-punning to incomplete
+ * types.  Therefore, when full struct definitions are unavailable
+ * (when not building Open MPI), cast to an opaque (void *) pointer to
+ * disable any strict-aliasing optimizations.  Don't cast to (void *)
+ * when building Open MPI so that we actually get the benefit of type
+ * checking (because we *do* have the full type definitions available
+ * when building OMPI).
+ */
+#if !OMPI_BUILDING
+#if defined(c_plusplus) || defined(__cplusplus)
+#define OMPI_PREDEFINED_GLOBAL(type, global) (static_cast<type> (static_cast<void *> (&(global))))
+#else
+#define OMPI_PREDEFINED_GLOBAL(type, global) ((type) ((void *) &(global)))
+#endif
+#else
+#define OMPI_PREDEFINED_GLOBAL(type, global) ((type) &(global))
+#endif
+
+#if defined(c_plusplus) || defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Typedefs
+ */
+
+typedef OMPI_MPI_AINT_TYPE MPI_Aint;
+typedef OMPI_MPI_OFFSET_TYPE MPI_Offset;
+typedef OMPI_MPI_COUNT_TYPE MPI_Count;
+typedef struct ompi_communicator_t *MPI_Comm;
+typedef struct ompi_datatype_t *MPI_Datatype;
+typedef struct ompi_errhandler_t *MPI_Errhandler;
+typedef struct ompi_file_t *MPI_File;
+typedef struct ompi_group_t *MPI_Group;
+typedef struct ompi_info_t *MPI_Info;
+typedef struct ompi_op_t *MPI_Op;
+typedef struct ompi_request_t *MPI_Request;
+typedef struct ompi_message_t *MPI_Message;
+typedef struct ompi_status_public_t MPI_Status;
+typedef struct ompi_f08_status_public_t MPI_F08_status;
+typedef struct ompi_win_t *MPI_Win;
+typedef struct mca_base_var_enum_t *MPI_T_enum;
+typedef struct ompi_mpit_cvar_handle_t *MPI_T_cvar_handle;
+typedef struct mca_base_pvar_handle_t *MPI_T_pvar_handle;
+typedef struct mca_base_pvar_session_t *MPI_T_pvar_session;
+typedef struct ompi_instance_t *MPI_Session;
+
+/*
+ * MPI_Status
+ */
+struct ompi_status_public_t {
+    /* These fields are publicly defined in the MPI specification.
+       User applications may freely read from these fields. */
+    int MPI_SOURCE;
+    int MPI_TAG;
+    int MPI_ERROR;
+    /* The following two fields are internal to the Open MPI
+       implementation and should not be accessed by MPI applications.
+       They are subject to change at any time.  These are not the
+       droids you're looking for. */
+    int _cancelled;
+    size_t _ucount;
+};
+typedef struct ompi_status_public_t ompi_status_public_t;
+
+/*
+ * MPI_F08_status
+ */
+struct ompi_f08_status_public_t {
+    /* These fields are publicly defined in the MPI specification.
+       User applications may freely read from these fields.
*/ + MPI_Fint MPI_SOURCE; + MPI_Fint MPI_TAG; + MPI_Fint MPI_ERROR; + MPI_Fint internal[OMPI_FORTRAN_STATUS_SIZE - 3]; +}; +typedef struct ompi_f08_status_public_t ompi_f08_status_public_t; + +/* + * User typedefs + */ +typedef int (MPI_Datarep_extent_function)(MPI_Datatype, MPI_Aint *, void *); +typedef int (MPI_Datarep_conversion_function)(void *, MPI_Datatype, + int, void *, MPI_Offset, void *); +typedef void (MPI_Comm_errhandler_function)(MPI_Comm *, int *, ...); +typedef void (MPI_Session_errhandler_function) (MPI_Session *, int *, ...); + + /* This is a little hackish, but errhandler.h needs space for a + MPI_File_errhandler_function. While it could just be removed, this + allows us to maintain a stable ABI within OMPI, at least for + apps that don't use MPI I/O. */ +typedef void (ompi_file_errhandler_function)(MPI_File *, int *, ...); +typedef void (MPI_Win_errhandler_function)(MPI_Win *, int *, ...); +typedef void (MPI_User_function)(void *, void *, int *, MPI_Datatype *); +typedef int (MPI_Comm_copy_attr_function)(MPI_Comm, int, void *, + void *, void *, int *); +typedef int (MPI_Comm_delete_attr_function)(MPI_Comm, int, void *, void *); +typedef int (MPI_Type_copy_attr_function)(MPI_Datatype, int, void *, + void *, void *, int *); +typedef int (MPI_Type_delete_attr_function)(MPI_Datatype, int, + void *, void *); +typedef int (MPI_Win_copy_attr_function)(MPI_Win, int, void *, + void *, void *, int *); +typedef int (MPI_Win_delete_attr_function)(MPI_Win, int, void *, void *); +typedef int (MPI_Session_delete_attr_function)(MPI_Session, int, void *, void *); +typedef int (MPI_Grequest_query_function)(void *, MPI_Status *); +typedef int (MPI_Grequest_free_function)(void *); +typedef int (MPI_Grequest_cancel_function)(void *, int); + +/* + * Deprecated typedefs. Usage is discouraged, as these may be deleted + * in future versions of the MPI Standard. + */ +typedef MPI_Comm_errhandler_function MPI_Comm_errhandler_fn + __mpi_interface_deprecated__("MPI_Comm_errhandler_fn was deprecated in MPI-2.2; use MPI_Comm_errhandler_function instead"); +typedef ompi_file_errhandler_function MPI_File_errhandler_fn + __mpi_interface_deprecated__("MPI_File_errhandler_fn was deprecated in MPI-2.2; use MPI_File_errhandler_function instead"); +typedef ompi_file_errhandler_function MPI_File_errhandler_function; +typedef MPI_Win_errhandler_function MPI_Win_errhandler_fn + __mpi_interface_deprecated__("MPI_Win_errhandler_fn was deprecated in MPI-2.2; use MPI_Win_errhandler_function instead"); + + +/* + * Miscellaneous constants + */ +#define MPI_ANY_SOURCE -1 /* match any source rank */ +#define MPI_PROC_NULL -2 /* rank of null process */ +#define MPI_ROOT -4 /* special value for intercomms */ +#define MPI_ANY_TAG -1 /* match any message tag */ +#define MPI_MAX_PROCESSOR_NAME OPAL_MAX_PROCESSOR_NAME /* max proc. 
name length */ +#define MPI_MAX_ERROR_STRING OPAL_MAX_ERROR_STRING /* max error message length */ +#define MPI_MAX_OBJECT_NAME OPAL_MAX_OBJECT_NAME /* max object name length */ +#define MPI_MAX_LIBRARY_VERSION_STRING 256 /* max length of library version string */ +#define MPI_UNDEFINED -32766 /* undefined stuff */ +#define MPI_DIST_GRAPH 3 /* dist graph topology */ +#define MPI_CART 1 /* cartesian topology */ +#define MPI_GRAPH 2 /* graph topology */ +#define MPI_KEYVAL_INVALID -1 /* invalid key value */ + +/* + * More constants + */ +#define MPI_UNWEIGHTED ((int *) 2) /* unweighted graph */ +#define MPI_WEIGHTS_EMPTY ((int *) 3) /* empty weights */ +#define MPI_BOTTOM ((void *) 0) /* base reference address */ +#define MPI_IN_PLACE ((void *) 1) /* in place buffer */ +#define MPI_BSEND_OVERHEAD 128 /* size of bsend header + ptr */ +#define MPI_MAX_INFO_KEY OPAL_MAX_INFO_KEY /* max info key length */ +#define MPI_MAX_INFO_VAL OPAL_MAX_INFO_VAL /* max info value length */ +#define MPI_ARGV_NULL ((char **) 0) /* NULL argument vector */ +#define MPI_ARGVS_NULL ((char ***) 0) /* NULL argument vectors */ +#define MPI_ERRCODES_IGNORE ((int *) 0) /* don't return error codes */ +#define MPI_MAX_PORT_NAME OPAL_MAX_PORT_NAME /* max port name length */ +#define MPI_ORDER_C 0 /* C row major order */ +#define MPI_ORDER_FORTRAN 1 /* Fortran column major order */ +#define MPI_DISTRIBUTE_BLOCK 0 /* block distribution */ +#define MPI_DISTRIBUTE_CYCLIC 1 /* cyclic distribution */ +#define MPI_DISTRIBUTE_NONE 2 /* not distributed */ +#define MPI_DISTRIBUTE_DFLT_DARG (-1) /* default distribution arg */ +#define MPI_MAX_PSET_NAME_LEN OPAL_MAX_PSET_NAME_LEN /* max pset name len */ +#define MPI_MAX_STRINGTAG_LEN OPAL_MAX_STRINGTAG_LEN /* max length of string arg to comm from group funcs*/ + +/* + * Since these values are arbitrary to Open MPI, we might as well make + * them the same as ROMIO for ease of mapping. These values taken + * from ROMIO's mpio.h file. + */ +#define MPI_MODE_CREATE 1 /* ADIO_CREATE */ +#define MPI_MODE_RDONLY 2 /* ADIO_RDONLY */ +#define MPI_MODE_WRONLY 4 /* ADIO_WRONLY */ +#define MPI_MODE_RDWR 8 /* ADIO_RDWR */ +#define MPI_MODE_DELETE_ON_CLOSE 16 /* ADIO_DELETE_ON_CLOSE */ +#define MPI_MODE_UNIQUE_OPEN 32 /* ADIO_UNIQUE_OPEN */ +#define MPI_MODE_EXCL 64 /* ADIO_EXCL */ +#define MPI_MODE_APPEND 128 /* ADIO_APPEND */ +#define MPI_MODE_SEQUENTIAL 256 /* ADIO_SEQUENTIAL */ + +#define MPI_DISPLACEMENT_CURRENT -54278278 + +#define MPI_SEEK_SET 600 +#define MPI_SEEK_CUR 602 +#define MPI_SEEK_END 604 + +/* Max data representation length */ +#define MPI_MAX_DATAREP_STRING OPAL_MAX_DATAREP_STRING + +/* + * MPI-2 One-Sided Communications asserts + */ +#define MPI_MODE_NOCHECK 1 +#define MPI_MODE_NOPRECEDE 2 +#define MPI_MODE_NOPUT 4 +#define MPI_MODE_NOSTORE 8 +#define MPI_MODE_NOSUCCEED 16 + +#define MPI_LOCK_EXCLUSIVE 1 +#define MPI_LOCK_SHARED 2 + +#define MPI_WIN_FLAVOR_CREATE 1 +#define MPI_WIN_FLAVOR_ALLOCATE 2 +#define MPI_WIN_FLAVOR_DYNAMIC 3 +#define MPI_WIN_FLAVOR_SHARED 4 + +#define MPI_WIN_UNIFIED 0 +#define MPI_WIN_SEPARATE 1 + +/* + * Predefined attribute keyvals + * + * DO NOT CHANGE THE ORDER WITHOUT ALSO CHANGING THE ORDER IN + * src/attribute/attribute_predefined.c and mpif.h.in. 
+ */ +enum { + /* MPI-1 */ + MPI_TAG_UB, + MPI_HOST, + MPI_IO, + MPI_WTIME_IS_GLOBAL, + + /* MPI-2 */ + MPI_APPNUM, + MPI_LASTUSEDCODE, + MPI_UNIVERSE_SIZE, + MPI_WIN_BASE, + MPI_WIN_SIZE, + MPI_WIN_DISP_UNIT, + MPI_WIN_CREATE_FLAVOR, + MPI_WIN_MODEL, + + /* MPI-4 */ + MPI_FT, /* used by OPAL_ENABLE_FT_MPI */ + MPI_ATTR_PREDEFINED_KEY_MAX, +}; + +/* + * Error classes and codes + * Do not change the values of these without also modifying mpif.h.in. + */ +#define MPI_SUCCESS 0 +#define MPI_ERR_BUFFER 1 +#define MPI_ERR_COUNT 2 +#define MPI_ERR_TYPE 3 +#define MPI_ERR_TAG 4 +#define MPI_ERR_COMM 5 +#define MPI_ERR_RANK 6 +#define MPI_ERR_REQUEST 7 +#define MPI_ERR_ROOT 8 +#define MPI_ERR_GROUP 9 +#define MPI_ERR_OP 10 +#define MPI_ERR_TOPOLOGY 11 +#define MPI_ERR_DIMS 12 +#define MPI_ERR_ARG 13 +#define MPI_ERR_UNKNOWN 14 +#define MPI_ERR_TRUNCATE 15 +#define MPI_ERR_OTHER 16 +#define MPI_ERR_INTERN 17 +#define MPI_ERR_IN_STATUS 18 +#define MPI_ERR_PENDING 19 +#define MPI_ERR_ACCESS 20 +#define MPI_ERR_AMODE 21 +#define MPI_ERR_ASSERT 22 +#define MPI_ERR_BAD_FILE 23 +#define MPI_ERR_BASE 24 +#define MPI_ERR_CONVERSION 25 +#define MPI_ERR_DISP 26 +#define MPI_ERR_DUP_DATAREP 27 +#define MPI_ERR_FILE_EXISTS 28 +#define MPI_ERR_FILE_IN_USE 29 +#define MPI_ERR_FILE 30 +#define MPI_ERR_INFO_KEY 31 +#define MPI_ERR_INFO_NOKEY 32 +#define MPI_ERR_INFO_VALUE 33 +#define MPI_ERR_INFO 34 +#define MPI_ERR_IO 35 +#define MPI_ERR_KEYVAL 36 +#define MPI_ERR_LOCKTYPE 37 +#define MPI_ERR_NAME 38 +#define MPI_ERR_NO_MEM 39 +#define MPI_ERR_NOT_SAME 40 +#define MPI_ERR_NO_SPACE 41 +#define MPI_ERR_NO_SUCH_FILE 42 +#define MPI_ERR_PORT 43 +#define MPI_ERR_QUOTA 44 +#define MPI_ERR_READ_ONLY 45 +#define MPI_ERR_RMA_CONFLICT 46 +#define MPI_ERR_RMA_SYNC 47 +#define MPI_ERR_SERVICE 48 +#define MPI_ERR_SIZE 49 +#define MPI_ERR_SPAWN 50 +#define MPI_ERR_UNSUPPORTED_DATAREP 51 +#define MPI_ERR_UNSUPPORTED_OPERATION 52 +#define MPI_ERR_WIN 53 + +#define MPI_T_ERR_MEMORY 54 +#define MPI_T_ERR_NOT_INITIALIZED 55 +#define MPI_T_ERR_CANNOT_INIT 56 +#define MPI_T_ERR_INVALID_INDEX 57 +#define MPI_T_ERR_INVALID_ITEM 58 +#define MPI_T_ERR_INVALID_HANDLE 59 +#define MPI_T_ERR_OUT_OF_HANDLES 60 +#define MPI_T_ERR_OUT_OF_SESSIONS 61 +#define MPI_T_ERR_INVALID_SESSION 62 +#define MPI_T_ERR_CVAR_SET_NOT_NOW 63 +#define MPI_T_ERR_CVAR_SET_NEVER 64 +#define MPI_T_ERR_PVAR_NO_STARTSTOP 65 +#define MPI_T_ERR_PVAR_NO_WRITE 66 +#define MPI_T_ERR_PVAR_NO_ATOMIC 67 +#define MPI_ERR_RMA_RANGE 68 +#define MPI_ERR_RMA_ATTACH 69 +#define MPI_ERR_RMA_FLAVOR 70 +#define MPI_ERR_RMA_SHARED 71 +#define MPI_T_ERR_INVALID 72 +#define MPI_T_ERR_INVALID_NAME 73 +#define MPI_ERR_PROC_ABORTED 74 + +/* not #if conditional on OPAL_ENABLE_FT_MPI for ABI */ +#define MPI_ERR_PROC_FAILED 75 +#define MPI_ERR_PROC_FAILED_PENDING 76 +#define MPI_ERR_REVOKED 77 + +/* Per MPI-3 p349 47, MPI_ERR_LASTCODE must be >= the last predefined + MPI_ERR_ code. Set the last code to allow some room for adding + error codes without breaking ABI. */ +#define MPI_ERR_LASTCODE 92 + +/* + * Comparison results. Don't change the order of these, the group + * comparison functions rely on it. + * Do not change the order of these without also modifying mpif.h.in. + */ +enum { + MPI_IDENT, + MPI_CONGRUENT, + MPI_SIMILAR, + MPI_UNEQUAL +}; + +/* + * MPI_Init_thread constants + * Do not change the order of these without also modifying mpif.h.in. + */ +enum { + MPI_THREAD_SINGLE, + MPI_THREAD_FUNNELED, + MPI_THREAD_SERIALIZED, + MPI_THREAD_MULTIPLE +}; + +/* + * Datatype combiners. 
+ * Do not change the order of these without also modifying mpif.h.in. + * (see also mpif-common.h.fin). + */ +enum { + MPI_COMBINER_NAMED, + MPI_COMBINER_DUP, + MPI_COMBINER_CONTIGUOUS, + MPI_COMBINER_VECTOR, +#if (!OMPI_OMIT_MPI1_COMPAT_DECLS) + MPI_COMBINER_HVECTOR_INTEGER, +#else + OMPI_WAS_MPI_COMBINER_HVECTOR_INTEGER, /* preserve ABI compatibility */ +#endif + MPI_COMBINER_HVECTOR, + MPI_COMBINER_INDEXED, +#if (!OMPI_OMIT_MPI1_COMPAT_DECLS) + MPI_COMBINER_HINDEXED_INTEGER, +#else + OMPI_WAS_MPI_COMBINER_HINDEXED_INTEGER, /* preserve ABI compatibility */ +#endif + MPI_COMBINER_HINDEXED, + MPI_COMBINER_INDEXED_BLOCK, +#if (!OMPI_OMIT_MPI1_COMPAT_DECLS) + MPI_COMBINER_STRUCT_INTEGER, +#else + OMPI_WAS_MPI_COMBINER_STRUCT_INTEGER, /* preserve ABI compatibility */ +#endif + MPI_COMBINER_STRUCT, + MPI_COMBINER_SUBARRAY, + MPI_COMBINER_DARRAY, + MPI_COMBINER_F90_REAL, + MPI_COMBINER_F90_COMPLEX, + MPI_COMBINER_F90_INTEGER, + MPI_COMBINER_RESIZED, + MPI_COMBINER_HINDEXED_BLOCK +}; + +#if (OMPI_OMIT_MPI1_COMPAT_DECLS) +/* If not building or configured --enable-mpi1-compatibility, then + * we don't want these datatypes, instead we define MPI_COMBINER_*_INTEGER + * to our Static Assert message if the compiler supports + * that staticly assert with a nice message. + */ +# if (OMPI_REMOVED_USE_STATIC_ASSERT) +# define MPI_COMBINER_HVECTOR_INTEGER THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_COMBINER_HVECTOR_INTEGER, MPI_COMBINER_HVECTOR); +# define MPI_COMBINER_HINDEXED_INTEGER THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_COMBINER_HINDEXED_INTEGER, MPI_COMBINER_HINDEXED); +# define MPI_COMBINER_STRUCT_INTEGER THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_COMBINER_STRUCT_INTEGER, MPI_COMBINER_STRUCT); +# endif /* OMPI_REMOVED_USE_STATIC_ASSERT */ +#endif /* Removed datatypes */ + +/* + * Communicator split type constants. + * Do not change the order of these without also modifying mpif.h.in + * (see also mpif-common.h.fin). 
+ */
+enum {
+  MPI_COMM_TYPE_SHARED,
+  OMPI_COMM_TYPE_HWTHREAD,
+  OMPI_COMM_TYPE_CORE,
+  OMPI_COMM_TYPE_L1CACHE,
+  OMPI_COMM_TYPE_L2CACHE,
+  OMPI_COMM_TYPE_L3CACHE,
+  OMPI_COMM_TYPE_SOCKET,
+  OMPI_COMM_TYPE_NUMA,
+  OMPI_COMM_TYPE_BOARD,
+  OMPI_COMM_TYPE_HOST,
+  OMPI_COMM_TYPE_CU,
+  OMPI_COMM_TYPE_CLUSTER
+};
+#define OMPI_COMM_TYPE_NODE MPI_COMM_TYPE_SHARED
+
+/*
+ * MPIT Verbosity Levels
+ */
+enum {
+  MPI_T_VERBOSITY_USER_BASIC,
+  MPI_T_VERBOSITY_USER_DETAIL,
+  MPI_T_VERBOSITY_USER_ALL,
+  MPI_T_VERBOSITY_TUNER_BASIC,
+  MPI_T_VERBOSITY_TUNER_DETAIL,
+  MPI_T_VERBOSITY_TUNER_ALL,
+  MPI_T_VERBOSITY_MPIDEV_BASIC,
+  MPI_T_VERBOSITY_MPIDEV_DETAIL,
+  MPI_T_VERBOSITY_MPIDEV_ALL
+};
+
+/*
+ * MPIT Scopes
+ */
+enum {
+  MPI_T_SCOPE_CONSTANT,
+  MPI_T_SCOPE_READONLY,
+  MPI_T_SCOPE_LOCAL,
+  MPI_T_SCOPE_GROUP,
+  MPI_T_SCOPE_GROUP_EQ,
+  MPI_T_SCOPE_ALL,
+  MPI_T_SCOPE_ALL_EQ
+};
+
+/*
+ * MPIT Object Binding
+ */
+enum {
+  MPI_T_BIND_NO_OBJECT,
+  MPI_T_BIND_MPI_COMM,
+  MPI_T_BIND_MPI_DATATYPE,
+  MPI_T_BIND_MPI_ERRHANDLER,
+  MPI_T_BIND_MPI_FILE,
+  MPI_T_BIND_MPI_GROUP,
+  MPI_T_BIND_MPI_OP,
+  MPI_T_BIND_MPI_REQUEST,
+  MPI_T_BIND_MPI_WIN,
+  MPI_T_BIND_MPI_MESSAGE,
+  MPI_T_BIND_MPI_INFO
+};
+
+/*
+ * MPIT pvar classes
+ */
+enum {
+  MPI_T_PVAR_CLASS_STATE,
+  MPI_T_PVAR_CLASS_LEVEL,
+  MPI_T_PVAR_CLASS_SIZE,
+  MPI_T_PVAR_CLASS_PERCENTAGE,
+  MPI_T_PVAR_CLASS_HIGHWATERMARK,
+  MPI_T_PVAR_CLASS_LOWWATERMARK,
+  MPI_T_PVAR_CLASS_COUNTER,
+  MPI_T_PVAR_CLASS_AGGREGATE,
+  MPI_T_PVAR_CLASS_TIMER,
+  MPI_T_PVAR_CLASS_GENERIC
+};
+
+/*
+ * NULL handles
+ */
+#define MPI_SESSION_NULL OMPI_PREDEFINED_GLOBAL(MPI_Session, ompi_mpi_instance_null)
+#define MPI_GROUP_NULL OMPI_PREDEFINED_GLOBAL(MPI_Group, ompi_mpi_group_null)
+#define MPI_COMM_NULL OMPI_PREDEFINED_GLOBAL(MPI_Comm, ompi_mpi_comm_null)
+#define MPI_REQUEST_NULL OMPI_PREDEFINED_GLOBAL(MPI_Request, ompi_request_null)
+#define MPI_MESSAGE_NULL OMPI_PREDEFINED_GLOBAL(MPI_Message, ompi_message_null)
+#define MPI_OP_NULL OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_null)
+#define MPI_ERRHANDLER_NULL OMPI_PREDEFINED_GLOBAL(MPI_Errhandler, ompi_mpi_errhandler_null)
+#define MPI_INFO_NULL OMPI_PREDEFINED_GLOBAL(MPI_Info, ompi_mpi_info_null)
+#define MPI_WIN_NULL OMPI_PREDEFINED_GLOBAL(MPI_Win, ompi_mpi_win_null)
+#define MPI_FILE_NULL OMPI_PREDEFINED_GLOBAL(MPI_File, ompi_mpi_file_null)
+#define MPI_T_ENUM_NULL ((MPI_T_enum) NULL)
+
+/*
+ * MPI_INFO_ENV handle
+ */
+#define MPI_INFO_ENV OMPI_PREDEFINED_GLOBAL(MPI_Info, ompi_mpi_info_env)
+
+#if defined(c_plusplus) || defined(__cplusplus)
+#define MPI_STATUS_IGNORE (static_cast<MPI_Status *> (0))
+#define MPI_STATUSES_IGNORE (static_cast<MPI_Status *> (0))
+#else
+#define MPI_STATUS_IGNORE ((MPI_Status *) 0)
+#define MPI_STATUSES_IGNORE ((MPI_Status *) 0)
+#endif
+
+/*
+ * Special MPI_T handles
+ */
+#define MPI_T_PVAR_ALL_HANDLES ((MPI_T_pvar_handle) -1)
+#define MPI_T_PVAR_HANDLE_NULL ((MPI_T_pvar_handle) 0)
+#define MPI_T_PVAR_SESSION_NULL ((MPI_T_pvar_session) 0)
+#define MPI_T_CVAR_HANDLE_NULL ((MPI_T_cvar_handle) 0)
+
+/* MPI-2 specifies that the name "MPI_TYPE_NULL_DELETE_FN" (and all
+   related friends) must be accessible in C, C++, and Fortran. This is
+   unworkable if the back-end Fortran compiler uses all caps for its
+   linker symbol convention -- it results in two functions with
+   different signatures that have the same name (i.e., both C and
+   Fortran use the symbol MPI_TYPE_NULL_DELETE_FN).
So we have to + #define the C names to be something else, so that they names are + *accessed* through MPI_TYPE_NULL_DELETE_FN, but their actual symbol + name is different. + + However, this file is included when the fortran wrapper functions + are compiled in Open MPI, so we do *not* want these #defines in + this case (i.e., we need the Fortran wrapper function to be + compiled as MPI_TYPE_NULL_DELETE_FN). So add some #if kinds of + protection for this case. */ + +#if !defined(OMPI_COMPILING_FORTRAN_WRAPPERS) + +#define MPI_TYPE_NULL_DELETE_FN OMPI_C_MPI_TYPE_NULL_DELETE_FN +#define MPI_TYPE_NULL_COPY_FN OMPI_C_MPI_TYPE_NULL_COPY_FN +#define MPI_TYPE_DUP_FN OMPI_C_MPI_TYPE_DUP_FN + +#define MPI_COMM_NULL_DELETE_FN OMPI_C_MPI_COMM_NULL_DELETE_FN +#define MPI_COMM_NULL_COPY_FN OMPI_C_MPI_COMM_NULL_COPY_FN +#define MPI_COMM_DUP_FN OMPI_C_MPI_COMM_DUP_FN + +#define MPI_WIN_NULL_DELETE_FN OMPI_C_MPI_WIN_NULL_DELETE_FN +#define MPI_WIN_NULL_COPY_FN OMPI_C_MPI_WIN_NULL_COPY_FN +#define MPI_WIN_DUP_FN OMPI_C_MPI_WIN_DUP_FN + +/* MPI_CONVERSION_FN_NULL is a sentinel value, but it has to be large + enough to be the same size as a valid function pointer. It + therefore shares many characteristics between Fortran constants and + Fortran sentinel functions. For example, it shares the problem of + having Fortran compilers have all-caps versions of the symbols that + must be able to be present, and therefore has to be in this + conditional block in mpi.h. */ +#define MPI_CONVERSION_FN_NULL ((MPI_Datarep_conversion_function*) 0) +#endif + +OMPI_DECLSPEC int OMPI_C_MPI_TYPE_NULL_DELETE_FN( MPI_Datatype datatype, + int type_keyval, + void* attribute_val_out, + void* extra_state ); +OMPI_DECLSPEC int OMPI_C_MPI_TYPE_NULL_COPY_FN( MPI_Datatype datatype, + int type_keyval, + void* extra_state, + void* attribute_val_in, + void* attribute_val_out, + int* flag ); +OMPI_DECLSPEC int OMPI_C_MPI_TYPE_DUP_FN( MPI_Datatype datatype, + int type_keyval, + void* extra_state, + void* attribute_val_in, + void* attribute_val_out, + int* flag ); +OMPI_DECLSPEC int OMPI_C_MPI_COMM_NULL_DELETE_FN( MPI_Comm comm, + int comm_keyval, + void* attribute_val_out, + void* extra_state ); +OMPI_DECLSPEC int OMPI_C_MPI_COMM_NULL_COPY_FN( MPI_Comm comm, + int comm_keyval, + void* extra_state, + void* attribute_val_in, + void* attribute_val_out, + int* flag ); +OMPI_DECLSPEC int OMPI_C_MPI_COMM_DUP_FN( MPI_Comm comm, int comm_keyval, + void* extra_state, + void* attribute_val_in, + void* attribute_val_out, + int* flag ); +OMPI_DECLSPEC int OMPI_C_MPI_WIN_NULL_DELETE_FN( MPI_Win window, + int win_keyval, + void* attribute_val_out, + void* extra_state ); +OMPI_DECLSPEC int OMPI_C_MPI_WIN_NULL_COPY_FN( MPI_Win window, int win_keyval, + void* extra_state, + void* attribute_val_in, + void* attribute_val_out, + int* flag ); +OMPI_DECLSPEC int OMPI_C_MPI_WIN_DUP_FN( MPI_Win window, int win_keyval, + void* extra_state, + void* attribute_val_in, + void* attribute_val_out, + int* flag ); + +/* + * External variables + * + * The below externs use the ompi_predefined_xxx_t structures to maintain + * back compatibility between MPI library versions. + * See ompi/communicator/communicator.h comments with struct ompi_communicator_t + * for full explanation why we chose to use the ompi_predefined_xxx_t structure. 
+ */ +OMPI_DECLSPEC extern struct ompi_predefined_communicator_t ompi_mpi_comm_world; +OMPI_DECLSPEC extern struct ompi_predefined_communicator_t ompi_mpi_comm_self; +OMPI_DECLSPEC extern struct ompi_predefined_communicator_t ompi_mpi_comm_null; + +OMPI_DECLSPEC extern struct ompi_predefined_group_t ompi_mpi_group_empty; +OMPI_DECLSPEC extern struct ompi_predefined_group_t ompi_mpi_group_null; + +OMPI_DECLSPEC extern struct ompi_predefined_instance_t ompi_mpi_instance_null; + +OMPI_DECLSPEC extern struct ompi_predefined_request_t ompi_request_null; + +OMPI_DECLSPEC extern struct ompi_predefined_message_t ompi_message_null; +OMPI_DECLSPEC extern struct ompi_predefined_message_t ompi_message_no_proc; + +OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_null; +OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_min; +OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_max; +OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_sum; +OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_prod; +OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_land; +OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_band; +OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_lor; +OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_bor; +OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_lxor; +OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_bxor; +OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_maxloc; +OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_minloc; +OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_replace; +OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_no_op; + + +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_datatype_null; + +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_char; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_signed_char; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_unsigned_char; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_byte; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_short; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_unsigned_short; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_int; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_unsigned; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_long; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_unsigned_long; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_long_long_int; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_unsigned_long_long; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_float; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_double; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_long_double; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_wchar; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_packed; + +/* + * Following are the C++/C99 datatypes + */ +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_cxx_bool; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_cxx_cplex; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_cxx_dblcplex; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_cxx_ldblcplex; + +/* + * Following are the Fortran datatypes + */ +OMPI_DECLSPEC extern struct 
ompi_predefined_datatype_t ompi_mpi_logical; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_character; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_integer; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_real; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_dblprec; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_cplex; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_dblcplex; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_ldblcplex; + +/* Aggregate struct datatypes are not const */ +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_2int; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_2integer; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_2real; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_2dblprec; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_2cplex; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_2dblcplex; + +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_float_int; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_double_int; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_longdbl_int; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_short_int; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_long_int; + +/* Optional MPI2 datatypes, always declared and defined, but not "exported" as MPI_LOGICAL1 */ +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_logical1; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_logical2; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_logical4; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_logical8; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_integer1; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_integer2; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_integer4; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_integer8; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_integer16; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_real2; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_real4; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_real8; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_real16; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_complex4; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_complex8; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_complex16; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_complex32; + +/* New datatypes from the MPI 2.2 standard */ +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_int8_t; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_uint8_t; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_int16_t; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_uint16_t; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_int32_t; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_uint32_t; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_int64_t; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_uint64_t; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_aint; +OMPI_DECLSPEC extern struct 
ompi_predefined_datatype_t ompi_mpi_offset; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_count; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_c_bool; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_c_float_complex; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_c_double_complex; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_c_long_double_complex; + +OMPI_DECLSPEC extern struct ompi_predefined_errhandler_t ompi_mpi_errhandler_null; +OMPI_DECLSPEC extern struct ompi_predefined_errhandler_t ompi_mpi_errors_are_fatal; +OMPI_DECLSPEC extern struct ompi_predefined_errhandler_t ompi_mpi_errors_abort; +OMPI_DECLSPEC extern struct ompi_predefined_errhandler_t ompi_mpi_errors_return; + +OMPI_DECLSPEC extern struct ompi_predefined_win_t ompi_mpi_win_null; +OMPI_DECLSPEC extern struct ompi_predefined_file_t ompi_mpi_file_null; + +OMPI_DECLSPEC extern struct ompi_predefined_info_t ompi_mpi_info_null; +OMPI_DECLSPEC extern struct ompi_predefined_info_t ompi_mpi_info_env; + +OMPI_DECLSPEC extern MPI_Fint *MPI_F_STATUS_IGNORE; +OMPI_DECLSPEC extern MPI_Fint *MPI_F_STATUSES_IGNORE; + +OMPI_DECLSPEC extern MPI_Fint *MPI_F08_STATUS_IGNORE; +OMPI_DECLSPEC extern MPI_Fint *MPI_F08_STATUSES_IGNORE; + +/* + * Removed datatypes. These datatypes are only available if Open MPI + * was configured with --enable-mpi1-compatibility. + * + * These datatypes were formally removed from the MPI specification + * and should no longer be used in MPI applications. + */ +#if (!OMPI_OMIT_MPI1_COMPAT_DECLS) +# define MPI_UB OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_ub) +# define MPI_LB OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_lb) + +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_lb; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_ub; + +#else +/* If not building or configured --enable-mpi1-compatibility, then + * we don't want these datatypes, instead we define MPI_UB and + * MPI_LB to our Static Assert message if the compiler supports + * that staticly assert with a nice message. 
+ */ +# if (OMPI_REMOVED_USE_STATIC_ASSERT) +# define MPI_UB THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_UB, MPI_Type_create_resized); +# define MPI_LB THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_LB, MPI_Type_create_resized); +# endif /* OMPI_REMOVED_USE_STATIC_ASSERT */ +#endif /* Removed datatypes */ + + +/* + * MPI predefined handles + */ +#define MPI_COMM_WORLD OMPI_PREDEFINED_GLOBAL( MPI_Comm, ompi_mpi_comm_world) +#define MPI_COMM_SELF OMPI_PREDEFINED_GLOBAL(MPI_Comm, ompi_mpi_comm_self) + +#define MPI_GROUP_EMPTY OMPI_PREDEFINED_GLOBAL(MPI_Group, ompi_mpi_group_empty) + +#define MPI_MESSAGE_NO_PROC OMPI_PREDEFINED_GLOBAL(MPI_Message, ompi_message_no_proc) + +#define MPI_MAX OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_max) +#define MPI_MIN OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_min) +#define MPI_SUM OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_sum) +#define MPI_PROD OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_prod) +#define MPI_LAND OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_land) +#define MPI_BAND OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_band) +#define MPI_LOR OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_lor) +#define MPI_BOR OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_bor) +#define MPI_LXOR OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_lxor) +#define MPI_BXOR OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_bxor) +#define MPI_MAXLOC OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_maxloc) +#define MPI_MINLOC OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_minloc) +#define MPI_REPLACE OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_replace) +#define MPI_NO_OP OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_no_op) + +/* C datatypes */ +#define MPI_DATATYPE_NULL OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_datatype_null) +#define MPI_BYTE OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_byte) +#define MPI_PACKED OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_packed) +#define MPI_CHAR OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_char) +#define MPI_SHORT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_short) +#define MPI_INT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_int) +#define MPI_LONG OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_long) +#define MPI_FLOAT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_float) +#define MPI_DOUBLE OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_double) +#define MPI_LONG_DOUBLE OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_long_double) +#define MPI_UNSIGNED_CHAR OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_unsigned_char) +#define MPI_SIGNED_CHAR OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_signed_char) +#define MPI_UNSIGNED_SHORT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_unsigned_short) +#define MPI_UNSIGNED_LONG OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_unsigned_long) +#define MPI_UNSIGNED OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_unsigned) +#define MPI_FLOAT_INT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_float_int) +#define MPI_DOUBLE_INT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_double_int) +#define MPI_LONG_DOUBLE_INT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_longdbl_int) +#define MPI_LONG_INT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_long_int) +#define MPI_SHORT_INT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_short_int) +#define MPI_2INT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_2int) + +#define MPI_WCHAR OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_wchar) +#if OPAL_HAVE_LONG_LONG +#define MPI_LONG_LONG_INT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_long_long_int) +#define MPI_LONG_LONG OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_long_long_int) +#define 
MPI_UNSIGNED_LONG_LONG OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_unsigned_long_long)
+#endif /* OPAL_HAVE_LONG_LONG */
+#define MPI_2COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_2cplex)
+#define MPI_2DOUBLE_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_2dblcplex)
+
+/* Fortran datatype bindings */
+#define MPI_CHARACTER OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_character)
+#define MPI_LOGICAL OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_logical)
+#if OMPI_HAVE_FORTRAN_LOGICAL1
+#define MPI_LOGICAL1 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_logical1)
+#endif
+#if OMPI_HAVE_FORTRAN_LOGICAL2
+#define MPI_LOGICAL2 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_logical2)
+#endif
+#if OMPI_HAVE_FORTRAN_LOGICAL4
+#define MPI_LOGICAL4 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_logical4)
+#endif
+#if OMPI_HAVE_FORTRAN_LOGICAL8
+#define MPI_LOGICAL8 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_logical8)
+#endif
+#define MPI_INTEGER OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_integer)
+#if OMPI_HAVE_FORTRAN_INTEGER1
+#define MPI_INTEGER1 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_integer1)
+#endif
+#if OMPI_HAVE_FORTRAN_INTEGER2
+#define MPI_INTEGER2 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_integer2)
+#endif
+#if OMPI_HAVE_FORTRAN_INTEGER4
+#define MPI_INTEGER4 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_integer4)
+#endif
+#if OMPI_HAVE_FORTRAN_INTEGER8
+#define MPI_INTEGER8 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_integer8)
+#endif
+#if OMPI_HAVE_FORTRAN_INTEGER16
+#define MPI_INTEGER16 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_integer16)
+#endif
+#define MPI_REAL OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_real)
+#if OMPI_HAVE_FORTRAN_REAL2
+#define MPI_REAL2 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_real2)
+#endif
+#if OMPI_HAVE_FORTRAN_REAL4
+#define MPI_REAL4 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_real4)
+#endif
+#if OMPI_HAVE_FORTRAN_REAL8
+#define MPI_REAL8 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_real8)
+#endif
+#if OMPI_HAVE_FORTRAN_REAL16
+#define MPI_REAL16 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_real16)
+#endif
+#define MPI_DOUBLE_PRECISION OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_dblprec)
+#define MPI_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_cplex)
+#if OMPI_HAVE_FORTRAN_REAL2
+#define MPI_COMPLEX4 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_complex4)
+#endif
+#if OMPI_HAVE_FORTRAN_REAL4
+#define MPI_COMPLEX8 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_complex8)
+#endif
+#if OMPI_HAVE_FORTRAN_REAL8
+#define MPI_COMPLEX16 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_complex16)
+#endif
+#if OMPI_HAVE_FORTRAN_REAL16
+#define MPI_COMPLEX32 OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_complex32)
+#endif
+#define MPI_DOUBLE_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_dblcplex)
+#define MPI_2REAL OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_2real)
+#define MPI_2DOUBLE_PRECISION OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_2dblprec)
+#define MPI_2INTEGER OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_2integer)
+
+/* New datatypes from the MPI 2.2 standard */
+#define MPI_INT8_T OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_int8_t)
+#define MPI_UINT8_T OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_uint8_t)
+#define MPI_INT16_T OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_int16_t)
+#define MPI_UINT16_T OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_uint16_t)
+#define MPI_INT32_T OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_int32_t)
+#define MPI_UINT32_T OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_uint32_t)
+#define MPI_INT64_T OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_int64_t)
+#define MPI_UINT64_T OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_uint64_t)
+#define MPI_AINT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_aint)
+#define MPI_OFFSET OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_offset)
+#define MPI_C_BOOL OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_c_bool)
+#if HAVE_FLOAT__COMPLEX
+#define MPI_C_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_c_float_complex)
+#define MPI_C_FLOAT_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_c_float_complex)
+#endif
+#if HAVE_DOUBLE__COMPLEX
+#define MPI_C_DOUBLE_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_c_double_complex)
+#endif
+#if HAVE_LONG_DOUBLE__COMPLEX
+#define MPI_C_LONG_DOUBLE_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_c_long_double_complex)
+#endif
+#define MPI_CXX_BOOL OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_cxx_bool)
+#define MPI_CXX_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_cxx_cplex)
+#define MPI_CXX_FLOAT_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_cxx_cplex)
+#define MPI_CXX_DOUBLE_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_cxx_dblcplex)
+#define MPI_CXX_LONG_DOUBLE_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_cxx_ldblcplex)
+
+/* New datatypes from the 3.0 standard */
+#define MPI_COUNT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_count)
+
+#define MPI_ERRORS_ARE_FATAL OMPI_PREDEFINED_GLOBAL(MPI_Errhandler, ompi_mpi_errors_are_fatal)
+#define MPI_ERRORS_ABORT OMPI_PREDEFINED_GLOBAL(MPI_Errhandler, ompi_mpi_errors_abort)
+#define MPI_ERRORS_RETURN OMPI_PREDEFINED_GLOBAL(MPI_Errhandler, ompi_mpi_errors_return)
+
+/* Typeclass definition for MPI_Type_match_size */
+#define MPI_TYPECLASS_INTEGER 1
+#define MPI_TYPECLASS_REAL 2
+#define MPI_TYPECLASS_COMPLEX 3
+
+/* Aint helper macros (MPI-3.1) */
+#define MPI_Aint_add(base, disp) ((MPI_Aint) ((char *) (base) + (disp)))
+#define MPI_Aint_diff(addr1, addr2) ((MPI_Aint) ((char *) (addr1) - (char *) (addr2)))
+#define PMPI_Aint_add(base, disp) MPI_Aint_add(base, disp)
+#define PMPI_Aint_diff(addr1, addr2) MPI_Aint_diff(addr1, addr2)
+
+/*
+ * Predefined info keys
+ */
+#define MPI_INFO_KEY_SESSION_PSET_SIZE "size"
+
+/*
+ * MPI API
+ */
+
+OMPI_DECLSPEC int MPI_Abort(MPI_Comm comm, int errorcode);
+OMPI_DECLSPEC int MPI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
+        int target_rank, MPI_Aint target_disp, int target_count,
+        MPI_Datatype target_datatype, MPI_Op op, MPI_Win win);
+OMPI_DECLSPEC int MPI_Add_error_class(int *errorclass);
+OMPI_DECLSPEC int MPI_Add_error_code(int errorclass, int *errorcode);
+OMPI_DECLSPEC int MPI_Add_error_string(int errorcode, const char *string);
+OMPI_DECLSPEC int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount,
+        MPI_Datatype recvtype, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount,
+        MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype,
+        MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Alloc_mem(MPI_Aint size, MPI_Info info,
+        void *baseptr);
+OMPI_DECLSPEC int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Allreduce_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount,
+        MPI_Datatype recvtype, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Ialltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype,
+        MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Alltoallv_init(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Alltoallw(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI_Datatype sendtypes[],
+        void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[],
+        MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI_Datatype sendtypes[],
+        void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[],
+        MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Alltoallw_init(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI_Datatype sendtypes[],
+        void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[],
+        MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Barrier(MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Ibarrier(MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Barrier_init(MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype,
+        int root, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Ibcast(void *buffer, int count, MPI_Datatype datatype,
+        int root, MPI_Comm comm, MPI_Request *request);
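+/*
+ * Usage sketch: the collectives above come in a blocking form, a
+ * nonblocking form (MPI_I..., returning an MPI_Request), and a
+ * persistent form (..._init, taking an MPI_Info). A minimal
+ * nonblocking pattern, assuming an already-initialized MPI
+ * environment, would be:
+ *
+ *   int data = 0;
+ *   MPI_Request req;
+ *   MPI_Ibcast(&data, 1, MPI_INT, 0, MPI_COMM_WORLD, &req);
+ *   // ...independent computation overlapped with the broadcast...
+ *   MPI_Wait(&req, MPI_STATUS_IGNORE);
+ */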
+OMPI_DECLSPEC int MPI_Bcast_init(void *buffer, int count, MPI_Datatype datatype,
+        int root, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Bsend(const void *buf, int count, MPI_Datatype datatype,
+        int dest, int tag, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Bsend_init(const void *buf, int count, MPI_Datatype datatype,
+        int dest, int tag, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Buffer_attach(void *buffer, int size);
+OMPI_DECLSPEC int MPI_Buffer_detach(void *buffer, int *size);
+OMPI_DECLSPEC int MPI_Cancel(MPI_Request *request);
+OMPI_DECLSPEC int MPI_Cart_coords(MPI_Comm comm, int rank, int maxdims, int coords[]);
+OMPI_DECLSPEC int MPI_Cart_create(MPI_Comm old_comm, int ndims, const int dims[],
+        const int periods[], int reorder, MPI_Comm *comm_cart);
+OMPI_DECLSPEC int MPI_Cart_get(MPI_Comm comm, int maxdims, int dims[],
+        int periods[], int coords[]);
+OMPI_DECLSPEC int MPI_Cart_map(MPI_Comm comm, int ndims, const int dims[],
+        const int periods[], int *newrank);
+OMPI_DECLSPEC int MPI_Cart_rank(MPI_Comm comm, const int coords[], int *rank);
+OMPI_DECLSPEC int MPI_Cart_shift(MPI_Comm comm, int direction, int disp,
+        int *rank_source, int *rank_dest);
+OMPI_DECLSPEC int MPI_Cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *new_comm);
+OMPI_DECLSPEC int MPI_Cartdim_get(MPI_Comm comm, int *ndims);
+OMPI_DECLSPEC int MPI_Close_port(const char *port_name);
+OMPI_DECLSPEC int MPI_Comm_accept(const char *port_name, MPI_Info info, int root,
+        MPI_Comm comm, MPI_Comm *newcomm);
+OMPI_DECLSPEC MPI_Fint MPI_Comm_c2f(MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Comm_call_errhandler(MPI_Comm comm, int errorcode);
+OMPI_DECLSPEC int MPI_Comm_compare(MPI_Comm comm1, MPI_Comm comm2, int *result);
+OMPI_DECLSPEC int MPI_Comm_connect(const char *port_name, MPI_Info info, int root,
+        MPI_Comm comm, MPI_Comm *newcomm);
+OMPI_DECLSPEC int MPI_Comm_create_errhandler(MPI_Comm_errhandler_function *function,
+        MPI_Errhandler *errhandler);
+OMPI_DECLSPEC int MPI_Comm_create_keyval(MPI_Comm_copy_attr_function *comm_copy_attr_fn,
+        MPI_Comm_delete_attr_function *comm_delete_attr_fn,
+        int *comm_keyval, void *extra_state);
+OMPI_DECLSPEC int MPI_Comm_create_group(MPI_Comm comm, MPI_Group group, int tag, MPI_Comm *newcomm);
+OMPI_DECLSPEC int MPI_Comm_create_from_group(MPI_Group group, const char *tag, MPI_Info info,
+        MPI_Errhandler errhandler, MPI_Comm *newcomm);
+OMPI_DECLSPEC int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm);
+OMPI_DECLSPEC int MPI_Comm_delete_attr(MPI_Comm comm, int comm_keyval);
+OMPI_DECLSPEC int MPI_Comm_disconnect(MPI_Comm *comm);
+OMPI_DECLSPEC int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm);
+OMPI_DECLSPEC int MPI_Comm_idup(MPI_Comm comm, MPI_Comm *newcomm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Comm_dup_with_info(MPI_Comm comm, MPI_Info info, MPI_Comm *newcomm);
+OMPI_DECLSPEC int MPI_Comm_idup_with_info(MPI_Comm comm, MPI_Info info, MPI_Comm *newcomm,
+        MPI_Request *request);
+OMPI_DECLSPEC MPI_Comm MPI_Comm_f2c(MPI_Fint comm);
+OMPI_DECLSPEC int MPI_Comm_free_keyval(int *comm_keyval);
+OMPI_DECLSPEC int MPI_Comm_free(MPI_Comm *comm);
+OMPI_DECLSPEC int MPI_Comm_get_attr(MPI_Comm comm, int comm_keyval,
+        void *attribute_val, int *flag);
+OMPI_DECLSPEC int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int nodes[],
+        const int degrees[], const int targets[],
+        const int weights[], MPI_Info info,
+        int reorder, MPI_Comm * newcomm);
+OMPI_DECLSPEC int MPI_Dist_graph_create_adjacent(MPI_Comm comm_old,
+        int indegree, const int sources[],
+        const int sourceweights[],
+        int outdegree,
+        const int destinations[],
+        const int destweights[],
+        MPI_Info info, int reorder,
+        MPI_Comm *comm_dist_graph);
+OMPI_DECLSPEC int MPI_Dist_graph_neighbors(MPI_Comm comm, int maxindegree,
+        int sources[], int sourceweights[],
+        int maxoutdegree,
+        int destinations[],
+        int destweights[]);
+OMPI_DECLSPEC int MPI_Dist_graph_neighbors_count(MPI_Comm comm,
+        int *inneighbors,
+        int *outneighbors,
+        int *weighted);
+OMPI_DECLSPEC int MPI_Comm_get_errhandler(MPI_Comm comm, MPI_Errhandler *erhandler);
+OMPI_DECLSPEC int MPI_Comm_get_info(MPI_Comm comm, MPI_Info *info_used);
+OMPI_DECLSPEC int MPI_Comm_get_name(MPI_Comm comm, char *comm_name, int *resultlen);
+OMPI_DECLSPEC int MPI_Comm_get_parent(MPI_Comm *parent);
+OMPI_DECLSPEC int MPI_Comm_group(MPI_Comm comm, MPI_Group *group);
+OMPI_DECLSPEC int MPI_Comm_join(int fd, MPI_Comm *intercomm);
+OMPI_DECLSPEC int MPI_Comm_rank(MPI_Comm comm, int *rank);
+OMPI_DECLSPEC int MPI_Comm_remote_group(MPI_Comm comm, MPI_Group *group);
+OMPI_DECLSPEC int MPI_Comm_remote_size(MPI_Comm comm, int *size);
+OMPI_DECLSPEC int MPI_Comm_set_attr(MPI_Comm comm, int comm_keyval, void *attribute_val);
+OMPI_DECLSPEC int MPI_Comm_set_errhandler(MPI_Comm comm, MPI_Errhandler errhandler);
+OMPI_DECLSPEC int MPI_Comm_set_info(MPI_Comm comm, MPI_Info info);
+OMPI_DECLSPEC int MPI_Comm_set_name(MPI_Comm comm, const char *comm_name);
+OMPI_DECLSPEC int MPI_Comm_size(MPI_Comm comm, int *size);
+OMPI_DECLSPEC int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info info,
+        int root, MPI_Comm comm, MPI_Comm *intercomm,
+        int array_of_errcodes[]);
+OMPI_DECLSPEC int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_of_argv[],
+        const int array_of_maxprocs[], const MPI_Info array_of_info[],
+        int root, MPI_Comm comm, MPI_Comm *intercomm,
+        int array_of_errcodes[]);
+OMPI_DECLSPEC int MPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm);
+OMPI_DECLSPEC int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key, MPI_Info info, MPI_Comm *newcomm);
+OMPI_DECLSPEC int MPI_Comm_test_inter(MPI_Comm comm, int *flag);
+OMPI_DECLSPEC int MPI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
+        void *result_addr, MPI_Datatype datatype, int target_rank,
+        MPI_Aint target_disp, MPI_Win win);
+OMPI_DECLSPEC int MPI_Dims_create(int nnodes, int ndims, int dims[]);
+OMPI_DECLSPEC MPI_Fint MPI_Errhandler_c2f(MPI_Errhandler errhandler);
+OMPI_DECLSPEC MPI_Errhandler MPI_Errhandler_f2c(MPI_Fint errhandler);
+OMPI_DECLSPEC int MPI_Errhandler_free(MPI_Errhandler *errhandler);
+OMPI_DECLSPEC int MPI_Error_class(int errorcode, int *errorclass);
+OMPI_DECLSPEC int MPI_Error_string(int errorcode, char *string, int *resultlen);
+OMPI_DECLSPEC int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Exscan_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Fetch_and_op(const void *origin_addr, void *result_addr, MPI_Datatype datatype,
+        int target_rank, MPI_Aint target_disp, MPI_Op op, MPI_Win win);
+OMPI_DECLSPEC MPI_Fint MPI_File_c2f(MPI_File file);
+OMPI_DECLSPEC MPI_File MPI_File_f2c(MPI_Fint file);
+OMPI_DECLSPEC int MPI_File_call_errhandler(MPI_File fh, int errorcode);
+OMPI_DECLSPEC int MPI_File_create_errhandler(MPI_File_errhandler_function *function,
+        MPI_Errhandler *errhandler);
+OMPI_DECLSPEC int MPI_File_set_errhandler( MPI_File file, MPI_Errhandler errhandler);
+OMPI_DECLSPEC int MPI_File_get_errhandler( MPI_File file, MPI_Errhandler *errhandler);
+OMPI_DECLSPEC int MPI_File_open(MPI_Comm comm, const char *filename, int amode,
+        MPI_Info info, MPI_File *fh);
+OMPI_DECLSPEC int MPI_File_close(MPI_File *fh);
+OMPI_DECLSPEC int MPI_File_delete(const char *filename, MPI_Info info);
+OMPI_DECLSPEC int MPI_File_set_size(MPI_File fh, MPI_Offset size);
+OMPI_DECLSPEC int MPI_File_preallocate(MPI_File fh, MPI_Offset size);
+OMPI_DECLSPEC int MPI_File_get_size(MPI_File fh, MPI_Offset *size);
+OMPI_DECLSPEC int MPI_File_get_group(MPI_File fh, MPI_Group *group);
+OMPI_DECLSPEC int MPI_File_get_amode(MPI_File fh, int *amode);
+OMPI_DECLSPEC int MPI_File_set_info(MPI_File fh, MPI_Info info);
+OMPI_DECLSPEC int MPI_File_get_info(MPI_File fh, MPI_Info *info_used);
+OMPI_DECLSPEC int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype,
+        MPI_Datatype filetype, const char *datarep, MPI_Info info);
+OMPI_DECLSPEC int MPI_File_get_view(MPI_File fh, MPI_Offset *disp,
+        MPI_Datatype *etype,
+        MPI_Datatype *filetype, char *datarep);
+OMPI_DECLSPEC int MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf,
+        int count, MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void *buf,
+        int count, MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_write_at(MPI_File fh, MPI_Offset offset, const void *buf,
+        int count, MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_write_at_all(MPI_File fh, MPI_Offset offset, const void *buf,
+        int count, MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_iread_at(MPI_File fh, MPI_Offset offset, void *buf,
+        int count, MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int MPI_File_iwrite_at(MPI_File fh, MPI_Offset offset, const void *buf,
+        int count, MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int MPI_File_iread_at_all(MPI_File fh, MPI_Offset offset, void *buf,
+        int count, MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int MPI_File_iwrite_at_all(MPI_File fh, MPI_Offset offset, const void *buf,
+        int count, MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int MPI_File_read(MPI_File fh, void *buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_read_all(MPI_File fh, void *buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_write(MPI_File fh, const void *buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_write_all(MPI_File fh, const void *buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_iread(MPI_File fh, void *buf, int count,
+        MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int MPI_File_iwrite(MPI_File fh, const void *buf, int count,
+        MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int MPI_File_iread_all(MPI_File fh, void *buf, int count,
+        MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int MPI_File_iwrite_all(MPI_File fh, const void *buf, int count,
+        MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int MPI_File_seek(MPI_File fh, MPI_Offset offset, int whence);
+OMPI_DECLSPEC int MPI_File_get_position(MPI_File fh, MPI_Offset *offset);
+OMPI_DECLSPEC int MPI_File_get_byte_offset(MPI_File fh, MPI_Offset offset,
+        MPI_Offset *disp);
+OMPI_DECLSPEC int MPI_File_read_shared(MPI_File fh, void *buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_write_shared(MPI_File fh, const void *buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_iread_shared(MPI_File fh, void *buf, int count,
+        MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int MPI_File_iwrite_shared(MPI_File fh, const void *buf, int count,
+        MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int MPI_File_read_ordered(MPI_File fh, void *buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_write_ordered(MPI_File fh, const void *buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_seek_shared(MPI_File fh, MPI_Offset offset, int whence);
+OMPI_DECLSPEC int MPI_File_get_position_shared(MPI_File fh, MPI_Offset *offset);
+OMPI_DECLSPEC int MPI_File_read_at_all_begin(MPI_File fh, MPI_Offset offset, void *buf,
+        int count, MPI_Datatype datatype);
+OMPI_DECLSPEC int MPI_File_read_at_all_end(MPI_File fh, void *buf, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_write_at_all_begin(MPI_File fh, MPI_Offset offset, const void *buf,
+        int count, MPI_Datatype datatype);
+OMPI_DECLSPEC int MPI_File_write_at_all_end(MPI_File fh, const void *buf, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_read_all_begin(MPI_File fh, void *buf, int count,
+        MPI_Datatype datatype);
+OMPI_DECLSPEC int MPI_File_read_all_end(MPI_File fh, void *buf, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_write_all_begin(MPI_File fh, const void *buf, int count,
+        MPI_Datatype datatype);
+OMPI_DECLSPEC int MPI_File_write_all_end(MPI_File fh, const void *buf, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_read_ordered_begin(MPI_File fh, void *buf, int count,
+        MPI_Datatype datatype);
+OMPI_DECLSPEC int MPI_File_read_ordered_end(MPI_File fh, void *buf, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_write_ordered_begin(MPI_File fh, const void *buf, int count,
+        MPI_Datatype datatype);
+OMPI_DECLSPEC int MPI_File_write_ordered_end(MPI_File fh, const void *buf, MPI_Status *status);
+OMPI_DECLSPEC int MPI_File_get_type_extent(MPI_File fh, MPI_Datatype datatype,
+        MPI_Aint *extent);
+OMPI_DECLSPEC int MPI_File_set_atomicity(MPI_File fh, int flag);
+OMPI_DECLSPEC int MPI_File_get_atomicity(MPI_File fh, int *flag);
+OMPI_DECLSPEC int MPI_File_sync(MPI_File fh);
+OMPI_DECLSPEC int MPI_Finalize(void);
+OMPI_DECLSPEC int MPI_Finalized(int *flag);
+OMPI_DECLSPEC int MPI_Free_mem(void *base);
+OMPI_DECLSPEC int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        int root, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        int root, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Gather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        int root, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int displs[],
+        MPI_Datatype recvtype, int root, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype,
+        int root, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Gatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype,
+        int root, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Get_address(const void *location, MPI_Aint *address);
+OMPI_DECLSPEC int MPI_Get_count(const MPI_Status *status, MPI_Datatype datatype, int *count);
+OMPI_DECLSPEC int MPI_Get_elements(const MPI_Status *status, MPI_Datatype datatype, int *count);
+OMPI_DECLSPEC int MPI_Get_elements_x(const MPI_Status *status, MPI_Datatype datatype, MPI_Count *count);
+OMPI_DECLSPEC int MPI_Get(void *origin_addr, int origin_count,
+        MPI_Datatype origin_datatype, int target_rank,
+        MPI_Aint target_disp, int target_count,
+        MPI_Datatype target_datatype, MPI_Win win);
+OMPI_DECLSPEC int MPI_Get_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
+        void *result_addr, int result_count, MPI_Datatype result_datatype,
+        int target_rank, MPI_Aint target_disp, int target_count,
+        MPI_Datatype target_datatype, MPI_Op op, MPI_Win win);
+OMPI_DECLSPEC int MPI_Get_library_version(char *version, int *resultlen);
+OMPI_DECLSPEC int MPI_Get_processor_name(char *name, int *resultlen);
+OMPI_DECLSPEC int MPI_Get_version(int *version, int *subversion);
+OMPI_DECLSPEC int MPI_Graph_create(MPI_Comm comm_old, int nnodes, const int index[],
+        const int edges[], int reorder, MPI_Comm *comm_graph);
+OMPI_DECLSPEC int MPI_Graph_get(MPI_Comm comm, int maxindex, int maxedges,
+        int index[], int edges[]);
+OMPI_DECLSPEC int MPI_Graph_map(MPI_Comm comm, int nnodes, const int index[], const int edges[],
+        int *newrank);
+OMPI_DECLSPEC int MPI_Graph_neighbors_count(MPI_Comm comm, int rank, int *nneighbors);
+OMPI_DECLSPEC int MPI_Graph_neighbors(MPI_Comm comm, int rank, int maxneighbors,
+        int neighbors[]);
+OMPI_DECLSPEC int MPI_Graphdims_get(MPI_Comm comm, int *nnodes, int *nedges);
+OMPI_DECLSPEC int MPI_Grequest_complete(MPI_Request request);
+OMPI_DECLSPEC int MPI_Grequest_start(MPI_Grequest_query_function *query_fn,
+        MPI_Grequest_free_function *free_fn,
+        MPI_Grequest_cancel_function *cancel_fn,
+        void *extra_state, MPI_Request *request);
+OMPI_DECLSPEC MPI_Fint MPI_Group_c2f(MPI_Group group);
+OMPI_DECLSPEC int MPI_Group_compare(MPI_Group group1, MPI_Group group2, int *result);
+OMPI_DECLSPEC int MPI_Group_difference(MPI_Group group1, MPI_Group group2,
+        MPI_Group *newgroup);
+OMPI_DECLSPEC int MPI_Group_excl(MPI_Group group, int n, const int ranks[],
+        MPI_Group *newgroup);
+OMPI_DECLSPEC MPI_Group MPI_Group_f2c(MPI_Fint group);
+OMPI_DECLSPEC int MPI_Group_free(MPI_Group *group);
+OMPI_DECLSPEC int MPI_Group_from_session_pset (MPI_Session session, const char *pset_name, MPI_Group *newgroup);
+OMPI_DECLSPEC int MPI_Group_incl(MPI_Group group, int n, const int ranks[],
+        MPI_Group *newgroup);
+OMPI_DECLSPEC int MPI_Group_intersection(MPI_Group group1, MPI_Group group2,
+        MPI_Group *newgroup);
+OMPI_DECLSPEC int MPI_Group_range_excl(MPI_Group group, int n, int ranges[][3],
+        MPI_Group *newgroup);
+OMPI_DECLSPEC int MPI_Group_range_incl(MPI_Group group, int n, int ranges[][3],
+        MPI_Group *newgroup);
+OMPI_DECLSPEC int MPI_Group_rank(MPI_Group group, int *rank);
+OMPI_DECLSPEC int MPI_Group_size(MPI_Group group, int *size);
+OMPI_DECLSPEC int MPI_Group_translate_ranks(MPI_Group group1, int n, const int ranks1[],
+        MPI_Group group2, int ranks2[]);
+OMPI_DECLSPEC int MPI_Group_union(MPI_Group group1, MPI_Group group2,
+        MPI_Group *newgroup);
+OMPI_DECLSPEC int MPI_Ibsend(const void *buf, int count, MPI_Datatype datatype, int dest,
+        int tag, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Improbe(int source, int tag, MPI_Comm comm,
+        int *flag, MPI_Message *message,
+        MPI_Status *status);
+OMPI_DECLSPEC int MPI_Imrecv(void *buf, int count, MPI_Datatype type,
+        MPI_Message *message, MPI_Request *request);
+OMPI_DECLSPEC MPI_Fint MPI_Info_c2f(MPI_Info info);
+OMPI_DECLSPEC int MPI_Info_create(MPI_Info *info);
+OMPI_DECLSPEC int MPI_Info_delete(MPI_Info info, const char *key);
+OMPI_DECLSPEC int MPI_Info_dup(MPI_Info info, MPI_Info *newinfo);
+OMPI_DECLSPEC MPI_Info MPI_Info_f2c(MPI_Fint info);
+OMPI_DECLSPEC int MPI_Info_free(MPI_Info *info);
+OMPI_DECLSPEC int MPI_Info_get(MPI_Info info, const char *key, int valuelen,
+        char *value, int *flag);
+OMPI_DECLSPEC int MPI_Info_get_nkeys(MPI_Info info, int *nkeys);
+OMPI_DECLSPEC int MPI_Info_get_nthkey(MPI_Info info, int n, char *key);
+OMPI_DECLSPEC int MPI_Info_get_valuelen(MPI_Info info, const char *key, int *valuelen,
+        int *flag);
+OMPI_DECLSPEC int MPI_Info_get_string(MPI_Info info, const char *key, int *buflen,
+        char *value, int *flag);
+OMPI_DECLSPEC int MPI_Info_set(MPI_Info info, const char *key, const char *value);
+OMPI_DECLSPEC int MPI_Init(int *argc, char ***argv);
+OMPI_DECLSPEC int MPI_Initialized(int *flag);
+OMPI_DECLSPEC int MPI_Init_thread(int *argc, char ***argv, int required,
+        int *provided);
+OMPI_DECLSPEC int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader,
+        MPI_Comm bridge_comm, int remote_leader,
+        int tag, MPI_Comm *newintercomm);
+OMPI_DECLSPEC int MPI_Intercomm_create_from_groups (MPI_Group local_group, int local_leader, MPI_Group remote_group,
+        int remote_leader, const char *tag, MPI_Info info, MPI_Errhandler errhandler,
+        MPI_Comm *newintercomm);
+OMPI_DECLSPEC int MPI_Intercomm_merge(MPI_Comm intercomm, int high,
+        MPI_Comm *newintercomm);
+OMPI_DECLSPEC int MPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag,
+        MPI_Status *status);
+OMPI_DECLSPEC int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source,
+        int tag, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Irsend(const void *buf, int count, MPI_Datatype datatype, int dest,
+        int tag, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest,
+        int tag, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Isendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        int dest, int sendtag, void *recvbuf, int recvcount,
+        MPI_Datatype recvtype, int source, int recvtag,
+        MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Isendrecv_replace(void * buf, int count, MPI_Datatype datatype,
+        int dest, int sendtag, int source, int recvtag,
+        MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Issend(const void *buf, int count, MPI_Datatype datatype, int dest,
+        int tag, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Is_thread_main(int *flag);
+OMPI_DECLSPEC int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name);
+OMPI_DECLSPEC MPI_Fint MPI_Message_c2f(MPI_Message message);
+OMPI_DECLSPEC MPI_Message MPI_Message_f2c(MPI_Fint message);
+OMPI_DECLSPEC int MPI_Mprobe(int source, int tag, MPI_Comm comm,
+        MPI_Message *message,
+        MPI_Status *status);
+OMPI_DECLSPEC int MPI_Mrecv(void *buf, int count, MPI_Datatype type,
+        MPI_Message *message, MPI_Status *status);
+OMPI_DECLSPEC int MPI_Neighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Ineighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Neighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int displs[],
+        MPI_Datatype recvtype, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Neighbor_allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Ineighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Neighbor_allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Neighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Ineighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Neighbor_alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Neighbor_alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype,
+        MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Ineighbor_alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Neighbor_alltoallv_init(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Neighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[],
+        void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[],
+        MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[],
+        void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[],
+        MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Neighbor_alltoallw_init(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[],
+        void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[],
+        MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC MPI_Fint MPI_Op_c2f(MPI_Op op);
+OMPI_DECLSPEC int MPI_Op_commutative(MPI_Op op, int *commute);
+OMPI_DECLSPEC int MPI_Op_create(MPI_User_function *function, int commute, MPI_Op *op);
+OMPI_DECLSPEC int MPI_Open_port(MPI_Info info, char *port_name);
+OMPI_DECLSPEC MPI_Op MPI_Op_f2c(MPI_Fint op);
+OMPI_DECLSPEC int MPI_Op_free(MPI_Op *op);
+OMPI_DECLSPEC int MPI_Pack_external(const char datarep[], const void *inbuf, int incount,
+        MPI_Datatype datatype, void *outbuf,
+        MPI_Aint outsize, MPI_Aint *position);
+OMPI_DECLSPEC int MPI_Pack_external_size(const char datarep[], int incount,
+        MPI_Datatype datatype, MPI_Aint *size);
+OMPI_DECLSPEC int MPI_Pack(const void *inbuf, int incount, MPI_Datatype datatype,
+        void *outbuf, int outsize, int *position, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Pack_size(int incount, MPI_Datatype datatype, MPI_Comm comm,
+        int *size);
+OMPI_DECLSPEC int MPI_Parrived(MPI_Request request, MPI_Count partition, int *flag);
+OMPI_DECLSPEC int MPI_Pcontrol(const int level, ...);
+OMPI_DECLSPEC int MPI_Pready(int partitions, MPI_Request request);
+OMPI_DECLSPEC int MPI_Pready_range(int partition_low, int partition_high,
+        MPI_Request request);
+OMPI_DECLSPEC int MPI_Pready_list(int length, int partition_list[], MPI_Request request);
+OMPI_DECLSPEC int MPI_Precv_init(void* buf, int partitions, MPI_Count count,
+        MPI_Datatype datatype, int source, int tag, MPI_Comm comm,
+        MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status);
+OMPI_DECLSPEC int MPI_Psend_init(const void* buf, int partitions, MPI_Count count,
+        MPI_Datatype datatype, int dest, int tag, MPI_Comm comm,
+        MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Publish_name(const char *service_name, MPI_Info info,
+        const char *port_name);
+OMPI_DECLSPEC int MPI_Put(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
+        int target_rank, MPI_Aint target_disp, int target_count,
+        MPI_Datatype target_datatype, MPI_Win win);
+OMPI_DECLSPEC int MPI_Query_thread(int *provided);
+OMPI_DECLSPEC int MPI_Raccumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
+        int target_rank, MPI_Aint target_disp, int target_count,
+        MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Recv_init(void *buf, int count, MPI_Datatype datatype, int source,
+        int tag, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source,
+        int tag, MPI_Comm comm, MPI_Status *status);
+OMPI_DECLSPEC int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, int root, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, int root, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Reduce_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Reduce_local(const void *inbuf, void *inoutbuf, int count,
+        MPI_Datatype datatype, MPI_Op op);
+OMPI_DECLSPEC int MPI_Reduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Reduce_scatter_init(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Reduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Reduce_scatter_block_init(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Register_datarep(const char *datarep,
+        MPI_Datarep_conversion_function *read_conversion_fn,
+        MPI_Datarep_conversion_function *write_conversion_fn,
+        MPI_Datarep_extent_function *dtype_file_extent_fn,
+        void *extra_state);
+OMPI_DECLSPEC MPI_Fint MPI_Request_c2f(MPI_Request request);
+OMPI_DECLSPEC MPI_Request MPI_Request_f2c(MPI_Fint request);
+OMPI_DECLSPEC int MPI_Request_free(MPI_Request *request);
+OMPI_DECLSPEC int MPI_Request_get_status(MPI_Request request, int *flag,
+        MPI_Status *status);
+OMPI_DECLSPEC int MPI_Rget(void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
+        int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype,
+        MPI_Win win, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Rget_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
+        void *result_addr, int result_count, MPI_Datatype result_datatype,
+        int target_rank, MPI_Aint target_disp, int target_count,
+        MPI_Datatype target_datatype, MPI_Op op,
+        MPI_Win win, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Rput(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
+        int target_rank, MPI_Aint target_disp, int target_cout,
+        MPI_Datatype target_datatype, MPI_Win win, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Rsend(const void *ibuf, int count, MPI_Datatype datatype, int dest,
+        int tag, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Rsend_init(const void *buf, int count, MPI_Datatype datatype,
+        int dest, int tag, MPI_Comm comm,
+        MPI_Request *request);
+OMPI_DECLSPEC int MPI_Scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Scan_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Scatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        int root, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        int root, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Scatter_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        int root, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Scatterv(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        int root, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        int root, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Scatterv_init(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        int root, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int MPI_Send_init(const void *buf, int count, MPI_Datatype datatype,
+        int dest, int tag, MPI_Comm comm,
+        MPI_Request *request);
+OMPI_DECLSPEC int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest,
+        int tag, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        int dest, int sendtag, void *recvbuf, int recvcount,
+        MPI_Datatype recvtype, int source, int recvtag,
+        MPI_Comm comm, MPI_Status *status);
+OMPI_DECLSPEC int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype,
+        int dest, int sendtag, int source, int recvtag,
+        MPI_Comm comm, MPI_Status *status);
+OMPI_DECLSPEC MPI_Fint MPI_Session_c2f (const MPI_Session session);
+OMPI_DECLSPEC int MPI_Session_create_errhandler (MPI_Session_errhandler_function *session_errhandler_fn,
+        MPI_Errhandler *errhandler);
+OMPI_DECLSPEC int MPI_Session_finalize (MPI_Session *session);
+OMPI_DECLSPEC int MPI_Session_get_info (MPI_Session session, MPI_Info *info_used);
+OMPI_DECLSPEC int MPI_Session_get_num_psets (MPI_Session session, MPI_Info info, int *npset_names);
+OMPI_DECLSPEC int MPI_Session_get_nth_pset (MPI_Session session, MPI_Info info, int n, int *len, char *pset_name);
+OMPI_DECLSPEC int MPI_Session_get_pset_info (MPI_Session session, const char *pset_name, MPI_Info *info_used);
+OMPI_DECLSPEC int MPI_Session_init (MPI_Info info, MPI_Errhandler errhandler,
+        MPI_Session *session);
+OMPI_DECLSPEC MPI_Session MPI_Session_f2c (MPI_Fint session);
+OMPI_DECLSPEC int MPI_Session_set_info (MPI_Session session, MPI_Info info);
+OMPI_DECLSPEC int MPI_Ssend_init(const void *buf, int count, MPI_Datatype datatype,
+        int dest, int tag, MPI_Comm comm,
+        MPI_Request *request);
+OMPI_DECLSPEC int MPI_Ssend(const void *buf, int count, MPI_Datatype datatype, int dest,
+        int tag, MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Start(MPI_Request *request);
+OMPI_DECLSPEC int MPI_Startall(int count, MPI_Request array_of_requests[]);
+OMPI_DECLSPEC int MPI_Status_c2f(const MPI_Status *c_status, MPI_Fint *f_status);
+OMPI_DECLSPEC int MPI_Status_c2f08(const MPI_Status *c_status, MPI_F08_status *f08_status);
+OMPI_DECLSPEC int MPI_Status_f082c(const MPI_F08_status *f08_status, MPI_Status *c_status);
+OMPI_DECLSPEC int MPI_Status_f082f(const MPI_F08_status *f08_status, MPI_Fint *f_status);
+OMPI_DECLSPEC int MPI_Status_f2c(const MPI_Fint *f_status, MPI_Status *c_status);
+OMPI_DECLSPEC int MPI_Status_f2f08(const MPI_Fint *f_status, MPI_F08_status *f08_status);
+OMPI_DECLSPEC int MPI_Status_set_cancelled(MPI_Status *status, int flag);
+OMPI_DECLSPEC int MPI_Status_set_elements(MPI_Status *status, MPI_Datatype datatype,
+        int count);
+OMPI_DECLSPEC int MPI_Status_set_elements_x(MPI_Status *status, MPI_Datatype datatype,
+        MPI_Count count);
+OMPI_DECLSPEC int MPI_Testall(int count, MPI_Request array_of_requests[], int *flag,
+        MPI_Status array_of_statuses[]);
+OMPI_DECLSPEC int MPI_Testany(int count, MPI_Request array_of_requests[], int *index,
+        int *flag, MPI_Status *status);
+OMPI_DECLSPEC int MPI_Test(MPI_Request *request, int *flag, MPI_Status *status);
+OMPI_DECLSPEC int MPI_Test_cancelled(const MPI_Status *status, int *flag);
+OMPI_DECLSPEC int MPI_Testsome(int incount, MPI_Request array_of_requests[],
+        int *outcount, int array_of_indices[],
+        MPI_Status array_of_statuses[]);
+OMPI_DECLSPEC int MPI_Topo_test(MPI_Comm comm, int *status);
+OMPI_DECLSPEC MPI_Fint MPI_Type_c2f(MPI_Datatype datatype);
+OMPI_DECLSPEC int MPI_Type_commit(MPI_Datatype *type);
+OMPI_DECLSPEC int MPI_Type_contiguous(int count, MPI_Datatype oldtype,
+        MPI_Datatype *newtype);
+OMPI_DECLSPEC int MPI_Type_create_darray(int size, int rank, int ndims,
+        const int gsize_array[], const int distrib_array[],
+        const int darg_array[], const int psize_array[],
+        int order, MPI_Datatype oldtype,
+        MPI_Datatype *newtype);
+OMPI_DECLSPEC int MPI_Type_create_f90_complex(int p, int r, MPI_Datatype *newtype);
+OMPI_DECLSPEC int MPI_Type_create_f90_integer(int r, MPI_Datatype *newtype);
+OMPI_DECLSPEC int MPI_Type_create_f90_real(int p, int r, MPI_Datatype *newtype);
+OMPI_DECLSPEC int MPI_Type_create_hindexed_block(int count, int blocklength,
+        const MPI_Aint array_of_displacements[],
+        MPI_Datatype oldtype,
+        MPI_Datatype *newtype);
+OMPI_DECLSPEC int MPI_Type_create_hindexed(int count, const int array_of_blocklengths[],
+        const MPI_Aint array_of_displacements[],
+        MPI_Datatype oldtype,
+        MPI_Datatype *newtype);
+OMPI_DECLSPEC int MPI_Type_create_hvector(int count, int blocklength, MPI_Aint stride,
+        MPI_Datatype oldtype,
+        MPI_Datatype *newtype);
+OMPI_DECLSPEC int MPI_Type_create_keyval(MPI_Type_copy_attr_function *type_copy_attr_fn,
+        MPI_Type_delete_attr_function *type_delete_attr_fn,
+        int *type_keyval, void *extra_state);
+OMPI_DECLSPEC int MPI_Type_create_indexed_block(int count, int blocklength,
+        const int array_of_displacements[],
+        MPI_Datatype oldtype,
+        MPI_Datatype *newtype);
+OMPI_DECLSPEC int MPI_Type_create_struct(int count, const int array_of_block_lengths[],
+        const MPI_Aint array_of_displacements[],
+        const MPI_Datatype array_of_types[],
+        MPI_Datatype *newtype);
+OMPI_DECLSPEC int MPI_Type_create_subarray(int ndims, const int size_array[], const int subsize_array[],
+        const int start_array[], int order,
+        MPI_Datatype oldtype, MPI_Datatype *newtype);
+OMPI_DECLSPEC int MPI_Type_create_resized(MPI_Datatype oldtype, MPI_Aint lb,
+        MPI_Aint extent, MPI_Datatype *newtype);
+OMPI_DECLSPEC int MPI_Type_delete_attr(MPI_Datatype type, int type_keyval);
+OMPI_DECLSPEC int MPI_Type_dup(MPI_Datatype type, MPI_Datatype *newtype);
+OMPI_DECLSPEC int MPI_Type_free(MPI_Datatype *type);
+OMPI_DECLSPEC int MPI_Type_free_keyval(int *type_keyval);
+OMPI_DECLSPEC MPI_Datatype MPI_Type_f2c(MPI_Fint datatype);
+OMPI_DECLSPEC int MPI_Type_get_attr(MPI_Datatype type, int type_keyval,
+        void *attribute_val, int *flag);
+OMPI_DECLSPEC int MPI_Type_get_contents(MPI_Datatype mtype, int max_integers,
+        int max_addresses, int max_datatypes,
+        int array_of_integers[],
+        MPI_Aint array_of_addresses[],
+        MPI_Datatype array_of_datatypes[]);
+OMPI_DECLSPEC int MPI_Type_get_envelope(MPI_Datatype type, int *num_integers,
+        int *num_addresses, int *num_datatypes,
+        int *combiner);
+OMPI_DECLSPEC int MPI_Type_get_extent(MPI_Datatype type, MPI_Aint *lb,
+        MPI_Aint *extent);
+OMPI_DECLSPEC int MPI_Type_get_extent_x(MPI_Datatype type, MPI_Count *lb,
+        MPI_Count *extent);
+OMPI_DECLSPEC int MPI_Type_get_name(MPI_Datatype type, char *type_name,
+        int *resultlen);
+OMPI_DECLSPEC int MPI_Type_get_true_extent(MPI_Datatype datatype, MPI_Aint *true_lb,
+        MPI_Aint *true_extent);
+OMPI_DECLSPEC int MPI_Type_get_true_extent_x(MPI_Datatype datatype, MPI_Count *true_lb,
+        MPI_Count *true_extent);
+OMPI_DECLSPEC int MPI_Type_indexed(int count, const int array_of_blocklengths[],
+        const int array_of_displacements[],
+        MPI_Datatype oldtype, MPI_Datatype *newtype);
+OMPI_DECLSPEC int MPI_Type_match_size(int typeclass, int size, MPI_Datatype *type);
+OMPI_DECLSPEC int MPI_Type_set_attr(MPI_Datatype type, int type_keyval,
+        void *attr_val);
+OMPI_DECLSPEC int MPI_Type_set_name(MPI_Datatype type, const char *type_name);
+OMPI_DECLSPEC int MPI_Type_size(MPI_Datatype type, int *size);
+OMPI_DECLSPEC int MPI_Type_size_x(MPI_Datatype type, MPI_Count *size);
+OMPI_DECLSPEC int MPI_Type_vector(int count, int blocklength, int stride,
+        MPI_Datatype oldtype, MPI_Datatype *newtype);
+OMPI_DECLSPEC int MPI_Unpack(const void *inbuf, int insize, int *position,
+        void *outbuf, int outcount, MPI_Datatype datatype,
+        MPI_Comm comm);
+OMPI_DECLSPEC int MPI_Unpublish_name(const char *service_name, MPI_Info info, const char *port_name);
+OMPI_DECLSPEC int MPI_Unpack_external (const char datarep[], const void *inbuf, MPI_Aint insize,
+        MPI_Aint *position, void *outbuf, int outcount,
+        MPI_Datatype datatype);
+OMPI_DECLSPEC int MPI_Waitall(int count, MPI_Request array_of_requests[],
+        MPI_Status *array_of_statuses);
+OMPI_DECLSPEC int MPI_Waitany(int count, MPI_Request array_of_requests[],
+        int *index, MPI_Status *status);
+OMPI_DECLSPEC int MPI_Wait(MPI_Request *request, MPI_Status *status);
+OMPI_DECLSPEC int MPI_Waitsome(int incount, MPI_Request array_of_requests[],
+        int *outcount, int array_of_indices[],
+        MPI_Status array_of_statuses[]);
+OMPI_DECLSPEC int MPI_Win_allocate(MPI_Aint size, int disp_unit, MPI_Info info,
+        MPI_Comm comm, void *baseptr, MPI_Win *win);
+OMPI_DECLSPEC int MPI_Win_allocate_shared(MPI_Aint size, int disp_unit, MPI_Info info,
+        MPI_Comm comm, void *baseptr, MPI_Win *win);
+OMPI_DECLSPEC int MPI_Win_attach(MPI_Win win, void *base, MPI_Aint size);
+OMPI_DECLSPEC MPI_Fint MPI_Win_c2f(MPI_Win win);
+OMPI_DECLSPEC int MPI_Win_call_errhandler(MPI_Win win, int errorcode);
+OMPI_DECLSPEC int MPI_Win_complete(MPI_Win win);
+OMPI_DECLSPEC int MPI_Win_create(void *base, MPI_Aint size, int disp_unit,
+        MPI_Info info, MPI_Comm comm, MPI_Win *win);
+OMPI_DECLSPEC int MPI_Win_create_dynamic(MPI_Info info, MPI_Comm comm, MPI_Win *win);
+OMPI_DECLSPEC int MPI_Win_create_errhandler(MPI_Win_errhandler_function *function,
+        MPI_Errhandler *errhandler);
+OMPI_DECLSPEC int MPI_Win_create_keyval(MPI_Win_copy_attr_function *win_copy_attr_fn,
+        MPI_Win_delete_attr_function *win_delete_attr_fn,
+        int *win_keyval, void *extra_state);
+OMPI_DECLSPEC int MPI_Win_delete_attr(MPI_Win win, int win_keyval);
+OMPI_DECLSPEC int MPI_Win_detach(MPI_Win win, const void *base);
+OMPI_DECLSPEC MPI_Win MPI_Win_f2c(MPI_Fint win);
+OMPI_DECLSPEC int MPI_Win_fence(int mpi_assert, MPI_Win win);
+OMPI_DECLSPEC int MPI_Win_flush(int rank, MPI_Win win);
+OMPI_DECLSPEC int MPI_Win_flush_all(MPI_Win win);
+OMPI_DECLSPEC int MPI_Win_flush_local(int rank, MPI_Win win);
+OMPI_DECLSPEC int MPI_Win_flush_local_all(MPI_Win win);
+OMPI_DECLSPEC int MPI_Win_free(MPI_Win *win);
+OMPI_DECLSPEC int MPI_Win_free_keyval(int *win_keyval);
+OMPI_DECLSPEC int MPI_Win_get_attr(MPI_Win win, int win_keyval,
+        void *attribute_val, int *flag);
+OMPI_DECLSPEC int MPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandler);
+OMPI_DECLSPEC int MPI_Win_get_group(MPI_Win win, MPI_Group *group);
+OMPI_DECLSPEC int MPI_Win_get_info(MPI_Win win, MPI_Info *info_used);
+OMPI_DECLSPEC int MPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen);
+OMPI_DECLSPEC int MPI_Win_lock(int lock_type, int rank, int mpi_assert, MPI_Win win);
+OMPI_DECLSPEC int MPI_Win_lock_all(int mpi_assert, MPI_Win win);
+OMPI_DECLSPEC int MPI_Win_post(MPI_Group group, int mpi_assert, MPI_Win win);
+OMPI_DECLSPEC int MPI_Win_set_attr(MPI_Win win, int win_keyval, void *attribute_val);
+OMPI_DECLSPEC int MPI_Win_set_errhandler(MPI_Win win, MPI_Errhandler errhandler);
+OMPI_DECLSPEC int MPI_Win_set_info(MPI_Win win, MPI_Info info);
+OMPI_DECLSPEC int MPI_Win_set_name(MPI_Win win, const char *win_name);
+OMPI_DECLSPEC int MPI_Win_shared_query(MPI_Win win, int rank, MPI_Aint *size, int *disp_unit, void *baseptr);
+OMPI_DECLSPEC int MPI_Win_start(MPI_Group group, int mpi_assert, MPI_Win win);
+OMPI_DECLSPEC int MPI_Win_sync(MPI_Win win);
+OMPI_DECLSPEC int MPI_Win_test(MPI_Win win, int *flag);
+OMPI_DECLSPEC int MPI_Win_unlock(int rank, MPI_Win win);
+OMPI_DECLSPEC int MPI_Win_unlock_all(MPI_Win win);
+OMPI_DECLSPEC int MPI_Win_wait(MPI_Win win);
+OMPI_DECLSPEC double MPI_Wtick(void);
+OMPI_DECLSPEC double MPI_Wtime(void);
+
+ /*
+  * Profiling MPI API
+  */
+OMPI_DECLSPEC int PMPI_Abort(MPI_Comm comm, int errorcode);
+OMPI_DECLSPEC int PMPI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
+        int target_rank, MPI_Aint target_disp, int target_count,
+        MPI_Datatype target_datatype, MPI_Op op, MPI_Win win);
+OMPI_DECLSPEC int PMPI_Add_error_class(int *errorclass);
+OMPI_DECLSPEC int PMPI_Add_error_code(int errorclass, int *errorcode);
+OMPI_DECLSPEC int PMPI_Add_error_string(int errorcode, const char *string);
+OMPI_DECLSPEC int PMPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount,
+        MPI_Datatype recvtype, MPI_Comm comm);
+OMPI_DECLSPEC int PMPI_Iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount,
+        MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[],
+        const int displs[], MPI_Datatype recvtype, MPI_Comm comm);
+OMPI_DECLSPEC int PMPI_Iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Alloc_mem(MPI_Aint size, MPI_Info info,
+        void *baseptr);
+OMPI_DECLSPEC int PMPI_Allreduce(const void *sendbuf, void *recvbuf, int count,
+        MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
+OMPI_DECLSPEC int PMPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Allreduce_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount,
+        MPI_Datatype recvtype, MPI_Comm comm);
+OMPI_DECLSPEC int PMPI_Ialltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype,
+        MPI_Comm comm);
+OMPI_DECLSPEC int PMPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Alltoallv_init(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype,
+        void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype,
+        MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Alltoallw(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI_Datatype sendtypes[],
+        void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[],
+        MPI_Comm comm);
+OMPI_DECLSPEC int PMPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI_Datatype sendtypes[],
+        void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[],
+        MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Alltoallw_init(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI_Datatype sendtypes[],
+        void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[],
+        MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Dist_graph_create(MPI_Comm comm_old, int n, const int nodes[],
+        const int degrees[], const int targets[],
+        const int weights[], MPI_Info info,
+        int reorder, MPI_Comm * newcomm);
+OMPI_DECLSPEC int PMPI_Dist_graph_create_adjacent(MPI_Comm comm_old,
+        int indegree, const int sources[],
+        const int sourceweights[],
+        int outdegree,
+        const int destinations[],
+        const int destweights[],
+        MPI_Info info, int reorder,
+        MPI_Comm *comm_dist_graph);
+OMPI_DECLSPEC int PMPI_Dist_graph_neighbors(MPI_Comm comm, int maxindegree,
+        int sources[], int sourceweights[],
+        int maxoutdegree,
+        int destinations[],
+        int destweights[]);
+OMPI_DECLSPEC int PMPI_Dist_graph_neighbors_count(MPI_Comm comm,
+        int *inneighbors,
+        int *outneighbors,
+        int *weighted);
+OMPI_DECLSPEC int PMPI_Barrier(MPI_Comm comm);
+OMPI_DECLSPEC int PMPI_Ibarrier(MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Barrier_init(MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Bcast(void *buffer, int count, MPI_Datatype datatype,
+        int root, MPI_Comm comm);
+OMPI_DECLSPEC int PMPI_Ibcast(void *buffer, int count, MPI_Datatype datatype,
+        int root, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Bcast_init(void *buffer, int count, MPI_Datatype datatype,
+        int root, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Bsend(const void *buf, int count, MPI_Datatype datatype,
+        int dest, int tag, MPI_Comm comm);
+OMPI_DECLSPEC int PMPI_Bsend_init(const void *buf, int count, MPI_Datatype datatype,
+        int dest, int tag, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Buffer_attach(void *buffer, int size);
+OMPI_DECLSPEC int PMPI_Buffer_detach(void *buffer, int *size);
+OMPI_DECLSPEC int PMPI_Cancel(MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Cart_coords(MPI_Comm comm, int rank, int maxdims, int coords[]);
+OMPI_DECLSPEC int PMPI_Cart_create(MPI_Comm old_comm, int ndims, const int dims[],
+        const int periods[], int reorder, MPI_Comm *comm_cart);
+OMPI_DECLSPEC int PMPI_Cart_get(MPI_Comm comm, int maxdims, int dims[],
+        int periods[], int coords[]);
+OMPI_DECLSPEC int PMPI_Cart_map(MPI_Comm comm, int ndims, const int dims[],
+        const int periods[], int *newrank);
+OMPI_DECLSPEC int PMPI_Cart_rank(MPI_Comm comm, const int coords[], int *rank);
+OMPI_DECLSPEC int PMPI_Cart_shift(MPI_Comm comm, int direction, int disp,
+        int *rank_source, int *rank_dest);
+OMPI_DECLSPEC int PMPI_Cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *new_comm);
+OMPI_DECLSPEC int PMPI_Cartdim_get(MPI_Comm comm, int *ndims);
+OMPI_DECLSPEC int PMPI_Close_port(const char *port_name);
+OMPI_DECLSPEC int PMPI_Comm_accept(const char *port_name, MPI_Info info, int root,
+        MPI_Comm comm, MPI_Comm *newcomm);
+OMPI_DECLSPEC MPI_Fint PMPI_Comm_c2f(MPI_Comm comm);
+OMPI_DECLSPEC int PMPI_Comm_call_errhandler(MPI_Comm comm, int errorcode);
+OMPI_DECLSPEC int PMPI_Comm_compare(MPI_Comm comm1, MPI_Comm comm2, int *result);
+OMPI_DECLSPEC int PMPI_Comm_connect(const char *port_name, MPI_Info info, int root,
+        MPI_Comm comm, MPI_Comm *newcomm);
+OMPI_DECLSPEC int PMPI_Comm_create_errhandler(MPI_Comm_errhandler_function *function,
+        MPI_Errhandler *errhandler);
+OMPI_DECLSPEC int PMPI_Comm_create_keyval(MPI_Comm_copy_attr_function *comm_copy_attr_fn,
+        MPI_Comm_delete_attr_function *comm_delete_attr_fn,
+        int *comm_keyval, void *extra_state);
+OMPI_DECLSPEC int PMPI_Comm_create_group(MPI_Comm comm, MPI_Group group, int tag, MPI_Comm *newcomm);
+OMPI_DECLSPEC int PMPI_Comm_create_from_group(MPI_Group group, const char *tag, MPI_Info info,
+        MPI_Errhandler errhandler, MPI_Comm *newcomm);
+OMPI_DECLSPEC int PMPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm);
+OMPI_DECLSPEC int PMPI_Comm_delete_attr(MPI_Comm comm, int comm_keyval);
+OMPI_DECLSPEC int PMPI_Comm_disconnect(MPI_Comm *comm);
+OMPI_DECLSPEC int PMPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm);
+OMPI_DECLSPEC int PMPI_Comm_idup(MPI_Comm comm, MPI_Comm *newcomm, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Comm_dup_with_info(MPI_Comm comm, MPI_Info info, MPI_Comm *newcomm);
+OMPI_DECLSPEC int PMPI_Comm_idup_with_info(MPI_Comm comm, MPI_Info info, MPI_Comm *newcomm,
+        MPI_Request *request);
+OMPI_DECLSPEC MPI_Comm PMPI_Comm_f2c(MPI_Fint comm);
+OMPI_DECLSPEC int PMPI_Comm_free_keyval(int *comm_keyval);
+OMPI_DECLSPEC int PMPI_Comm_free(MPI_Comm *comm);
+OMPI_DECLSPEC int PMPI_Comm_get_attr(MPI_Comm comm, int comm_keyval,
+        void *attribute_val, int *flag);
+OMPI_DECLSPEC int PMPI_Comm_get_errhandler(MPI_Comm comm, MPI_Errhandler *erhandler);
+OMPI_DECLSPEC int PMPI_Comm_get_info(MPI_Comm comm, MPI_Info *info_used);
+OMPI_DECLSPEC int PMPI_Comm_get_name(MPI_Comm comm, char *comm_name, int *resultlen);
+OMPI_DECLSPEC int PMPI_Comm_get_parent(MPI_Comm *parent);
+OMPI_DECLSPEC int PMPI_Comm_group(MPI_Comm comm, MPI_Group *group);
+OMPI_DECLSPEC int PMPI_Comm_join(int fd, MPI_Comm *intercomm);
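+/*
+ * Profiling-interface sketch: every MPI_ entry point declared in this
+ * header has a PMPI_ twin, so a tool can interpose on the MPI_ name
+ * and forward to the PMPI_ implementation. A minimal interposer,
+ * assuming it is linked ahead of the MPI library, could look like:
+ *
+ *   int MPI_Comm_rank(MPI_Comm comm, int *rank) {
+ *       // ...record the call for profiling purposes...
+ *       return PMPI_Comm_rank(comm, rank);  // defer to the real implementation
+ *   }
+ */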
+OMPI_DECLSPEC int PMPI_Comm_rank(MPI_Comm comm, int *rank);
+OMPI_DECLSPEC int PMPI_Comm_remote_group(MPI_Comm comm, MPI_Group *group);
+OMPI_DECLSPEC int PMPI_Comm_remote_size(MPI_Comm comm, int *size);
+OMPI_DECLSPEC int PMPI_Comm_set_attr(MPI_Comm comm, int comm_keyval, void *attribute_val);
+OMPI_DECLSPEC int PMPI_Comm_set_errhandler(MPI_Comm comm, MPI_Errhandler errhandler);
+OMPI_DECLSPEC int PMPI_Comm_set_info(MPI_Comm comm, MPI_Info info);
+OMPI_DECLSPEC int PMPI_Comm_set_name(MPI_Comm comm, const char *comm_name);
+OMPI_DECLSPEC int PMPI_Comm_size(MPI_Comm comm, int *size);
+OMPI_DECLSPEC int PMPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info info,
+        int root, MPI_Comm comm, MPI_Comm *intercomm,
+        int array_of_errcodes[]);
+OMPI_DECLSPEC int PMPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_of_argv[],
+        const int array_of_maxprocs[], const MPI_Info array_of_info[],
+        int root, MPI_Comm comm, MPI_Comm *intercomm,
+        int array_of_errcodes[]);
+OMPI_DECLSPEC int PMPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm);
+OMPI_DECLSPEC int PMPI_Comm_split_type(MPI_Comm comm, int split_type, int key, MPI_Info info, MPI_Comm *newcomm);
+OMPI_DECLSPEC int PMPI_Comm_test_inter(MPI_Comm comm, int *flag);
+OMPI_DECLSPEC int PMPI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
+        void *result_addr, MPI_Datatype datatype, int target_rank,
+        MPI_Aint target_disp, MPI_Win win);
+OMPI_DECLSPEC int PMPI_Dims_create(int nnodes, int ndims, int dims[]);
+OMPI_DECLSPEC MPI_Fint PMPI_Errhandler_c2f(MPI_Errhandler errhandler);
+OMPI_DECLSPEC MPI_Errhandler PMPI_Errhandler_f2c(MPI_Fint errhandler);
+OMPI_DECLSPEC int PMPI_Errhandler_free(MPI_Errhandler *errhandler);
+OMPI_DECLSPEC int PMPI_Error_class(int errorcode, int *errorclass);
+OMPI_DECLSPEC int PMPI_Error_string(int errorcode, char *string, int *resultlen);
+OMPI_DECLSPEC int PMPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm);
+OMPI_DECLSPEC int PMPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Exscan_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+        MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Fetch_and_op(const void *origin_addr, void *result_addr, MPI_Datatype datatype,
+        int target_rank, MPI_Aint target_disp, MPI_Op op, MPI_Win win);
+OMPI_DECLSPEC MPI_Fint PMPI_File_c2f(MPI_File file);
+OMPI_DECLSPEC MPI_File PMPI_File_f2c(MPI_Fint file);
+OMPI_DECLSPEC int PMPI_File_call_errhandler(MPI_File fh, int errorcode);
+OMPI_DECLSPEC int PMPI_File_create_errhandler(MPI_File_errhandler_function *function,
+        MPI_Errhandler *errhandler);
+OMPI_DECLSPEC int PMPI_File_set_errhandler( MPI_File file, MPI_Errhandler errhandler);
+OMPI_DECLSPEC int PMPI_File_get_errhandler( MPI_File file, MPI_Errhandler *errhandler);
+OMPI_DECLSPEC int PMPI_File_open(MPI_Comm comm, const char *filename, int amode,
+        MPI_Info info, MPI_File *fh);
+OMPI_DECLSPEC int PMPI_File_close(MPI_File *fh);
+OMPI_DECLSPEC int PMPI_File_delete(const char *filename, MPI_Info info);
+OMPI_DECLSPEC int PMPI_File_set_size(MPI_File fh, MPI_Offset size);
+OMPI_DECLSPEC int PMPI_File_preallocate(MPI_File fh, MPI_Offset size);
+OMPI_DECLSPEC int PMPI_File_get_size(MPI_File fh, MPI_Offset *size);
+OMPI_DECLSPEC int PMPI_File_get_group(MPI_File fh, MPI_Group *group);
+OMPI_DECLSPEC int PMPI_File_get_amode(MPI_File fh, int *amode);
+OMPI_DECLSPEC int PMPI_File_set_info(MPI_File fh, MPI_Info info);
+OMPI_DECLSPEC int PMPI_File_get_info(MPI_File fh, MPI_Info *info_used);
+OMPI_DECLSPEC int PMPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype,
+        MPI_Datatype filetype, const char *datarep, MPI_Info info);
+OMPI_DECLSPEC int PMPI_File_get_view(MPI_File fh, MPI_Offset *disp,
+        MPI_Datatype *etype,
+        MPI_Datatype *filetype, char *datarep);
+OMPI_DECLSPEC int PMPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf,
+        int count, MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int PMPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void *buf,
+        int count, MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int PMPI_File_write_at(MPI_File fh, MPI_Offset offset, const void *buf,
+        int count, MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int PMPI_File_write_at_all(MPI_File fh, MPI_Offset offset, const void *buf,
+        int count, MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int PMPI_File_iread_at(MPI_File fh, MPI_Offset offset, void *buf,
+        int count, MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_File_iwrite_at(MPI_File fh, MPI_Offset offset, const void *buf,
+        int count, MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_File_iread_at_all(MPI_File fh, MPI_Offset offset, void *buf,
+        int count, MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_File_iwrite_at_all(MPI_File fh, MPI_Offset offset, const void *buf,
+        int count, MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_File_read(MPI_File fh, void *buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int PMPI_File_read_all(MPI_File fh, void *buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int PMPI_File_write(MPI_File fh, const void *buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int PMPI_File_write_all(MPI_File fh, const void *buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int PMPI_File_iread(MPI_File fh, void *buf, int count,
+        MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_File_iwrite(MPI_File fh, const void *buf, int count,
+        MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_File_iread_all(MPI_File fh, void *buf, int count,
+        MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_File_iwrite_all(MPI_File fh, const void *buf, int count,
+        MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_File_seek(MPI_File fh, MPI_Offset offset, int whence);
+OMPI_DECLSPEC int PMPI_File_get_position(MPI_File fh, MPI_Offset *offset);
+OMPI_DECLSPEC int PMPI_File_get_byte_offset(MPI_File fh, MPI_Offset offset,
+        MPI_Offset *disp);
+OMPI_DECLSPEC int PMPI_File_read_shared(MPI_File fh, void *buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int PMPI_File_write_shared(MPI_File fh, const void *buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int PMPI_File_iread_shared(MPI_File fh, void *buf, int count,
+        MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_File_iwrite_shared(MPI_File fh, const void *buf, int count,
+        MPI_Datatype datatype, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_File_read_ordered(MPI_File fh, void *buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+OMPI_DECLSPEC int PMPI_File_write_ordered(MPI_File fh, const void *buf,
int count, + MPI_Datatype datatype, MPI_Status *status); +OMPI_DECLSPEC int PMPI_File_seek_shared(MPI_File fh, MPI_Offset offset, int whence); +OMPI_DECLSPEC int PMPI_File_get_position_shared(MPI_File fh, MPI_Offset *offset); +OMPI_DECLSPEC int PMPI_File_read_at_all_begin(MPI_File fh, MPI_Offset offset, void *buf, + int count, MPI_Datatype datatype); +OMPI_DECLSPEC int PMPI_File_read_at_all_end(MPI_File fh, void *buf, MPI_Status *status); +OMPI_DECLSPEC int PMPI_File_write_at_all_begin(MPI_File fh, MPI_Offset offset, const void *buf, + int count, MPI_Datatype datatype); +OMPI_DECLSPEC int PMPI_File_write_at_all_end(MPI_File fh, const void *buf, MPI_Status *status); +OMPI_DECLSPEC int PMPI_File_read_all_begin(MPI_File fh, void *buf, int count, + MPI_Datatype datatype); +OMPI_DECLSPEC int PMPI_File_read_all_end(MPI_File fh, void *buf, MPI_Status *status); +OMPI_DECLSPEC int PMPI_File_write_all_begin(MPI_File fh, const void *buf, int count, + MPI_Datatype datatype); +OMPI_DECLSPEC int PMPI_File_write_all_end(MPI_File fh, const void *buf, MPI_Status *status); +OMPI_DECLSPEC int PMPI_File_read_ordered_begin(MPI_File fh, void *buf, int count, + MPI_Datatype datatype); +OMPI_DECLSPEC int PMPI_File_read_ordered_end(MPI_File fh, void *buf, MPI_Status *status); +OMPI_DECLSPEC int PMPI_File_write_ordered_begin(MPI_File fh, const void *buf, int count, + MPI_Datatype datatype); +OMPI_DECLSPEC int PMPI_File_write_ordered_end(MPI_File fh, const void *buf, MPI_Status *status); +OMPI_DECLSPEC int PMPI_File_get_type_extent(MPI_File fh, MPI_Datatype datatype, + MPI_Aint *extent); +OMPI_DECLSPEC int PMPI_File_set_atomicity(MPI_File fh, int flag); +OMPI_DECLSPEC int PMPI_File_get_atomicity(MPI_File fh, int *flag); +OMPI_DECLSPEC int PMPI_File_sync(MPI_File fh); +OMPI_DECLSPEC int PMPI_Finalize(void); +OMPI_DECLSPEC int PMPI_Finalized(int *flag); +OMPI_DECLSPEC int PMPI_Free_mem(void *base); +OMPI_DECLSPEC int PMPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Gather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], + MPI_Datatype recvtype, int root, MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Gatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Get_address(const void *location, MPI_Aint *address); +OMPI_DECLSPEC int PMPI_Get_count(const MPI_Status *status, MPI_Datatype datatype, int *count); +OMPI_DECLSPEC int PMPI_Get_elements(const MPI_Status *status, MPI_Datatype datatype, + int *count); +OMPI_DECLSPEC int PMPI_Get_elements_x(const MPI_Status *status, MPI_Datatype datatype, + MPI_Count *count); 
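+/*
+ * Illustrative sketch, added in this patch and not part of the upstream
+ * Open MPI header: a minimal explicit-offset MPI-IO round trip with the
+ * MPI_File_* routines declared above. `rank` is assumed to hold the
+ * result of MPI_Comm_rank on MPI_COMM_WORLD.
+ *
+ *   MPI_File fh;
+ *   MPI_File_open(MPI_COMM_WORLD, "out.dat",
+ *                 MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
+ *   MPI_File_write_at(fh, (MPI_Offset)(rank * sizeof(int)), &rank, 1,
+ *                     MPI_INT, MPI_STATUS_IGNORE);
+ *   MPI_File_close(&fh);
+ */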
+OMPI_DECLSPEC int PMPI_Get(void *origin_addr, int origin_count, + MPI_Datatype origin_datatype, int target_rank, + MPI_Aint target_disp, int target_count, + MPI_Datatype target_datatype, MPI_Win win); +OMPI_DECLSPEC int PMPI_Get_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, + void *result_addr, int result_count, MPI_Datatype result_datatype, + int target_rank, MPI_Aint target_disp, int target_count, + MPI_Datatype target_datatype, MPI_Op op, MPI_Win win); +OMPI_DECLSPEC int PMPI_Get_library_version(char *version, int *resultlen); +OMPI_DECLSPEC int PMPI_Get_processor_name(char *name, int *resultlen); +OMPI_DECLSPEC int PMPI_Get_version(int *version, int *subversion); +OMPI_DECLSPEC int PMPI_Graph_create(MPI_Comm comm_old, int nnodes, const int index[], + const int edges[], int reorder, MPI_Comm *comm_graph); +OMPI_DECLSPEC int PMPI_Graph_get(MPI_Comm comm, int maxindex, int maxedges, + int index[], int edges[]); +OMPI_DECLSPEC int PMPI_Graph_map(MPI_Comm comm, int nnodes, const int index[], const int edges[], + int *newrank); +OMPI_DECLSPEC int PMPI_Graph_neighbors_count(MPI_Comm comm, int rank, int *nneighbors); +OMPI_DECLSPEC int PMPI_Graph_neighbors(MPI_Comm comm, int rank, int maxneighbors, + int neighbors[]); +OMPI_DECLSPEC int PMPI_Graphdims_get(MPI_Comm comm, int *nnodes, int *nedges); +OMPI_DECLSPEC int PMPI_Grequest_complete(MPI_Request request); +OMPI_DECLSPEC int PMPI_Grequest_start(MPI_Grequest_query_function *query_fn, + MPI_Grequest_free_function *free_fn, + MPI_Grequest_cancel_function *cancel_fn, + void *extra_state, MPI_Request *request); +OMPI_DECLSPEC MPI_Fint PMPI_Group_c2f(MPI_Group group); +OMPI_DECLSPEC int PMPI_Group_compare(MPI_Group group1, MPI_Group group2, int *result); +OMPI_DECLSPEC int PMPI_Group_difference(MPI_Group group1, MPI_Group group2, + MPI_Group *newgroup); +OMPI_DECLSPEC int PMPI_Group_excl(MPI_Group group, int n, const int ranks[], + MPI_Group *newgroup); +OMPI_DECLSPEC MPI_Group PMPI_Group_f2c(MPI_Fint group); +OMPI_DECLSPEC int PMPI_Group_free(MPI_Group *group); +OMPI_DECLSPEC int PMPI_Group_from_session_pset (MPI_Session session, const char *pset_name, MPI_Group *newgroup); +OMPI_DECLSPEC int PMPI_Group_incl(MPI_Group group, int n, const int ranks[], + MPI_Group *newgroup); +OMPI_DECLSPEC int PMPI_Group_intersection(MPI_Group group1, MPI_Group group2, + MPI_Group *newgroup); +OMPI_DECLSPEC int PMPI_Group_range_excl(MPI_Group group, int n, int ranges[][3], + MPI_Group *newgroup); +OMPI_DECLSPEC int PMPI_Group_range_incl(MPI_Group group, int n, int ranges[][3], + MPI_Group *newgroup); +OMPI_DECLSPEC int PMPI_Group_rank(MPI_Group group, int *rank); +OMPI_DECLSPEC int PMPI_Group_size(MPI_Group group, int *size); +OMPI_DECLSPEC int PMPI_Group_translate_ranks(MPI_Group group1, int n, const int ranks1[], + MPI_Group group2, int ranks2[]); +OMPI_DECLSPEC int PMPI_Group_union(MPI_Group group1, MPI_Group group2, + MPI_Group *newgroup); +OMPI_DECLSPEC int PMPI_Ibsend(const void *buf, int count, MPI_Datatype datatype, int dest, + int tag, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Improbe(int source, int tag, MPI_Comm comm, + int *flag, MPI_Message *message, + MPI_Status *status); +OMPI_DECLSPEC int PMPI_Imrecv(void *buf, int count, MPI_Datatype type, + MPI_Message *message, MPI_Request *request); +OMPI_DECLSPEC MPI_Fint PMPI_Info_c2f(MPI_Info info); +OMPI_DECLSPEC int PMPI_Info_create(MPI_Info *info); +OMPI_DECLSPEC int PMPI_Info_delete(MPI_Info info, const char *key); +OMPI_DECLSPEC int 
PMPI_Info_dup(MPI_Info info, MPI_Info *newinfo); +OMPI_DECLSPEC MPI_Info PMPI_Info_f2c(MPI_Fint info); +OMPI_DECLSPEC int PMPI_Info_free(MPI_Info *info); +OMPI_DECLSPEC int PMPI_Info_get(MPI_Info info, const char *key, int valuelen, + char *value, int *flag); +OMPI_DECLSPEC int PMPI_Info_get_nkeys(MPI_Info info, int *nkeys); +OMPI_DECLSPEC int PMPI_Info_get_nthkey(MPI_Info info, int n, char *key); +OMPI_DECLSPEC int PMPI_Info_get_string(MPI_Info info, const char *key, int *buflen, + char *value, int *flag); +OMPI_DECLSPEC int PMPI_Info_get_valuelen(MPI_Info info, const char *key, int *valuelen, + int *flag); +OMPI_DECLSPEC int PMPI_Info_set(MPI_Info info, const char *key, const char *value); +OMPI_DECLSPEC int PMPI_Init(int *argc, char ***argv); +OMPI_DECLSPEC int PMPI_Initialized(int *flag); +OMPI_DECLSPEC int PMPI_Init_thread(int *argc, char ***argv, int required, + int *provided); +OMPI_DECLSPEC int PMPI_Intercomm_create(MPI_Comm local_comm, int local_leader, + MPI_Comm bridge_comm, int remote_leader, + int tag, MPI_Comm *newintercomm); +OMPI_DECLSPEC int PMPI_Intercomm_create_from_groups (MPI_Group local_group, int local_leader, MPI_Group remote_group, + int remote_leader, const char *tag, MPI_Info info, MPI_Errhandler errhandler, + MPI_Comm *newintercomm); +OMPI_DECLSPEC int PMPI_Intercomm_merge(MPI_Comm intercomm, int high, + MPI_Comm *newintercomm); +OMPI_DECLSPEC int PMPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag, + MPI_Status *status); +OMPI_DECLSPEC int PMPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source, + int tag, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Irsend(const void *buf, int count, MPI_Datatype datatype, int dest, + int tag, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, + int tag, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Isendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + int dest, int sendtag, void *recvbuf, int recvcount, + MPI_Datatype recvtype, int source, int recvtag, + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Isendrecv_replace(void * buf, int count, MPI_Datatype datatype, + int dest, int sendtag, int source, int recvtag, + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Issend(const void *buf, int count, MPI_Datatype datatype, int dest, + int tag, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Precv_init(void* buf, int partitions, MPI_Count count, + MPI_Datatype datatype, int source, int tag, MPI_Comm comm, + MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Psend_init(const void* buf, int partitions, MPI_Count count, + MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, + MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Pready(int partitions, MPI_Request request); +OMPI_DECLSPEC int PMPI_Pready_range(int partition_low, int partition_high, + MPI_Request request); +OMPI_DECLSPEC int PMPI_Pready_list(int length, int partition_list[], MPI_Request request); +OMPI_DECLSPEC int PMPI_Parrived(MPI_Request request, MPI_Count partition, int *flag); +OMPI_DECLSPEC int PMPI_Is_thread_main(int *flag); +OMPI_DECLSPEC int PMPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name); +OMPI_DECLSPEC MPI_Fint PMPI_Message_c2f(MPI_Message message); +OMPI_DECLSPEC MPI_Message PMPI_Message_f2c(MPI_Fint message); +OMPI_DECLSPEC int PMPI_Mprobe(int source, int tag, MPI_Comm comm, + MPI_Message *message, + MPI_Status *status); 
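+/*
+ * Illustrative sketch, added in this patch and not part of the upstream
+ * Open MPI header: sizing a receive with the matched probe declared just
+ * above and the matched receive declared next, so the probed message
+ * cannot be intercepted by another thread between the two calls.
+ *
+ *   MPI_Message msg; MPI_Status st; int n;
+ *   MPI_Mprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &msg, &st);
+ *   MPI_Get_count(&st, MPI_INT, &n);
+ *   int *buf = malloc(n * sizeof *buf);
+ *   MPI_Mrecv(buf, n, MPI_INT, &msg, MPI_STATUS_IGNORE);
+ */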
+OMPI_DECLSPEC int PMPI_Mrecv(void *buf, int count, MPI_Datatype type, + MPI_Message *message, MPI_Status *status); +OMPI_DECLSPEC int PMPI_Neighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Ineighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Neighbor_allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Neighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], + MPI_Datatype recvtype, MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Ineighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Neighbor_allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Neighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Ineighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Neighbor_alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Neighbor_alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, + MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Ineighbor_alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Neighbor_alltoallv_init(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Neighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], + void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], + MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], + void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], + MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Neighbor_alltoallw_init(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], + void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], + MPI_Comm 
comm, MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC MPI_Fint PMPI_Op_c2f(MPI_Op op); +OMPI_DECLSPEC int PMPI_Op_commutative(MPI_Op op, int *commute); +OMPI_DECLSPEC int PMPI_Op_create(MPI_User_function *function, int commute, MPI_Op *op); +OMPI_DECLSPEC int PMPI_Open_port(MPI_Info info, char *port_name); +OMPI_DECLSPEC MPI_Op PMPI_Op_f2c(MPI_Fint op); +OMPI_DECLSPEC int PMPI_Op_free(MPI_Op *op); +OMPI_DECLSPEC int PMPI_Pack_external(const char datarep[], const void *inbuf, int incount, + MPI_Datatype datatype, void *outbuf, + MPI_Aint outsize, MPI_Aint *position); +OMPI_DECLSPEC int PMPI_Pack_external_size(const char datarep[], int incount, + MPI_Datatype datatype, MPI_Aint *size); +OMPI_DECLSPEC int PMPI_Pack(const void *inbuf, int incount, MPI_Datatype datatype, + void *outbuf, int outsize, int *position, MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Pack_size(int incount, MPI_Datatype datatype, MPI_Comm comm, + int *size); +OMPI_DECLSPEC int PMPI_Pcontrol(const int level, ...); +OMPI_DECLSPEC int PMPI_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status); +OMPI_DECLSPEC int PMPI_Publish_name(const char *service_name, MPI_Info info, + const char *port_name); +OMPI_DECLSPEC int PMPI_Put(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, + int target_rank, MPI_Aint target_disp, int target_count, + MPI_Datatype target_datatype, MPI_Win win); +OMPI_DECLSPEC int PMPI_Query_thread(int *provided); +OMPI_DECLSPEC int PMPI_Raccumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, + int target_rank, MPI_Aint target_disp, int target_count, + MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Recv_init(void *buf, int count, MPI_Datatype datatype, int source, + int tag, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, + int tag, MPI_Comm comm, MPI_Status *status); +OMPI_DECLSPEC int PMPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, int root, MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, int root, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Reduce_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, int root, MPI_Comm comm, MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Reduce_local(const void *inbuf, void *inoutbuf, int count, + MPI_Datatype datatype, MPI_Op op); +OMPI_DECLSPEC int PMPI_Reduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Reduce_scatter_init(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Reduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int PMPI_Reduce_scatter_block_init(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, + MPI_Op op, MPI_Comm 
comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Register_datarep(const char *datarep,
+                                        MPI_Datarep_conversion_function *read_conversion_fn,
+                                        MPI_Datarep_conversion_function *write_conversion_fn,
+                                        MPI_Datarep_extent_function *dtype_file_extent_fn,
+                                        void *extra_state);
+OMPI_DECLSPEC MPI_Fint PMPI_Request_c2f(MPI_Request request);
+OMPI_DECLSPEC MPI_Request PMPI_Request_f2c(MPI_Fint request);
+OMPI_DECLSPEC int PMPI_Request_free(MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Request_get_status(MPI_Request request, int *flag,
+                                          MPI_Status *status);
+OMPI_DECLSPEC int PMPI_Rget(void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
+                            int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype,
+                            MPI_Win win, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Rget_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
+                                       void *result_addr, int result_count, MPI_Datatype result_datatype,
+                                       int target_rank, MPI_Aint target_disp, int target_count,
+                                       MPI_Datatype target_datatype, MPI_Op op,
+                                       MPI_Win win, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Rput(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
+                            int target_rank, MPI_Aint target_disp, int target_count,
+                            MPI_Datatype target_datatype, MPI_Win win, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Rsend(const void *ibuf, int count, MPI_Datatype datatype, int dest,
+                             int tag, MPI_Comm comm);
+OMPI_DECLSPEC int PMPI_Rsend_init(const void *buf, int count, MPI_Datatype datatype,
+                                  int dest, int tag, MPI_Comm comm,
+                                  MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+                            MPI_Op op, MPI_Comm comm);
+OMPI_DECLSPEC int PMPI_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+                             MPI_Op op, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Scan_init(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+                                 MPI_Op op, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Scatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                               void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                               int root, MPI_Comm comm);
+OMPI_DECLSPEC int PMPI_Iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                                void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                                int root, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Scatter_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                                    void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                                    int root, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Scatterv(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype,
+                                void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                                int root, MPI_Comm comm);
+OMPI_DECLSPEC int PMPI_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype,
+                                 void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                                 int root, MPI_Comm comm, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Scatterv_init(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype,
+                                     void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                                     int root, MPI_Comm comm, MPI_Info info, MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Send_init(const void *buf, int count, MPI_Datatype datatype,
+                                 int dest, int tag, MPI_Comm comm,
+                                 MPI_Request *request);
+OMPI_DECLSPEC int PMPI_Send(const void *buf, int count, MPI_Datatype datatype, 
int dest, + int tag, MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, + int dest, int sendtag, void *recvbuf, int recvcount, + MPI_Datatype recvtype, int source, int recvtag, + MPI_Comm comm, MPI_Status *status); +OMPI_DECLSPEC int PMPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype, + int dest, int sendtag, int source, int recvtag, + MPI_Comm comm, MPI_Status *status); +OMPI_DECLSPEC MPI_Fint PMPI_Session_c2f (const MPI_Session session); +OMPI_DECLSPEC int PMPI_Session_create_errhandler (MPI_Session_errhandler_function *session_errhandler_fn, + MPI_Errhandler *errhandler); +OMPI_DECLSPEC int PMPI_Session_finalize (MPI_Session *session); +OMPI_DECLSPEC int PMPI_Session_get_info (MPI_Session session, MPI_Info *info_used); +OMPI_DECLSPEC int PMPI_Session_get_num_psets (MPI_Session session, MPI_Info info, int *npset_names); +OMPI_DECLSPEC int PMPI_Session_get_nth_pset (MPI_Session session, MPI_Info info, int n, int *len, char *pset_name); +OMPI_DECLSPEC int PMPI_Session_get_pset_info (MPI_Session session, const char *pset_name, MPI_Info *info_used); +OMPI_DECLSPEC int PMPI_Session_init (MPI_Info info, MPI_Errhandler errhandler, + MPI_Session *session); +OMPI_DECLSPEC MPI_Session PMPI_Session_f2c (MPI_Fint session); +OMPI_DECLSPEC int PMPI_Session_set_info (MPI_Session session, MPI_Info info); +OMPI_DECLSPEC int PMPI_Ssend_init(const void *buf, int count, MPI_Datatype datatype, + int dest, int tag, MPI_Comm comm, + MPI_Request *request); +OMPI_DECLSPEC int PMPI_Ssend(const void *buf, int count, MPI_Datatype datatype, int dest, + int tag, MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Start(MPI_Request *request); +OMPI_DECLSPEC int PMPI_Startall(int count, MPI_Request array_of_requests[]); +OMPI_DECLSPEC int PMPI_Status_c2f(const MPI_Status *c_status, MPI_Fint *f_status); +OMPI_DECLSPEC int PMPI_Status_c2f08(const MPI_Status *c_status, MPI_F08_status *f08_status); +OMPI_DECLSPEC int PMPI_Status_f082f(const MPI_F08_status *f08_status, MPI_Fint *f_status); +OMPI_DECLSPEC int PMPI_Status_f082c(const MPI_F08_status *f08_status, MPI_Status *c_status); +OMPI_DECLSPEC int PMPI_Status_f2c(const MPI_Fint *f_status, MPI_Status *c_status); +OMPI_DECLSPEC int PMPI_Status_f2f08(const MPI_Fint *f_status, MPI_F08_status *f08_status); +OMPI_DECLSPEC int PMPI_Status_set_cancelled(MPI_Status *status, int flag); +OMPI_DECLSPEC int PMPI_Status_set_elements(MPI_Status *status, MPI_Datatype datatype, + int count); +OMPI_DECLSPEC int PMPI_Status_set_elements_x(MPI_Status *status, MPI_Datatype datatype, + MPI_Count count); +OMPI_DECLSPEC int PMPI_Testall(int count, MPI_Request array_of_requests[], int *flag, + MPI_Status array_of_statuses[]); +OMPI_DECLSPEC int PMPI_Testany(int count, MPI_Request array_of_requests[], int *index, int *flag, MPI_Status *status); +OMPI_DECLSPEC int PMPI_Test(MPI_Request *request, int *flag, MPI_Status *status); +OMPI_DECLSPEC int PMPI_Test_cancelled(const MPI_Status *status, int *flag); +OMPI_DECLSPEC int PMPI_Testsome(int incount, MPI_Request array_of_requests[], + int *outcount, int array_of_indices[], + MPI_Status array_of_statuses[]); +OMPI_DECLSPEC int PMPI_Topo_test(MPI_Comm comm, int *status); +OMPI_DECLSPEC MPI_Fint PMPI_Type_c2f(MPI_Datatype datatype); +OMPI_DECLSPEC int PMPI_Type_commit(MPI_Datatype *type); +OMPI_DECLSPEC int PMPI_Type_contiguous(int count, MPI_Datatype oldtype, + MPI_Datatype *newtype); +OMPI_DECLSPEC int PMPI_Type_create_darray(int size, int rank, int ndims, + const int gsize_array[], const 
int distrib_array[], + const int darg_array[], const int psize_array[], + int order, MPI_Datatype oldtype, + MPI_Datatype *newtype); +OMPI_DECLSPEC int PMPI_Type_create_f90_complex(int p, int r, MPI_Datatype *newtype); +OMPI_DECLSPEC int PMPI_Type_create_f90_integer(int r, MPI_Datatype *newtype); +OMPI_DECLSPEC int PMPI_Type_create_f90_real(int p, int r, MPI_Datatype *newtype); +OMPI_DECLSPEC int PMPI_Type_create_hindexed(int count, const int array_of_blocklengths[], + const MPI_Aint array_of_displacements[], + MPI_Datatype oldtype, + MPI_Datatype *newtype); +OMPI_DECLSPEC int PMPI_Type_create_hvector(int count, int blocklength, MPI_Aint stride, + MPI_Datatype oldtype, + MPI_Datatype *newtype); +OMPI_DECLSPEC int PMPI_Type_create_keyval(MPI_Type_copy_attr_function *type_copy_attr_fn, + MPI_Type_delete_attr_function *type_delete_attr_fn, + int *type_keyval, void *extra_state); +OMPI_DECLSPEC int PMPI_Type_create_hindexed_block(int count, int blocklength, + const MPI_Aint array_of_displacements[], + MPI_Datatype oldtype, + MPI_Datatype *newtype); +OMPI_DECLSPEC int PMPI_Type_create_indexed_block(int count, int blocklength, + const int array_of_displacements[], + MPI_Datatype oldtype, + MPI_Datatype *newtype); +OMPI_DECLSPEC int PMPI_Type_create_struct(int count, const int array_of_block_lengths[], + const MPI_Aint array_of_displacements[], + const MPI_Datatype array_of_types[], + MPI_Datatype *newtype); +OMPI_DECLSPEC int PMPI_Type_create_subarray(int ndims, const int size_array[], const int subsize_array[], + const int start_array[], int order, + MPI_Datatype oldtype, MPI_Datatype *newtype); +OMPI_DECLSPEC int PMPI_Type_create_resized(MPI_Datatype oldtype, MPI_Aint lb, + MPI_Aint extent, MPI_Datatype *newtype); +OMPI_DECLSPEC int PMPI_Type_delete_attr(MPI_Datatype type, int type_keyval); +OMPI_DECLSPEC int PMPI_Type_dup(MPI_Datatype type, MPI_Datatype *newtype); +OMPI_DECLSPEC int PMPI_Type_free(MPI_Datatype *type); +OMPI_DECLSPEC int PMPI_Type_free_keyval(int *type_keyval); +OMPI_DECLSPEC MPI_Datatype PMPI_Type_f2c(MPI_Fint datatype); +OMPI_DECLSPEC int PMPI_Type_get_attr(MPI_Datatype type, int type_keyval, + void *attribute_val, int *flag); +OMPI_DECLSPEC int PMPI_Type_get_contents(MPI_Datatype mtype, int max_integers, + int max_addresses, int max_datatypes, + int array_of_integers[], + MPI_Aint array_of_addresses[], + MPI_Datatype array_of_datatypes[]); +OMPI_DECLSPEC int PMPI_Type_get_envelope(MPI_Datatype type, int *num_integers, + int *num_addresses, int *num_datatypes, + int *combiner); +OMPI_DECLSPEC int PMPI_Type_get_extent(MPI_Datatype type, MPI_Aint *lb, + MPI_Aint *extent); +OMPI_DECLSPEC int PMPI_Type_get_extent_x(MPI_Datatype type, MPI_Count *lb, + MPI_Count *extent); +OMPI_DECLSPEC int PMPI_Type_get_name(MPI_Datatype type, char *type_name, + int *resultlen); +OMPI_DECLSPEC int PMPI_Type_get_true_extent(MPI_Datatype datatype, MPI_Aint *true_lb, + MPI_Aint *true_extent); +OMPI_DECLSPEC int PMPI_Type_get_true_extent_x(MPI_Datatype datatype, MPI_Count *true_lb, + MPI_Count *true_extent); +OMPI_DECLSPEC int PMPI_Type_indexed(int count, const int array_of_blocklengths[], + const int array_of_displacements[], + MPI_Datatype oldtype, MPI_Datatype *newtype); +OMPI_DECLSPEC int PMPI_Type_match_size(int typeclass, int size, MPI_Datatype *type); +OMPI_DECLSPEC int PMPI_Type_set_attr(MPI_Datatype type, int type_keyval, + void *attr_val); +OMPI_DECLSPEC int PMPI_Type_set_name(MPI_Datatype type, const char *type_name); +OMPI_DECLSPEC int PMPI_Type_size(MPI_Datatype type, int *size); 
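+/*
+ * Illustrative sketch, added in this patch and not part of the upstream
+ * Open MPI header: typical lifecycle of a derived datatype built with the
+ * constructors declared above. The vector selects one column of a
+ * row-major 4x8 double matrix (count 4, blocklength 1, stride 8);
+ * `matrix`, `dest` and `tag` are assumed caller-provided.
+ *
+ *   MPI_Datatype col;
+ *   MPI_Type_vector(4, 1, 8, MPI_DOUBLE, &col);
+ *   MPI_Type_commit(&col);
+ *   MPI_Send(matrix, 1, col, dest, tag, MPI_COMM_WORLD);
+ *   MPI_Type_free(&col);
+ */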
+OMPI_DECLSPEC int PMPI_Type_size_x(MPI_Datatype type, MPI_Count *size); +OMPI_DECLSPEC int PMPI_Type_vector(int count, int blocklength, int stride, + MPI_Datatype oldtype, MPI_Datatype *newtype); +OMPI_DECLSPEC int PMPI_Unpack(const void *inbuf, int insize, int *position, + void *outbuf, int outcount, MPI_Datatype datatype, + MPI_Comm comm); +OMPI_DECLSPEC int PMPI_Unpublish_name(const char *service_name, MPI_Info info, + const char *port_name); +OMPI_DECLSPEC int PMPI_Unpack_external (const char datarep[], const void *inbuf, MPI_Aint insize, + MPI_Aint *position, void *outbuf, int outcount, + MPI_Datatype datatype); +OMPI_DECLSPEC int PMPI_Waitall(int count, MPI_Request array_of_requests[], + MPI_Status array_of_statuses[]); +OMPI_DECLSPEC int PMPI_Waitany(int count, MPI_Request array_of_requests[], + int *index, MPI_Status *status); +OMPI_DECLSPEC int PMPI_Wait(MPI_Request *request, MPI_Status *status); +OMPI_DECLSPEC int PMPI_Waitsome(int incount, MPI_Request array_of_requests[], + int *outcount, int array_of_indices[], + MPI_Status array_of_statuses[]); +OMPI_DECLSPEC int PMPI_Win_allocate(MPI_Aint size, int disp_unit, MPI_Info info, + MPI_Comm comm, void *baseptr, MPI_Win *win); +OMPI_DECLSPEC int PMPI_Win_allocate_shared(MPI_Aint size, int disp_unit, MPI_Info info, + MPI_Comm comm, void *baseptr, MPI_Win *win); +OMPI_DECLSPEC int PMPI_Win_attach(MPI_Win win, void *base, MPI_Aint size); +OMPI_DECLSPEC MPI_Fint PMPI_Win_c2f(MPI_Win win); +OMPI_DECLSPEC int PMPI_Win_call_errhandler(MPI_Win win, int errorcode); +OMPI_DECLSPEC int PMPI_Win_complete(MPI_Win win); +OMPI_DECLSPEC int PMPI_Win_create(void *base, MPI_Aint size, int disp_unit, + MPI_Info info, MPI_Comm comm, MPI_Win *win); +OMPI_DECLSPEC int PMPI_Win_create_dynamic(MPI_Info info, MPI_Comm comm, MPI_Win *win); +OMPI_DECLSPEC int PMPI_Win_create_errhandler(MPI_Win_errhandler_function *function, + MPI_Errhandler *errhandler); +OMPI_DECLSPEC int PMPI_Win_create_keyval(MPI_Win_copy_attr_function *win_copy_attr_fn, + MPI_Win_delete_attr_function *win_delete_attr_fn, + int *win_keyval, void *extra_state); +OMPI_DECLSPEC int PMPI_Win_delete_attr(MPI_Win win, int win_keyval); +OMPI_DECLSPEC int PMPI_Win_detach(MPI_Win win, const void *base); +OMPI_DECLSPEC MPI_Win PMPI_Win_f2c(MPI_Fint win); +OMPI_DECLSPEC int PMPI_Win_fence(int mpi_assert, MPI_Win win); +OMPI_DECLSPEC int PMPI_Win_flush(int rank, MPI_Win win); +OMPI_DECLSPEC int PMPI_Win_flush_all(MPI_Win win); +OMPI_DECLSPEC int PMPI_Win_flush_local(int rank, MPI_Win win); +OMPI_DECLSPEC int PMPI_Win_flush_local_all(MPI_Win win); +OMPI_DECLSPEC int PMPI_Win_free(MPI_Win *win); +OMPI_DECLSPEC int PMPI_Win_free_keyval(int *win_keyval); +OMPI_DECLSPEC int PMPI_Win_get_attr(MPI_Win win, int win_keyval, + void *attribute_val, int *flag); +OMPI_DECLSPEC int PMPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandler); +OMPI_DECLSPEC int PMPI_Win_get_group(MPI_Win win, MPI_Group *group); +OMPI_DECLSPEC int PMPI_Win_get_info(MPI_Win win, MPI_Info *info_used); +OMPI_DECLSPEC int PMPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen); +OMPI_DECLSPEC int PMPI_Win_lock(int lock_type, int rank, int mpi_assert, MPI_Win win); +OMPI_DECLSPEC int PMPI_Win_lock_all(int mpi_assert, MPI_Win win); +OMPI_DECLSPEC int PMPI_Win_post(MPI_Group group, int mpi_assert, MPI_Win win); +OMPI_DECLSPEC int PMPI_Win_set_attr(MPI_Win win, int win_keyval, void *attribute_val); +OMPI_DECLSPEC int PMPI_Win_set_errhandler(MPI_Win win, MPI_Errhandler errhandler); +OMPI_DECLSPEC int PMPI_Win_set_info(MPI_Win 
win, MPI_Info info); +OMPI_DECLSPEC int PMPI_Win_set_name(MPI_Win win, const char *win_name); +OMPI_DECLSPEC int PMPI_Win_shared_query(MPI_Win win, int rank, MPI_Aint *size, int *disp_unit, void *baseptr); +OMPI_DECLSPEC int PMPI_Win_start(MPI_Group group, int mpi_assert, MPI_Win win); +OMPI_DECLSPEC int PMPI_Win_sync(MPI_Win win); +OMPI_DECLSPEC int PMPI_Win_test(MPI_Win win, int *flag); +OMPI_DECLSPEC int PMPI_Win_unlock(int rank, MPI_Win win); +OMPI_DECLSPEC int PMPI_Win_unlock_all(MPI_Win win); +OMPI_DECLSPEC int PMPI_Win_wait(MPI_Win win); +OMPI_DECLSPEC double PMPI_Wtick(void); +OMPI_DECLSPEC double PMPI_Wtime(void); +OMPI_DECLSPEC int PMPI_T_init_thread (int required, int *provided); +OMPI_DECLSPEC int PMPI_T_finalize (void); +OMPI_DECLSPEC int PMPI_T_cvar_get_num (int *num_cvar); +OMPI_DECLSPEC int PMPI_T_cvar_get_info (int cvar_index, char *name, int *name_len, + int *verbosity, MPI_Datatype *datatype, + MPI_T_enum *enumtype, char *desc, + int *desc_len, int *bind, int *scope); +OMPI_DECLSPEC int PMPI_T_cvar_get_index (const char *name, int *cvar_index); +OMPI_DECLSPEC int PMPI_T_cvar_handle_alloc (int cvar_index, void *obj_handle, + MPI_T_cvar_handle *handle, int *count); +OMPI_DECLSPEC int PMPI_T_cvar_handle_free (MPI_T_cvar_handle *handle); +OMPI_DECLSPEC int PMPI_T_cvar_read (MPI_T_cvar_handle handle, void *buf); +OMPI_DECLSPEC int PMPI_T_cvar_write (MPI_T_cvar_handle handle, const void *buf); +OMPI_DECLSPEC int PMPI_T_category_get_num(int *num_cat); +OMPI_DECLSPEC int PMPI_T_category_get_info(int cat_index, char *name, int *name_len, + char *desc, int *desc_len, int *num_cvars, + int *num_pvars, int *num_categories); +OMPI_DECLSPEC int PMPI_T_category_get_index (const char *name, int *category_index); +OMPI_DECLSPEC int PMPI_T_category_get_cvars(int cat_index, int len, int indices[]); +OMPI_DECLSPEC int PMPI_T_category_get_pvars(int cat_index, int len, int indices[]); +OMPI_DECLSPEC int PMPI_T_category_get_categories(int cat_index, int len, int indices[]); +OMPI_DECLSPEC int PMPI_T_category_changed(int *stamp); + +OMPI_DECLSPEC int PMPI_T_pvar_get_num(int *num_pvar); +OMPI_DECLSPEC int PMPI_T_pvar_get_info(int pvar_index, char *name, int *name_len, + int *verbosity, int *var_class, MPI_Datatype *datatype, + MPI_T_enum *enumtype, char *desc, int *desc_len, int *bind, + int *readonly, int *continuous, int *atomic); +OMPI_DECLSPEC int PMPI_T_pvar_get_index (const char *name, int var_class, int *pvar_index); +OMPI_DECLSPEC int PMPI_T_pvar_session_create(MPI_T_pvar_session *session); +OMPI_DECLSPEC int PMPI_T_pvar_session_free(MPI_T_pvar_session *session); +OMPI_DECLSPEC int PMPI_T_pvar_handle_alloc(MPI_T_pvar_session session, int pvar_index, + void *obj_handle, MPI_T_pvar_handle *handle, int *count); +OMPI_DECLSPEC int PMPI_T_pvar_handle_free(MPI_T_pvar_session session, MPI_T_pvar_handle *handle); +OMPI_DECLSPEC int PMPI_T_pvar_start(MPI_T_pvar_session session, MPI_T_pvar_handle handle); +OMPI_DECLSPEC int PMPI_T_pvar_stop(MPI_T_pvar_session session, MPI_T_pvar_handle handle); +OMPI_DECLSPEC int PMPI_T_pvar_read(MPI_T_pvar_session session, MPI_T_pvar_handle handle, + void *buf); +OMPI_DECLSPEC int PMPI_T_pvar_write(MPI_T_pvar_session session, MPI_T_pvar_handle handle, + const void *buf); +OMPI_DECLSPEC int PMPI_T_pvar_reset(MPI_T_pvar_session session, MPI_T_pvar_handle handle); +OMPI_DECLSPEC int PMPI_T_pvar_readreset(MPI_T_pvar_session session, MPI_T_pvar_handle handle, + void *buf); +OMPI_DECLSPEC int PMPI_T_enum_get_info(MPI_T_enum enumtype, int *num, char *name, int 
*name_len); +OMPI_DECLSPEC int PMPI_T_enum_get_item(MPI_T_enum enumtype, int index, int *value, char *name, + int *name_len); + + /* + * Tool MPI API + */ +OMPI_DECLSPEC int MPI_T_init_thread (int required, int *provided); +OMPI_DECLSPEC int MPI_T_finalize (void); +OMPI_DECLSPEC int MPI_T_cvar_get_num (int *num_cvar); +OMPI_DECLSPEC int MPI_T_cvar_get_info (int cvar_index, char *name, int *name_len, + int *verbosity, MPI_Datatype *datatype, + MPI_T_enum *enumtype, char *desc, + int *desc_len, int *bind, int *scope); +OMPI_DECLSPEC int MPI_T_cvar_get_index (const char *name, int *cvar_index); +OMPI_DECLSPEC int MPI_T_cvar_handle_alloc (int cvar_index, void *obj_handle, + MPI_T_cvar_handle *handle, int *count); +OMPI_DECLSPEC int MPI_T_cvar_handle_free (MPI_T_cvar_handle *handle); +OMPI_DECLSPEC int MPI_T_cvar_read (MPI_T_cvar_handle handle, void *buf); +OMPI_DECLSPEC int MPI_T_cvar_write (MPI_T_cvar_handle handle, const void *buf); +OMPI_DECLSPEC int MPI_T_category_get_num(int *num_cat); +OMPI_DECLSPEC int MPI_T_category_get_info(int cat_index, char *name, int *name_len, + char *desc, int *desc_len, int *num_cvars, + int *num_pvars, int *num_categories); +OMPI_DECLSPEC int MPI_T_category_get_index (const char *name, int *category_index); +OMPI_DECLSPEC int MPI_T_category_get_cvars(int cat_index, int len, int indices[]); +OMPI_DECLSPEC int MPI_T_category_get_pvars(int cat_index, int len, int indices[]); +OMPI_DECLSPEC int MPI_T_category_get_categories(int cat_index, int len, int indices[]); +OMPI_DECLSPEC int MPI_T_category_changed(int *stamp); + +OMPI_DECLSPEC int MPI_T_pvar_get_num(int *num_pvar); +OMPI_DECLSPEC int MPI_T_pvar_get_info(int pvar_index, char *name, int *name_len, + int *verbosity, int *var_class, MPI_Datatype *datatype, + MPI_T_enum *enumtype, char *desc, int *desc_len, int *bind, + int *readonly, int *continuous, int *atomic); +OMPI_DECLSPEC int MPI_T_pvar_get_index (const char *name, int var_class, int *pvar_index); +OMPI_DECLSPEC int MPI_T_pvar_session_create(MPI_T_pvar_session *session); +OMPI_DECLSPEC int MPI_T_pvar_session_free(MPI_T_pvar_session *session); +OMPI_DECLSPEC int MPI_T_pvar_handle_alloc(MPI_T_pvar_session session, int pvar_index, + void *obj_handle, MPI_T_pvar_handle *handle, int *count); +OMPI_DECLSPEC int MPI_T_pvar_handle_free(MPI_T_pvar_session session, MPI_T_pvar_handle *handle); +OMPI_DECLSPEC int MPI_T_pvar_start(MPI_T_pvar_session session, MPI_T_pvar_handle handle); +OMPI_DECLSPEC int MPI_T_pvar_stop(MPI_T_pvar_session session, MPI_T_pvar_handle handle); +OMPI_DECLSPEC int MPI_T_pvar_read(MPI_T_pvar_session session, MPI_T_pvar_handle handle, + void *buf); +OMPI_DECLSPEC int MPI_T_pvar_write(MPI_T_pvar_session session, MPI_T_pvar_handle handle, + const void *buf); +OMPI_DECLSPEC int MPI_T_pvar_reset(MPI_T_pvar_session session, MPI_T_pvar_handle handle); +OMPI_DECLSPEC int MPI_T_pvar_readreset(MPI_T_pvar_session session, MPI_T_pvar_handle handle, + void *buf); +OMPI_DECLSPEC int MPI_T_enum_get_info(MPI_T_enum enumtype, int *num, char *name, int *name_len); +OMPI_DECLSPEC int MPI_T_enum_get_item(MPI_T_enum enumtype, int index, int *value, char *name, + int *name_len); +/* + * Deprecated prototypes. Usage is discouraged, as these may be + * deleted in future versions of the MPI Standard. 
+ */
+OMPI_DECLSPEC int MPI_Attr_delete(MPI_Comm comm, int keyval)
+    __mpi_interface_deprecated__("MPI_Attr_delete was deprecated in MPI-2.0; use MPI_Comm_delete_attr instead");
+OMPI_DECLSPEC int PMPI_Attr_delete(MPI_Comm comm, int keyval)
+    __mpi_interface_deprecated__("PMPI_Attr_delete was deprecated in MPI-2.0; use PMPI_Comm_delete_attr instead");
+OMPI_DECLSPEC int MPI_Attr_get(MPI_Comm comm, int keyval, void *attribute_val, int *flag)
+    __mpi_interface_deprecated__("MPI_Attr_get was deprecated in MPI-2.0; use MPI_Comm_get_attr instead");
+OMPI_DECLSPEC int PMPI_Attr_get(MPI_Comm comm, int keyval, void *attribute_val, int *flag)
+    __mpi_interface_deprecated__("PMPI_Attr_get was deprecated in MPI-2.0; use PMPI_Comm_get_attr instead");
+OMPI_DECLSPEC int MPI_Attr_put(MPI_Comm comm, int keyval, void *attribute_val)
+    __mpi_interface_deprecated__("MPI_Attr_put was deprecated in MPI-2.0; use MPI_Comm_set_attr instead");
+OMPI_DECLSPEC int PMPI_Attr_put(MPI_Comm comm, int keyval, void *attribute_val)
+    __mpi_interface_deprecated__("PMPI_Attr_put was deprecated in MPI-2.0; use PMPI_Comm_set_attr instead");
+
+/*
+ * Even though MPI_Copy_function and MPI_Delete_function are
+ * deprecated, we do not use the attributes marking them as such,
+ * because otherwise the compiler will warn for all the functions that
+ * are declared using them (e.g., MPI_Keyval_create).
+ */
+typedef int (MPI_Copy_function)(MPI_Comm, int, void *,
+                                void *, void *, int *);
+/* MPI_Copy_function was deprecated in MPI-2.0; use MPI_Comm_copy_attr_function instead. */
+typedef int (MPI_Delete_function)(MPI_Comm, int, void *, void *);
+/* MPI_Delete_function was deprecated in MPI-2.0; use MPI_Comm_delete_attr_function instead. */
+OMPI_DECLSPEC int MPI_Keyval_create(MPI_Copy_function *copy_fn,
+                                    MPI_Delete_function *delete_fn,
+                                    int *keyval, void *extra_state)
+    __mpi_interface_deprecated__("MPI_Keyval_create was deprecated in MPI-2.0; use MPI_Comm_create_keyval instead.");
+OMPI_DECLSPEC int PMPI_Keyval_create(MPI_Copy_function *copy_fn,
+                                     MPI_Delete_function *delete_fn,
+                                     int *keyval, void *extra_state)
+    __mpi_interface_deprecated__("PMPI_Keyval_create was deprecated in MPI-2.0; use PMPI_Comm_create_keyval instead.");
+OMPI_DECLSPEC int MPI_Keyval_free(int *keyval)
+    __mpi_interface_deprecated__("MPI_Keyval_free was deprecated in MPI-2.0; use MPI_Comm_free_keyval instead.");
+OMPI_DECLSPEC int PMPI_Keyval_free(int *keyval)
+    __mpi_interface_deprecated__("PMPI_Keyval_free was deprecated in MPI-2.0; use PMPI_Comm_free_keyval instead.");
+
+#if !defined(OMPI_COMPILING_FORTRAN_WRAPPERS)
+#define MPI_DUP_FN OMPI_C_MPI_DUP_FN
+#endif
+OMPI_DECLSPEC int OMPI_C_MPI_DUP_FN( MPI_Comm comm, int comm_keyval,
+                                     void* extra_state,
+                                     void* attribute_val_in,
+                                     void* attribute_val_out,
+                                     int* flag )
+    __mpi_interface_deprecated__("MPI_DUP_FN was deprecated in MPI-2.0; use MPI_COMM_DUP_FN instead.");
+
+#if !defined(OMPI_COMPILING_FORTRAN_WRAPPERS)
+#define MPI_NULL_COPY_FN OMPI_C_MPI_NULL_COPY_FN
+#endif
+OMPI_DECLSPEC int OMPI_C_MPI_NULL_COPY_FN( MPI_Comm comm, int comm_keyval,
+                                           void* extra_state,
+                                           void* attribute_val_in,
+                                           void* attribute_val_out,
+                                           int* flag )
+    __mpi_interface_deprecated__("MPI_NULL_COPY_FN was deprecated in MPI-2.0; use MPI_COMM_NULL_COPY_FN instead.");
+
+#if !defined(OMPI_COMPILING_FORTRAN_WRAPPERS)
+#define MPI_NULL_DELETE_FN OMPI_C_MPI_NULL_DELETE_FN
+#endif
+OMPI_DECLSPEC int OMPI_C_MPI_NULL_DELETE_FN( MPI_Comm comm, int comm_keyval,
+                                             void* attribute_val_out,
+                                             void* extra_state )
+    
__mpi_interface_deprecated__("MPI_NULL_DELETE_FN was deprecated in MPI-2.0; use MPI_COMM_NULL_DELETE_FN instead.");
+
+#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
+/*
+ * Removed typedefs. These typedefs are only available if Open MPI
+ * was configured with --enable-mpi1-compatibility.
+ *
+ * These typedefs were formally removed from the MPI specification
+ * and should no longer be used in MPI applications.
+ *
+ * Even though MPI_Handler_function is removed, we do not use the
+ * attributes marking it as such, because otherwise the compiler
+ * will warn for all the functions that are declared using them
+ * (e.g., MPI_Errhandler_create).
+ */
+typedef void (MPI_Handler_function)(MPI_Comm *, int *, ...);
+/* MPI_Handler_function was removed in MPI-3.0; use MPI_Comm_errhandler_function instead. */
+
+/*
+ * Removed prototypes. These prototypes are only available if Open
+ * MPI was configured with --enable-mpi1-compatibility.
+ *
+ * These functions were formally removed from the MPI specification
+ * and should no longer be used in MPI applications.
+ */
+OMPI_DECLSPEC int MPI_Address(void *location, MPI_Aint *address)
+    __mpi_interface_removed__(MPI_Address, MPI_Get_address);
+OMPI_DECLSPEC int PMPI_Address(void *location, MPI_Aint *address)
+    __mpi_interface_removed__(PMPI_Address, PMPI_Get_address);
+OMPI_DECLSPEC int MPI_Errhandler_create(MPI_Handler_function *function,
+                                        MPI_Errhandler *errhandler)
+    __mpi_interface_removed__(MPI_Errhandler_create, MPI_Comm_create_errhandler);
+OMPI_DECLSPEC int PMPI_Errhandler_create(MPI_Handler_function *function,
+                                         MPI_Errhandler *errhandler)
+    __mpi_interface_removed__(PMPI_Errhandler_create, PMPI_Comm_create_errhandler);
+OMPI_DECLSPEC int MPI_Errhandler_get(MPI_Comm comm, MPI_Errhandler *errhandler)
+    __mpi_interface_removed__(MPI_Errhandler_get, MPI_Comm_get_errhandler);
+OMPI_DECLSPEC int PMPI_Errhandler_get(MPI_Comm comm, MPI_Errhandler *errhandler)
+    __mpi_interface_removed__(PMPI_Errhandler_get, PMPI_Comm_get_errhandler);
+OMPI_DECLSPEC int MPI_Errhandler_set(MPI_Comm comm, MPI_Errhandler errhandler)
+    __mpi_interface_removed__(MPI_Errhandler_set, MPI_Comm_set_errhandler);
+OMPI_DECLSPEC int PMPI_Errhandler_set(MPI_Comm comm, MPI_Errhandler errhandler)
+    __mpi_interface_removed__(PMPI_Errhandler_set, PMPI_Comm_set_errhandler);
+OMPI_DECLSPEC int MPI_Type_extent(MPI_Datatype type, MPI_Aint *extent)
+    __mpi_interface_removed__(MPI_Type_extent, MPI_Type_get_extent);
+OMPI_DECLSPEC int PMPI_Type_extent(MPI_Datatype type, MPI_Aint *extent)
+    __mpi_interface_removed__(PMPI_Type_extent, PMPI_Type_get_extent);
+OMPI_DECLSPEC int MPI_Type_hindexed(int count, int array_of_blocklengths[],
+                                    MPI_Aint array_of_displacements[],
+                                    MPI_Datatype oldtype, MPI_Datatype *newtype)
+    __mpi_interface_removed__(MPI_Type_hindexed, MPI_Type_create_hindexed);
+OMPI_DECLSPEC int PMPI_Type_hindexed(int count, int array_of_blocklengths[],
+                                     MPI_Aint array_of_displacements[],
+                                     MPI_Datatype oldtype, MPI_Datatype *newtype)
+    __mpi_interface_removed__(PMPI_Type_hindexed, PMPI_Type_create_hindexed);
+OMPI_DECLSPEC int MPI_Type_hvector(int count, int blocklength, MPI_Aint stride,
+                                   MPI_Datatype oldtype, MPI_Datatype *newtype)
+    __mpi_interface_removed__(MPI_Type_hvector, MPI_Type_create_hvector);
+OMPI_DECLSPEC int PMPI_Type_hvector(int count, int blocklength, MPI_Aint stride,
+                                    MPI_Datatype oldtype, MPI_Datatype *newtype)
+    __mpi_interface_removed__(PMPI_Type_hvector, PMPI_Type_create_hvector);
+OMPI_DECLSPEC int MPI_Type_lb(MPI_Datatype type, MPI_Aint *lb)
+    
__mpi_interface_removed__(MPI_Type_lb, MPI_Type_get_extent);
+OMPI_DECLSPEC int PMPI_Type_lb(MPI_Datatype type, MPI_Aint *lb)
+    __mpi_interface_removed__(PMPI_Type_lb, PMPI_Type_get_extent);
+OMPI_DECLSPEC int MPI_Type_struct(int count, int array_of_blocklengths[],
+                                  MPI_Aint array_of_displacements[],
+                                  MPI_Datatype array_of_types[],
+                                  MPI_Datatype *newtype)
+    __mpi_interface_removed__(MPI_Type_struct, MPI_Type_create_struct);
+OMPI_DECLSPEC int PMPI_Type_struct(int count, int array_of_blocklengths[],
+                                   MPI_Aint array_of_displacements[],
+                                   MPI_Datatype array_of_types[],
+                                   MPI_Datatype *newtype)
+    __mpi_interface_removed__(PMPI_Type_struct, PMPI_Type_create_struct);
+OMPI_DECLSPEC int MPI_Type_ub(MPI_Datatype mtype, MPI_Aint *ub)
+    __mpi_interface_removed__(MPI_Type_ub, MPI_Type_get_extent);
+OMPI_DECLSPEC int PMPI_Type_ub(MPI_Datatype mtype, MPI_Aint *ub)
+    __mpi_interface_removed__(PMPI_Type_ub, PMPI_Type_get_extent);
+#endif /* !OMPI_OMIT_MPI1_COMPAT_DECLS */
+
+#if OMPI_REMOVED_USE_STATIC_ASSERT
+#define MPI_Address(...) THIS_FUNCTION_WAS_REMOVED_IN_MPI30(MPI_Address, MPI_Get_address)
+#define MPI_Errhandler_create(...) THIS_FUNCTION_WAS_REMOVED_IN_MPI30(MPI_Errhandler_create, MPI_Comm_create_errhandler)
+#define MPI_Errhandler_get(...) THIS_FUNCTION_WAS_REMOVED_IN_MPI30(MPI_Errhandler_get, MPI_Comm_get_errhandler)
+#define MPI_Errhandler_set(...) THIS_FUNCTION_WAS_REMOVED_IN_MPI30(MPI_Errhandler_set, MPI_Comm_set_errhandler)
+#define MPI_Type_extent(...) THIS_FUNCTION_WAS_REMOVED_IN_MPI30(MPI_Type_extent, MPI_Type_get_extent)
+#define MPI_Type_hindexed(...) THIS_FUNCTION_WAS_REMOVED_IN_MPI30(MPI_Type_hindexed, MPI_Type_create_hindexed)
+#define MPI_Type_hvector(...) THIS_FUNCTION_WAS_REMOVED_IN_MPI30(MPI_Type_hvector, MPI_Type_create_hvector)
+#define MPI_Type_lb(...) THIS_FUNCTION_WAS_REMOVED_IN_MPI30(MPI_Type_lb, MPI_Type_get_extent)
+#define MPI_Type_struct(...) THIS_FUNCTION_WAS_REMOVED_IN_MPI30(MPI_Type_struct, MPI_Type_create_struct)
+#define MPI_Type_ub(...) THIS_FUNCTION_WAS_REMOVED_IN_MPI30(MPI_Type_ub, MPI_Type_get_extent)
+#endif
+
+#if defined(c_plusplus) || defined(__cplusplus)
+}
+#endif
+
+#endif /* OMPI_MPI_H */
diff --git a/base/serial/impl/sp3mm4amg/commons/ompi_dims_create/ompi_config_minimal.h b/base/serial/impl/sp3mm4amg/commons/ompi_dims_create/ompi_config_minimal.h
new file mode 100644
index 00000000..88e4ccb9
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/commons/ompi_dims_create/ompi_config_minimal.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2021 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2007-2021 Cisco Systems, Inc. All rights reserved
+ * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
+ * Copyright (c) 2011-2020 Sandia National Laboratories. All rights reserved.
+ * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights
+ *                         reserved.
+ * Copyright (c) 2011-2013 INRIA. All rights reserved.
+ * Copyright (c) 2015      University of Houston. All rights reserved. 
+ * Copyright (c) 2015-2021 Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
+ * Copyright (c) 2017-2019 IBM Corporation. All rights reserved.
+ * Copyright (c) 2018      FUJITSU LIMITED. All rights reserved.
+ * Copyright (c) 2021-2022 Google, LLC. All rights reserved.
+ * Copyright (c) 2021-2022 Amazon.com, Inc. or its affiliates. All Rights
+ *                         reserved.
+ * Copyright (c) 2021      Bull S.A.S. All rights reserved.
+ * Copyright (c) 2018-2021 Triad National Security, LLC. All rights
+ *                         reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/*
+ * Error classes and codes
+ * Do not change the values of these without also modifying mpif.h.in.
+ */
+#define MPI_SUCCESS                   0
+#define MPI_ERR_BUFFER                1
+#define MPI_ERR_COUNT                 2
+#define MPI_ERR_TYPE                  3
+#define MPI_ERR_TAG                   4
+#define MPI_ERR_COMM                  5
+#define MPI_ERR_RANK                  6
+#define MPI_ERR_REQUEST               7
+#define MPI_ERR_ROOT                  8
+#define MPI_ERR_GROUP                 9
+#define MPI_ERR_OP                    10
+#define MPI_ERR_TOPOLOGY              11
+#define MPI_ERR_DIMS                  12
+#define MPI_ERR_ARG                   13
+#define MPI_ERR_UNKNOWN               14
+#define MPI_ERR_TRUNCATE              15
+#define MPI_ERR_OTHER                 16
+#define MPI_ERR_INTERN                17
+#define MPI_ERR_IN_STATUS             18
+#define MPI_ERR_PENDING               19
+#define MPI_ERR_ACCESS                20
+#define MPI_ERR_AMODE                 21
+#define MPI_ERR_ASSERT                22
+#define MPI_ERR_BAD_FILE              23
+#define MPI_ERR_BASE                  24
+#define MPI_ERR_CONVERSION            25
+#define MPI_ERR_DISP                  26
+#define MPI_ERR_DUP_DATAREP           27
+#define MPI_ERR_FILE_EXISTS           28
+#define MPI_ERR_FILE_IN_USE           29
+#define MPI_ERR_FILE                  30
+#define MPI_ERR_INFO_KEY              31
+#define MPI_ERR_INFO_NOKEY            32
+#define MPI_ERR_INFO_VALUE            33
+#define MPI_ERR_INFO                  34
+#define MPI_ERR_IO                    35
+#define MPI_ERR_KEYVAL                36
+#define MPI_ERR_LOCKTYPE              37
+#define MPI_ERR_NAME                  38
+#define MPI_ERR_NO_MEM                39
+#define MPI_ERR_NOT_SAME              40
+#define MPI_ERR_NO_SPACE              41
+#define MPI_ERR_NO_SUCH_FILE          42
+#define MPI_ERR_PORT                  43
+#define MPI_ERR_QUOTA                 44
+#define MPI_ERR_READ_ONLY             45
+#define MPI_ERR_RMA_CONFLICT          46
+#define MPI_ERR_RMA_SYNC              47
+#define MPI_ERR_SERVICE               48
+#define MPI_ERR_SIZE                  49
+#define MPI_ERR_SPAWN                 50
+#define MPI_ERR_UNSUPPORTED_DATAREP   51
+#define MPI_ERR_UNSUPPORTED_OPERATION 52
+#define MPI_ERR_WIN                   53
+
+#define MPI_T_ERR_MEMORY              54
+#define MPI_T_ERR_NOT_INITIALIZED     55
+#define MPI_T_ERR_CANNOT_INIT         56
+#define MPI_T_ERR_INVALID_INDEX       57
+#define MPI_T_ERR_INVALID_ITEM        58
+#define MPI_T_ERR_INVALID_HANDLE      59
+#define MPI_T_ERR_OUT_OF_HANDLES      60
+#define MPI_T_ERR_OUT_OF_SESSIONS     61
+#define MPI_T_ERR_INVALID_SESSION     62
+#define MPI_T_ERR_CVAR_SET_NOT_NOW    63
+#define MPI_T_ERR_CVAR_SET_NEVER      64
+#define MPI_T_ERR_PVAR_NO_STARTSTOP   65
+#define MPI_T_ERR_PVAR_NO_WRITE       66
+#define MPI_T_ERR_PVAR_NO_ATOMIC      67
+#define MPI_ERR_RMA_RANGE             68
+#define MPI_ERR_RMA_ATTACH            69
+#define MPI_ERR_RMA_FLAVOR            70
+#define MPI_ERR_RMA_SHARED            71
+#define MPI_T_ERR_INVALID             72
+#define MPI_T_ERR_INVALID_NAME        73
+#define MPI_ERR_PROC_ABORTED          74
+
+/* not #if conditional on OPAL_ENABLE_FT_MPI for ABI */
+#define MPI_ERR_PROC_FAILED           75
+#define MPI_ERR_PROC_FAILED_PENDING   76
+#define MPI_ERR_REVOKED               77
+
+/* Per MPI-3 p349 47, MPI_ERR_LASTCODE must be >= the last predefined
+   MPI_ERR_ code. Set the last code to allow some room for adding
+   error codes without breaking ABI. */
+#define MPI_ERR_LASTCODE              92
+
+/*
+ * Comparison results. Don't change the order of these, the group
+ * comparison functions rely on it. 
+ * Do not change the order of these without also modifying mpif.h.in.
+ */
+enum {
+  MPI_IDENT,
+  MPI_CONGRUENT,
+  MPI_SIMILAR,
+  MPI_UNEQUAL
+};
+
+/*
+ * MPI_Init_thread constants
+ * Do not change the order of these without also modifying mpif.h.in.
+ */
+enum {
+  MPI_THREAD_SINGLE,
+  MPI_THREAD_FUNNELED,
+  MPI_THREAD_SERIALIZED,
+  MPI_THREAD_MULTIPLE
+};
+
+/*
+ * Datatype combiners.
+ * Do not change the order of these without also modifying mpif.h.in.
+ * (see also mpif-common.h.fin).
+ */
+enum {
+  MPI_COMBINER_NAMED,
+  MPI_COMBINER_DUP,
+  MPI_COMBINER_CONTIGUOUS,
+  MPI_COMBINER_VECTOR,
+#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
+  MPI_COMBINER_HVECTOR_INTEGER,
+#else
+  OMPI_WAS_MPI_COMBINER_HVECTOR_INTEGER, /* preserve ABI compatibility */
+#endif
+  MPI_COMBINER_HVECTOR,
+  MPI_COMBINER_INDEXED,
+#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
+  MPI_COMBINER_HINDEXED_INTEGER,
+#else
+  OMPI_WAS_MPI_COMBINER_HINDEXED_INTEGER, /* preserve ABI compatibility */
+#endif
+  MPI_COMBINER_HINDEXED,
+  MPI_COMBINER_INDEXED_BLOCK,
+#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
+  MPI_COMBINER_STRUCT_INTEGER,
+#else
+  OMPI_WAS_MPI_COMBINER_STRUCT_INTEGER, /* preserve ABI compatibility */
+#endif
+  MPI_COMBINER_STRUCT,
+  MPI_COMBINER_SUBARRAY,
+  MPI_COMBINER_DARRAY,
+  MPI_COMBINER_F90_REAL,
+  MPI_COMBINER_F90_COMPLEX,
+  MPI_COMBINER_F90_INTEGER,
+  MPI_COMBINER_RESIZED,
+  MPI_COMBINER_HINDEXED_BLOCK
+};
+
+// MOCK ERRHANDLERS
+#define OMPI_ERRHANDLER_INVOKE(comm,err,fname) err
+#define OMPI_ERRHANDLER_NOHANDLE_INVOKE(err,fname) err
+
diff --git a/base/serial/impl/sp3mm4amg/commons/ompi_dims_create/ompi_dims_create.c b/base/serial/impl/sp3mm4amg/commons/ompi_dims_create/ompi_dims_create.c
new file mode 100644
index 00000000..7353a4fb
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/commons/ompi_dims_create/ompi_dims_create.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2020 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2014 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2012      Los Alamos National Security, LLC. All rights
+ *                         reserved.
+ * Copyright (c) 2014      Intel, Inc. All rights reserved
+ * Copyright (c) 2015      Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2015-2016 Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * extracted from Open MPI, mocking the dependencies that are not needed, in order to export the MPI_Dims_create function:
+ * Copyright (c) 2022 Andrea Di Iorio
+ */
+
+//#include "ompi_config.h"
+#include "ompi_config_minimal.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include <assert.h>
+
+//#include "ompi/mpi/c/bindings.h"
+//#include "ompi/runtime/params.h"
+//#include "ompi/communicator/communicator.h"
+//#include "ompi/errhandler/errhandler.h"
+
+
+static const char FUNC_NAME[] = "MPI_Dims_create";
+
+/* static functions */
+static int assignnodes(int ndim, int nfactor, int *pfacts, int **pdims);
+static int getfactors(int num, int *nfactors, int **factors);
+
+
+/*
+ * This is a utility function, no need to have anything in the lower
+ * layer for this at all
+ * ****
+ * original manpage ... 
+ * https://www.open-mpi.org/doc/v3.1/man3/MPI_Dims_create.3.php
+ * For Cartesian topologies, the function MPI_Dims_create helps the user select a balanced distribution of processes per coordinate direction,
+ * depending on the number of processes in the group to be balanced and optional constraints that can be specified by the user.
+ * One use is to partition all the processes (the size of MPI_COMM_WORLD's group) into an n-dimensional topology.
+ * The entries in the array dims are set to describe a Cartesian grid with ndims dimensions and a total of nnodes nodes.
+ * The dimensions are set to be as close to each other as possible, using an appropriate divisibility algorithm.
+ *
+ * The caller may further constrain the operation of this routine by specifying elements of array dims.
+ * If dims[i] is set to a positive number, the routine will not modify the number of nodes in dimension i; only those entries where dims[i] = 0 are modified by the call.
+ * Negative input values of dims[i] are erroneous. An error will occur if nnodes is not a multiple of the product of all fixed entries, i.e. prod_{i : dims[i] != 0} dims[i].
+ * For dims[i] set by the call, dims[i] will be ordered in nonincreasing order. Array dims is suitable for use as input to routine MPI_Cart_create. MPI_Dims_create is local.
+ * Example:
+ *
+ *    dims
+ *    before          dims
+ *    call        function call               on return
+ * -----------------------------------------------------
+ *    (0,0)    MPI_Dims_create(6, 2, dims)    (3,2)
+ *    (0,0)    MPI_Dims_create(7, 2, dims)    (7,1)
+ *    (0,3,0)  MPI_Dims_create(6, 3, dims)    (2,3,1)
+ *    (0,3,0)  MPI_Dims_create(7, 3, dims)    erroneous call
+ * ------------------------------------------------------
+ */
+int MPI_Dims_create(int nnodes, int ndims, int dims[])
+{
+    int i;
+    int freeprocs;
+    int freedims;
+    int nfactors;
+    int *factors;
+    int *procs;
+    int *p;
+    int err;
+
+    /*if (MPI_PARAM_CHECK) {
+        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
+
+        if (0 > ndims) {
+            return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD,
+                                           MPI_ERR_DIMS, FUNC_NAME);
+        }
+
+        if ((0 != ndims) && (NULL == dims)) {
+            return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD,
+                                           MPI_ERR_ARG, FUNC_NAME);
+        }
+
+        if (1 > nnodes) {
+            return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD,
+                                           MPI_ERR_DIMS, FUNC_NAME);
+        }
+    }*/
+    assert( ndims > 0 && nnodes > 1 && NULL != dims );
+    /* Get # of free-to-be-assigned processes and # of free dimensions */
+    freeprocs = nnodes;
+    freedims = 0;
+    for (i = 0, p = dims; i < ndims; ++i,++p) {
+        if (*p == 0) {
+            ++freedims;
+        } else if ((*p < 0) || ((nnodes % *p) != 0)) {
+            return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_DIMS,
+                                           FUNC_NAME);
+        } else {
+            freeprocs /= *p;
+        }
+    }
+
+    if (freedims == 0) {
+        if (freeprocs == 1) {
+            return MPI_SUCCESS;
+        }
+        return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_DIMS,
+                                               FUNC_NAME);
+    }
+
+    if (freeprocs == 1) {
+        for (i = 0; i < ndims; ++i, ++dims) {
+            if (*dims == 0) {
+                *dims = 1;
+            }
+        }
+        return MPI_SUCCESS;
+    }
+
+    /* Factor the number of free processes */
+    if (MPI_SUCCESS != (err = getfactors(freeprocs, &nfactors, &factors))) {
+        return OMPI_ERRHANDLER_NOHANDLE_INVOKE(err,
+                                               FUNC_NAME);
+    }
+
+    /* Assign free processes to free dimensions */
+    if (MPI_SUCCESS != (err = assignnodes(freedims, nfactors, factors, &procs))) {
+        free(factors);
+        return OMPI_ERRHANDLER_NOHANDLE_INVOKE(err,
+                                               FUNC_NAME);
+    }
+
+    /* Return assignment results */
+    p = procs;
+    for (i = 0; i < ndims; ++i, ++dims) {
+        if (*dims == 0) {
+            *dims = *p++;
+        }
+    }
+
+    free((char *) factors);
+    free((char *) procs);
+
+    /* all done */
+    return MPI_SUCCESS;
+}
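+
+/*
+ * Worked example of the two helpers below (illustrative note, not upstream
+ * text): MPI_Dims_create(12, 2, {0,0}):
+ *   getfactors(12) -> {2,2,3}  (prime factors, ascending)
+ *   assignnodes    -> bins start as (1,1); factors are taken largest first
+ *                     and multiplied into the currently smallest bin:
+ *                     3 -> (3,1), 2 -> (3,2), 2 -> (3,4)
+ *   sorted in nonincreasing order -> dims = (4,3)
+ */
+
+/*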
+ * assignnodes + * + * Function: - assign processes to dimensions + * - get "best-balanced" grid + * - greedy bin-packing algorithm used + * - sort dimensions in decreasing order + * - dimensions array dynamically allocated + * Accepts: - # of dimensions + * - # of prime factors + * - array of prime factors + * - ptr to array of dimensions (returned value) + * Returns: - 0 or ERROR + */ +static int +assignnodes(int ndim, int nfactor, int *pfacts, int **pdims) +{ + int *bins; + int i, j; + int n; + int f; + int *p; + int *pmin; + + if (0 >= ndim) { + return MPI_ERR_DIMS; + } + + /* Allocate and initialize the bins */ + bins = (int *) malloc((unsigned) ndim * sizeof(int)); + if (NULL == bins) { + return MPI_ERR_NO_MEM; + } + *pdims = bins; + + for (i = 0, p = bins; i < ndim; ++i, ++p) { + *p = 1; + } + + /* Loop assigning factors from the highest to the lowest */ + for (j = nfactor - 1; j >= 0; --j) { + f = pfacts[j]; + /* Assign a factor to the smallest bin */ + pmin = bins; + for (i = 1, p = pmin + 1; i < ndim; ++i, ++p) { + if (*p < *pmin) { + pmin = p; + } + } + *pmin *= f; + } + + /* Sort dimensions in decreasing order (O(n^2) for now) */ + for (i = 0, pmin = bins; i < ndim - 1; ++i, ++pmin) { + for (j = i + 1, p = pmin + 1; j < ndim; ++j, ++p) { + if (*p > *pmin) { + n = *p; + *p = *pmin; + *pmin = n; + } + } + } + + return MPI_SUCCESS; +} + +/* + * getfactors + * + * Function: - factorize a number + * Accepts: - number + * - # prime factors + * - array of prime factors + * Returns: - MPI_SUCCESS or ERROR + */ +static int +getfactors(int num, int *nfactors, int **factors) { + int size; + int d; + int i; + int sqrtnum; + + if(num < 2) { + (*nfactors) = 0; + (*factors) = NULL; + return MPI_SUCCESS; + } + /* Allocate the array of prime factors which cannot exceed log_2(num) entries */ + sqrtnum = ceil(sqrt(num)); + size = ceil(log(num) / log(2)); + *factors = (int *) malloc((unsigned) size * sizeof(int)); + + i = 0; + /* determine all occurences of factor 2 */ + while((num % 2) == 0) { + num /= 2; + (*factors)[i++] = 2; + } + /* determine all occurences of uneven prime numbers up to sqrt(num) */ + d = 3; + for(d = 3; (num > 1) && (d <= sqrtnum); d += 2) { + while((num % d) == 0) { + num /= d; + (*factors)[i++] = d; + } + } + /* as we looped only up to sqrt(num) one factor > sqrt(num) may be left over */ + if(num != 1) { + (*factors)[i++] = num; + } + (*nfactors) = i; + return MPI_SUCCESS; +} + + +#ifdef TEST_MAIN +int main(int argc,char** argv){ + if (argc != 2) {fprintf(stderr,"usage: -> to fit in a 2D grid");return EXIT_FAILURE;} + int dims2[2] = {0,0}; + int n = atoi(argv[1]); + if (MPI_Dims_create(n,2,dims2)){return EXIT_FAILURE;} printf("%d 2D division:\t%d-%d\n",n,dims2[0],dims2[1]); +} +#endif //TEST_MAIN diff --git a/base/serial/impl/sp3mm4amg/commons/sparseUtilsGeneric.c b/base/serial/impl/sp3mm4amg/commons/sparseUtilsGeneric.c new file mode 100644 index 00000000..e3cea72b --- /dev/null +++ b/base/serial/impl/sp3mm4amg/commons/sparseUtilsGeneric.c @@ -0,0 +1,499 @@ +/* + * Sp3MM_for_AlgebraicMultiGrid + * (C) Copyright 2021-2022 + * Andrea Di Iorio + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions, and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
+ *    not be used to endorse or promote products derived from this
+ *    software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <omp.h>
+
+#include "macros.h"
+#include "sparseMatrix.h"
+#include "utils.h"
+
+/////////////// no offset --> SINGLE implementation functions
+#ifndef SPARSE_UTILS_C
+#define SPARSE_UTILS_C
+///DENSE accumulator utils
+/*
+ * alloc threads' aux arrays once and split them in threads' structures
+ * so free them once from the first thread struct, with the original pointers returned from the alloc
+ */
+ACC_DENSE* _initAccVectors_monoalloc(ulong num,ulong size){ //TODO PERF WITH NEXT
+	ACC_DENSE* out   = NULL;
+	double* vAll     = NULL;
+	ulong* vAllNzIdx = NULL;
+	if (!(out = calloc(num,sizeof(*out)))){
+		ERRPRINT("_initAccVectors aux struct alloc failed\n");
+		return NULL;
+	}
+	if (!(vAll = calloc(num*size,sizeof(*vAll)))) {
+		ERRPRINT("_initAccVectors aux dense vectors alloc failed\n");
+		goto err;
+	}
+	if (!(vAllNzIdx = calloc(num*size,sizeof(*vAllNzIdx)))) {
+		ERRPRINT("_initAccVectors aux dense vectors' idx alloc failed\n");
+		goto err;
+	}
+	for (ulong i=0; i<num; i++){
+		out[i].vLen   = size;
+		out[i].v      = vAll      + i*size;
+		out[i].nnzIdx = vAllNzIdx + i*size;
+		if (initSpVectIdxDenseAcc(size,&out[i].nnzIdxMap))	goto err;
+	}
+	return out;
+
+	err:
+	free(out);
+	free(vAll);
+	free(vAllNzIdx);
+	return NULL;
+}
+
+int _initAccVector(ACC_DENSE* v,ulong size){
+	v->vLen = size;
+	if (!(v->v = calloc(size,sizeof(*(v->v))))) {
+		ERRPRINT("_initAccVectors aux dense vector alloc failed\n");
+		return EXIT_FAILURE;
+	}
+	if (!(v->nnzIdx = calloc(size,sizeof(*(v->nnzIdx))))) {
+		ERRPRINT("_initAccVectors aux dense vector' idx alloc failed\n");
+		return EXIT_FAILURE;
+	}
+	if (initSpVectIdxDenseAcc(size, &v->nnzIdxMap)) return EXIT_FAILURE;
+
+	return EXIT_SUCCESS;
+}
+
+ACC_DENSE* _initAccVectors(ulong num,ulong size){
+	ACC_DENSE* out = NULL;
+	if (!(out = calloc(num,sizeof(*out)))){
+		ERRPRINT("_initAccVectors aux struct alloc failed\n");
+		return NULL;
+	}
+	for (ulong i=0; i<num; i++){
+		if (_initAccVector(out+i,size))	goto err;
+	}
+	return out;
+
+	err:
+	for (ulong i=0; i<num; i++){
+		free(out[i].v);
+		free(out[i].nnzIdx);
+	}
+	free(out);
+	return NULL;
+}
+
+int initSpVectIdxDenseAcc(idx_t idxMax, SPVECT_IDX_DENSE_MAP* vectIdxsMap){
+	vectIdxsMap->len = 0;
+	nnz_idxs_flags_t* idxMaps = &vectIdxsMap->idxsMap;
+	#if SPVECT_IDX_BITWISE == TRUE	//nnz presence flags as bitflags in limbs
+	vectIdxsMap->idxsMapN = INT_DIV_CEIL(idxMax, LIMB_SIZE_BIT);
+	#else							//nnz presence flags in a single array
+	vectIdxsMap->idxsMapN = idxMax;
+	#endif //SPVECT_IDX_BITWISE == TRUE
+	if (!(*idxMaps = calloc(vectIdxsMap->idxsMapN, sizeof(**idxMaps)))) {
+		ERRPRINT("initSpVectIdxDenseAcc\tidxMaps SPVECT_IDX_BITWISE calloc err\n");
+		return EXIT_FAILURE;
+	}
+	return EXIT_SUCCESS;
+}
+
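+/*
+ * Usage sketch of the dense accumulator above (illustrative only; A, B and j
+ * are assumed to be caller-provided, see scSparseRowMul_* in SpMMUtilsGeneric.h):
+ *	ACC_DENSE acc;
+ *	if (_initAccVector(&acc,B->N))	return EXIT_FAILURE;
+ *	//scatter-accumulate one nnz of A's row against the matching row of B
+ *	scSparseRowMul_0(A->AS[j], B, A->JA[j], &acc);
+ *	//...repeat over the nnz of A's row, then sparsify acc into C's row
+ */
+
+void checkOverallocPercent(ulong* forecastedSizes,spmat* AB){
+	for (ulong r=0,rSize,forecastedSize; r < AB->M; r++){
+		forecastedSize = forecastedSizes[r];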
+		#ifdef ROWLENS
+		rSize = AB->RL[r];
+		#else
+		rSize = AB->IRP[r+1] - AB->IRP[r];
+		#endif
+		DEBUGCHECKS{
+			if ( forecastedSize < rSize ){
+				ERRPRINT("BAD FORECASTING\n");
+				assert(forecastedSize >= rSize );
+			}
+		}
+		DEBUGPRINT
+		printf("extra forecastedSize of row: %lu\t=\t%lf %% \n",
+		  r,100*(forecastedSize-rSize) / (double) forecastedSize);
+	}
+	idx_t extraMatrix = forecastedSizes[AB->M] - AB->NZ;
+	printf("extra forecastedSize of the matrix: \t%lu\t = %lf %% \n",
+	  extraMatrix, 100*extraMatrix /(double) forecastedSizes[AB->M]);
+}
+
+int spmatDiff(spmat* A, spmat* B){
+	if (A->NZ != B->NZ){
+		ERRPRINT("NZ differ\n");
+		return EXIT_FAILURE;
+	}
+	if (memcmp(A->IRP,B->IRP,(A->M+1)*sizeof(*A->IRP))){ //IRP has M+1 entries, compare bytes
+		ERRPRINT("IRP differ\n");
+		return EXIT_FAILURE;
+	}
+	if (doubleVectorsDiff(A->AS,B->AS,A->NZ,NULL)){
+		ERRPRINT("AS DIFFER\n");
+		return EXIT_FAILURE;
+	}
+	if (memcmp(A->JA,B->JA,A->NZ*sizeof(*A->JA))){ //compare bytes, not elements
+		ERRPRINT("JA differ\n");
+		return EXIT_FAILURE;
+	}
+	return EXIT_SUCCESS;
+}
+
+double* CSRToDense(spmat* sparseMat){
+	double* denseMat;
+	ulong i,j,idxNZ,denseSize;
+	if (__builtin_umull_overflow(sparseMat->M,sparseMat->N,&denseSize)){
+		ERRPRINT("overflow in dense allocation\n");
+		return NULL;
+	}
+	if (!(denseMat = calloc(denseSize, sizeof(*denseMat)))){
+		ERRPRINT("dense matrix alloc failed\n");
+		return NULL;
+	}
+	for (i=0; i<sparseMat->M; i++){
+		for (idxNZ=sparseMat->IRP[i]; idxNZ<sparseMat->IRP[i+1]; ++idxNZ){
+			j = sparseMat->JA[idxNZ];
+			//converting sparse item into dense entry
+			denseMat[(ulong) IDX2D(i,j,sparseMat->N)] = sparseMat->AS[idxNZ];
+		}
+	}
+	return denseMat;
+}
+void printSparseMatrix(spmat* spMatrix,char justNZMarkers){
+	double* denseMat = CSRToDense(spMatrix);
+	if (!denseMat)	return;
+	printMatrix(denseMat,spMatrix->M,spMatrix->N,justNZMarkers);
+	free(denseMat);
+}
+
+
+
+static inline int _colsPartitioningUnifRanges_init(spmat* A,uint gridCols,
+  spmat** colParts,idx_t** colPartsLens){
+
+	spmat* colPart;
+	ulong _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols, *tmpJA;
+	///alloc/init partitions structures
+	if (!(*colParts = calloc(gridCols, sizeof(**colParts)))){
+		ERRPRINT("colsPartitioningUnifRanges\tcolumns partitions of A calloc fail\n");
+		return EXIT_FAILURE;
+	}
+	for (ulong i=0,colBlock; i<gridCols; i++){
+		colBlock = UNIF_REMINDER_DISTRI(i,_colBlock,_colBlockRem);
+		colPart  = *colParts + i;
+		if (allocSpMatrixInternal(A->M,colBlock,colPart)){
+			ERRPRINT("colsPartitioningUnifRanges\tallocSpMatrixInternal partition err\n");
+			return EXIT_FAILURE;
+		}
+		//TODO TODO overalloc A cols partitions NZ arrays, then realloc
+		if (!(colPart->AS = malloc(A->NZ * sizeof(*A->AS)))){
+			ERRPRINT("colPart of A overalloc of AS errd\n");
+			return EXIT_FAILURE;
+		}
+		if (!(colPart->JA = malloc(A->NZ * sizeof(*A->JA)))){
+			ERRPRINT("colPart of A overalloc of JA errd\n");
+			return EXIT_FAILURE;
+		}
+	}
+	//for each A col partition -> last copied nz index = nnz copied amount
+	if (! (*colPartsLens = calloc(gridCols, sizeof(**colPartsLens))) ) {
+		ERRPRINT("colsPartitioningUnifRanges: colPartsLens calloc errd\n");
+		return EXIT_FAILURE;
+	}
+	return EXIT_SUCCESS;
+}
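+
+/*
+ * CSR layout recap for the helpers above and below (illustrative note):
+ * A = | 5 0 0 |        AS  = {5,8,3,6}   //nnz values
+ *     | 0 8 3 |  --->  JA  = {0,1,2,1}   //their column indexes
+ *     | 0 6 0 |        IRP = {0,1,3,4}   //row r spans [IRP[r],IRP[r+1])
+ * (0-based; the OFF_F generic variants below also accept 1-based Fortran indexes)
+ */
+
+static inline int _colsPartitioningUnifRanges_finalRealloc(spmat* A,uint gridCols,
+  spmat* colParts,idx_t* colPartsLens){
+
+	spmat* colPart;
+	double* tmpAS;	idx_t* tmpJA;
+	//realloc overallcd A parts NZ arrays (TODO -> downsizing -> nofails?)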
+ for (ulong i=0,partLen; iAS,partLen*sizeof(*(colPart->AS))))){ + ERRPRINT("realloc overallocated cols partition AS array\n"); + return EXIT_FAILURE; + } + colPart->AS = tmpAS; + if (!(tmpJA = realloc(colPart->JA,partLen*sizeof(*(colPart->JA))))){ + ERRPRINT("realloc overallocated cols partition JA array\n"); + return EXIT_FAILURE; + } + colPart->JA = tmpJA; + colPart->NZ = partLen; + colPart->IRP[A->M] = partLen; + } + return EXIT_SUCCESS; +} +#endif + + +#ifndef OFF_F + #error generic implementation requires OFF_F defined +#endif +//////////////////////// CSR SPECIFIC -- TODO RENAME ////////////////// +///SPARSE MATRIX PARTITIONING +idx_t* CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols){ + ulong subRowsN = A->M * gridCols; + ulong _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols; + ulong* offsets = malloc( (subRowsN+1) * sizeof(*offsets) ); + if (!offsets) { + ERRPRINT("colsOffsetsPartitioningUnifRanges:\toffsets malloc errd\n"); + return NULL; + } + ///OFFSETS COMPUTE FOR COL GROUPS -> O( A.NZ ) + for (ulong r=0, j=0; rM; j=A->IRP[++r]-OFF_F){ + offsets[ IDX2D(r,0,gridCols) ] = j; //row's first gc start is costrained + //navigate column groups inside current row + for (ulong gc=1,gcStartCol; gcJA[j]-OFF_F; cIRP[r+1]-OFF_F; c=A->JA[++j]-OFF_F); + while (j < A->IRP[r+1]-OFF_F && A->JA[j]-OFF_F < gcStartCol) + j++; + offsets[ IDX2D(r,gc,gridCols) ] = j; //row's gc group startIdx + } + } + offsets[subRowsN] = A->NZ; //last row's partition end costrained + return offsets; +} + +spmat* CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)(spmat* A,uint gridCols, + idx_t** colPartsOffsets) +{ + spmat *colParts = NULL, *colPart; + ulong _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols; + ulong *colPartsLens=NULL, *tmpJA; + double* tmpAS; + + ///alloc/init partitions structures + idx_t* colOffsets = NULL; + if (!(colOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(A,gridCols))) + goto _err; + if (_colsPartitioningUnifRanges_init(A,gridCols,&colParts,&colPartsLens)) + goto _err; + //OFFSET BASED COPY OF A.COL_GROUPS -> O( A.NZ ) + for (idx_t r=0,gcId=0; rM; r++){ + for (idx_t gc=0,gcStartIdx=0,gLen=0; gcIRP[r] = colPartsLens[gc]; //new line for the col partition + //actual copy of nnz entries to colPartitions + memcpy(colPart->AS+colPart->IRP[r], A->AS+gcStartIdx, gLen*sizeof(*A->AS)); + memcpy(colPart->JA+colPart->IRP[r], A->JA+gcStartIdx, gLen*sizeof(*A->JA)); + colPartsLens[gc] += gLen; + #ifdef ROWLENS + colPart->RL[r] = i; + #endif + } + } + + //realloc overallcd A parts NZ arrays + if(_colsPartitioningUnifRanges_finalRealloc(A,gridCols,colParts,colPartsLens)) + goto _err; + + free(colPartsLens); + if (colPartsOffsets) *colPartsOffsets = colOffsets; //save for the caller + else free(colOffsets); + + return colParts; + _err: + free(*colPartsOffsets); + for (ulong i=0; iN/gridCols, _colBlockRem = A->N%gridCols, *colPartsLens=NULL, *tmpJA; + double* tmpAS; + ///alloc/init partitions structures + if (_colsPartitioningUnifRanges_init(A,gridCols,&colParts,&colPartsLens)) + goto _err; + /* TODO + * Parallelize: 2for collapse OMP, gcEndCol -> startIdxize, ... 
+ * oppure wrappare cio in static inline + */ + for (ulong r=0, j=0; rM; j=A->IRP[++r]-OFF_F){ + //navigate column groups inside current row + for (ulong gc=0,gcEndCol=0,i; gc kept as they were originally, handled with shift in functions + colPart->IRP[r] = colPartsLens[gc]; + gcEndCol += UNIF_REMINDER_DISTRI(gc,_colBlock,_colBlockRem); + //goto next GroupCols,keeping A's nnz entries navigation ( index j+i ) + //for (ulong c=A->JA[j+i]-OFF_F; cIRP[r+1]-OFF_F; c=A->JA[j+ ++i]-OFF_F); + while ( j+i < A->IRP[r+1]-OFF_F && A->JA[j+i]-OFF_F < gcEndCol ) i++; + memcpy(colPart->AS+colPart->IRP[r], A->AS+j, i*sizeof(*A->AS)); + memcpy(colPart->JA+colPart->IRP[r], A->JA+j, i*sizeof(*A->JA)); + + colPartsLens[gc] += i; + #ifdef ROWLENS + colPart->RL[r] = i; + #endif + } + } + //realloc overallcd A parts NZ arrays + if(_colsPartitioningUnifRanges_finalRealloc(A,gridCols,colParts,colPartsLens)) + goto _err; + free(colPartsLens); + return colParts; + _err: + for (ulong i=0; iM*gridCols; partId++){ + forecast = forecastedSizes[partId]; + rLen = abColOffsets[partId+1] - abColOffsets[partId]; + DEBUGCHECKS assert(forecast >= rLen); + } + idx_t extraMatrix = forecastedSizes[AB->M] - AB->NZ; + printf("extra forecastedSize of the matrix: \t%lu\t = %lf %% \n", + extraMatrix, 100*extraMatrix /(double) forecastedSizes[AB->M]); + + free(abColOffsets); + +} + + +#ifdef SPARSEUTILS_MAIN_TEST ///unit test embbeded + +///inline export here +//SPMV_CHUNKS_DISTR spmvChunksFair; +spmat* allocSpMatrix(ulong rows, ulong cols); +int allocSpMatrixInternal(ulong rows, ulong cols, spmat* mat); +void freeSpmatInternal(spmat* mat); +void freeSpmat(spmat* mat); + +////INTERNAL TEST FUNCTIONS +//test that each row's partition from colsOffsetsPartitioningUnifRanges is in the correct index range +#include +int testColsOffsetsPartitioningUnifRanges(spmat* mat,ulong gridCols,ulong* partsOffs){ + ulong _colBlock = mat->N/gridCols, _colBlockRem = mat->N%gridCols; + ulong j=0; //CSR scanning nnz idx + ulong* colPartsPopulations = alloca(gridCols * sizeof(*colPartsPopulations)); + memset(colPartsPopulations,0,gridCols * sizeof(*colPartsPopulations)); + for (ulong r=0,pId=0; rM; r++){ + for (ulong gc=0,pStartIdx,pEndIdx; gcJA[idx]; + assert(j == idx); //consecutive index in partitioning + assert(pStartIdx <= c && c <= pEndIdx); //colRange + assert(mat->IRP[r] <= idx && idx <= mat->IRP[r+1] ); //rowRange + } + colPartsPopulations[gc] += partsOffs[pId+1] - partsOffs[pId]; + } + } + assert(j == mat->NZ); + ulong s=0; + for (ulong gc=0,partSize; gc < gridCols; gc++,s+=partSize){ + partSize = colPartsPopulations[gc]; + double partShare=partSize/(double)mat->NZ,partsAvg=1/(double)gridCols; + double partShareAvgDiff = partShare - partsAvg; + printf("colPartition %lu has:\t%lu = %lf of NNZ\t\t .. 
%lf\tAVG diff\n", + gc,partSize,partShare,partShareAvgDiff); + } + assert(s == mat->NZ); //TODO DUPLICATED + return EXIT_SUCCESS; +} + +CONFIG Conf = { + .gridRows = 8, + .gridCols = 8, +}; + +#include "parser.h" +int main(int argc, char** argv){ + int out=EXIT_FAILURE; + if (init_urndfd()) return out; + if (argc < 2 ) {ERRPRINT("COO MATRIX FOR TEST"); return out;} + ////parse sparse matrix and dense vector + spmat* mat; + char* trgtMatrix = TMP_EXTRACTED_MARTIX; + if (extractInTmpFS(argv[1],TMP_EXTRACTED_MARTIX) < 0) + trgtMatrix = argv[1]; + if (!(mat = MMtoCSR(trgtMatrix))){ + ERRPRINT("err during conversion MM -> CSR\n"); + return out; + } + ////partitioning test + ulong* colsPartitions = colsOffsetsPartitioningUnifRanges_0(mat,Conf.gridCols); + if (!colsPartitions) goto _free; + if (testColsOffsetsPartitioningUnifRanges(mat,Conf.gridCols,colsPartitions)) + goto _free; + + out=EXIT_SUCCESS; + printf("testColsOffsetsPartitioningUnifRanges passed with " + "mat: %lux%lu-%luNNZ\tgrid: %dx%d\n", + mat->M,mat->N,mat->NZ,Conf.gridRows,Conf.gridCols); + _free: + if (colsPartitions) free(colsPartitions); + + return out; +} +#endif //SPARSEUTILS_MAIN_TEST diff --git a/base/serial/impl/sp3mm4amg/commons/sparseUtilsMulti.c b/base/serial/impl/sp3mm4amg/commons/sparseUtilsMulti.c new file mode 100644 index 00000000..c7ba2c84 --- /dev/null +++ b/base/serial/impl/sp3mm4amg/commons/sparseUtilsMulti.c @@ -0,0 +1,38 @@ +/* + * Sp3MM_for_AlgebraicMultiGrid + * (C) Copyright 2021-2022 + * Andrea Di Iorio + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "sparseUtilsMulti.h" + +#define OFF_F 0 +#include "sparseUtilsGeneric.c" +#undef OFF_F + +#define OFF_F 1 +#include "sparseUtilsGeneric.c" +#undef OFF_F diff --git a/base/serial/impl/sp3mm4amg/commons/utils.c b/base/serial/impl/sp3mm4amg/commons/utils.c new file mode 100644 index 00000000..d969bf82 --- /dev/null +++ b/base/serial/impl/sp3mm4amg/commons/utils.c @@ -0,0 +1,502 @@ +/* + * Sp3MM_for_AlgebraicMultiGrid + * (C) Copyright 2021-2022 + * Andrea Di Iorio + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+//various aux functions
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+//#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <limits.h>
+#include <assert.h>
+#include <alloca.h>
+#include <math.h>
+
+#include "macros.h"
+#include "sparseMatrix.h"
+#include "utils.h"
+
+
+int urndFd; //will point to urandom device file
+
+///IO
+//UNBUFFERED IO
+int write_wrap(int fd,void* src,size_t count){
+	ssize_t wr;
+	size_t written=0;
+	while (written < count){
+		wr=write(fd,src+written,count-written);
+		if (wr<0){
+			perror("write");
+			return wr;
+		}
+		written+=wr;
+	}
+	return 0;
+}
+
+int read_wrap(int fd,void* dst,size_t count){
+	ssize_t rd;
+	size_t readed=0;
+	while (readed < count){
+		rd=read(fd,dst+readed,count-readed);
+		if (rd<0){
+			perror("read");
+			return rd;
+		}
+		readed+=rd;
+	}
+	return 0;
+}
+
+int readALL_wrap(int fd,char** dst,size_t* count){
+	ssize_t rd=!0; //to allow count > fsize
+	size_t readed=0;
+	char allocated=0; //flag if required *dst allocation
+	if (!(*dst)){ //allocate dst buffer of same size of file
+		off_t seekCurr=lseek(fd,0,SEEK_CUR);
+		off_t fsize=lseek(fd,0,SEEK_END);
+		if( seekCurr==-1 || fsize==-1 || lseek(fd,seekCurr,SEEK_SET)==-1){
+			perror("lseek");
+			return EXIT_FAILURE;
+		}
+		*count=fsize;
+		if (!(*dst=malloc(fsize))){
+			fprintf(stderr,"malloc read_wrap file size buf error\n");
+			return EXIT_FAILURE;
+		}
+		allocated=!0;
+	}
+	//read loop
+	while (readed < *count && rd > 0){
+		rd=read(fd,(*dst)+readed,*count-readed);
+		if (rd<0){
+			perror("read");
+			if (allocated) free(*dst);
+			return rd;
+		}
+		readed+=rd;
+	}
+	if (readed < *count) (*dst)[readed]='\0'; //TODO NEEDED?
+	return EXIT_SUCCESS;
+}
+int init_urndfd(){ // wrap init urndFd
+	if((urndFd=open(DRNG_DEVFILE,O_RDONLY))<0){
+		perror("open DRNG_DEVFILE");
+		return EXIT_FAILURE;
+	}
+	return EXIT_SUCCESS;
+}
+int createNewFile(char* const outFpath){
+	int mode=S_IRWXU;
+	errno = 0;
+	int outFd=open(outFpath, O_WRONLY | O_CREAT | O_TRUNC, mode);
+	if (errno==EEXIST)	outFd=open(outFpath, O_WRONLY | O_TRUNC, mode);
+	if (outFd<0)		perror("open outFd failed ");
+	return outFd;
+}
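+
+///example (illustrative only, not used by the library): round-trip a small
+///buffer through a file with the unbuffered wrappers above
+#ifdef UTILS_IO_EXAMPLE
+static int ioRoundTripExample(char* path){
+	double out[4] = {1,2,3,4}, in[4];
+	int fd = createNewFile(path);
+	if (fd < 0 || write_wrap(fd,out,sizeof(out)))	return EXIT_FAILURE;
+	if (close(fd))	return EXIT_FAILURE;
+	//createNewFile opens write-only: reopen the file for reading
+	if ((fd = open(path,O_RDONLY)) < 0 || read_wrap(fd,in,sizeof(in)))
+		return EXIT_FAILURE;
+	close(fd);
+	return memcmp(out,in,sizeof(out)) ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+#endif //UTILS_IO_EXAMPLE
+
+///BUFFERED IO
+//TODO series of 0 returns....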
fix +int fread_wrap(FILE* fp,void* dst,size_t count){ + int rd=0,toRead; + size_t readed=0; + while (readed < count){ + toRead = count - readed; + rd=fread(dst+readed,1,toRead,fp); + if (rd != toRead){ //TODO SHORT ITEM COUNT + if (feof(fp)) return EOF; + else if (ferror(fp)){ + ERRPRINT("fread_wrap errd\n"); + return -2; + } + //else ERRPRINT("W** SHORT ITEM COUNT RETURN MEANS!?"); //TODO OO + } + readed+=rd; + } + return rd; +} +///STRUCTURED DATA IO +int writeDoubleVector(char* fpath,double* v,ulong size){ + int fd,out=EXIT_FAILURE; + if ( (fd = createNewFile(fpath) ) < 0 ) goto _end; + write_wrap(fd,v,size * sizeof(*v)); //raw write of vector + out = EXIT_SUCCESS; + DEBUG printf("written double vector into %s RAW of %lu elements\n",fpath,size); + + _end: + if (close(fd) == EOF) perror("close errd\n"); + return out; +} + +///STRUCTURED DATA IO -- BUFFERED: FSCANF - FPRINTF +int writeDoubleVectorAsStr(char* fpath,double* v,ulong size){ + int out=EXIT_FAILURE; + FILE* fp = fopen(fpath,"w"); + if (!fp){ + perror("fopen vector file write"); + return EXIT_FAILURE; + } + for (ulong i=0; i= vectorSize ){ //realloc the array + vectorSize *= VECTOR_STEP_REALLOC; + if (!(tmp=realloc(out,vectorSize*sizeof(*out)))){ + ERRPRINTS("realloc errd to ~~ %lu MB\n",vectorSize >> 20); + goto _err; + } + out = tmp; + DEBUG printf("reallocd to ~~ %lu MB\n",vectorSize >> 20); + } + ulong toRead = MIN(VECTOR_READ_BLOCK,(vectorSize-i)); + if((rd = fread_wrap(fp,out + i,sizeof(*out)*toRead)) == -2){ + ERRPRINT("fread_wrap errd\n"); + goto _err; + } + if(feof(fp)) //TODO rd == EOF not work always != W*F??? + break; + i += rd/sizeof(out); + if( (hleft = rd % sizeof(out)) ){ + DEBUG hprintf("half left double read... rounding down fptr\n"); + if(fseek(fp,-hleft,SEEK_CUR)){ + perror("fseek in readDoubleVector"); + goto _err; + } + } + } + //REALLOC THE ARRAY TO THE FINAL SIZE + assert( i > 0 ); + if (!(tmp = realloc(out,*size*sizeof(*out)))){ + ERRPRINT("realloc errd\n"); + goto _err; + } + out = tmp; + DEBUG printf("readed vector from %s of %lu elements\n",fpath,*size); + goto _free; + + _err: + free(out); + out = NULL; + _free: + if (fclose(fp) == EOF) perror("fclose errd\n"); + return out; +} + +double* readDoubleVectorStr(char* fpath,ulong* size){ + int fscanfOut; + double *out,*tmp; + ulong i=0,vectorSize=RNDVECTORSIZE; + if (*size) vectorSize = *size; + FILE* fp = fopen(fpath,"r"); + if (!fp){ + perror("fopen vector file"); + return NULL; + } + if (!(out = malloc(vectorSize * sizeof(*out)))){ + ERRPRINT("vector read malloc fail for file\n"); + goto _err; + } + while (1){ + if (i >= vectorSize ){ //realloc the array + vectorSize *= VECTOR_STEP_REALLOC; + if (!(tmp=realloc(out,vectorSize*sizeof(*out)))){ + ERRPRINTS("realloc errd to ~~ %lu MB\n",vectorSize >> 20); + goto _err; + } + out = tmp; + DEBUG printf("reallocd to ~~ %lu MB\n",vectorSize >> 20); + } + fscanfOut = fscanf(fp,DOUBLE_STR_FORMAT,out + i++ ); + if ( fscanfOut == EOF && ferror(fp)){ + perror("invalid fscanf"); + goto _err; + } + if ( fscanfOut != 1 || fscanfOut == EOF ) break; //end of vector + } + //REALLOC THE ARRAY TO THE FINAL SIZE + assert( i > 0 ); + *size = --i; + if (!(tmp = realloc(out,*size*sizeof(*out)))){ + ERRPRINT("realloc errd\n"); + goto _err; + } + out = tmp; + DEBUG printf("readed vector from %s of %lu elements\n",fpath,*size); + goto _free; + + _err: + free(out); + out = NULL; + _free: + if (fclose(fp) == EOF) perror("fclose errd\n"); + return out; +} + +///CFG-AUX +int getConfig(CONFIG* conf){ + int changes=EXIT_FAILURE; + char 
*varVal,*ptr; + ulong val; + if ((varVal = getenv(GRID_ROWS))){ + val=strtoul(varVal,&ptr,10); + if (ptr==varVal || val>= UINT_MAX){ + perror("strtol errd"); + } else { + conf->gridRows = val; + } + changes = EXIT_SUCCESS; + } + if ((varVal = getenv(GRID_COLS))){ + val=strtoul(varVal,&ptr,10); + if (ptr==varVal || val>= UINT_MAX){ + perror("strtol errd"); + } else { + conf->gridCols = val; + } + changes = EXIT_SUCCESS; + } + return changes; +} + +/////LIB-SORTING -- WRAPPERS +//comparing functions +static inline int cmp_idx_t(const void* a, const void*b){ + idx_t aa=*((ulong*) a), bb = *((ulong*) b); + return aa==bb?0:aa>bb?1:-1; +} +static inline int cmpulong(const void* a, const void*b){ + ulong aa=*((ulong*) a), bb = *((ulong*) b); + return aa==bb?0:aa>bb?1:-1; +} +static inline int cmpuint(const void* a, const void*b){ + uint aa=*((uint*) a), bb = *((uint*) b); + return aa==bb?0:aa>bb?1:-1; +} +static inline int cmpRbNode(const void* a, const void* b){ + rbNode *aa=(rbNode*) a, *bb = (rbNode*) b; + return cmp_idx_t(&aa->key,&bb->key); +} +//sorting functions +void sort_idx_t(idx_t* arr, idx_t len){ + qsort(arr,len,sizeof(*arr),cmp_idx_t); +} +void sortulong(ulong* arr, ulong len){ + qsort(arr,len,sizeof(*arr),cmpulong); +} +void sortuint(uint* arr, uint len){ + qsort(arr,len,sizeof(*arr),cmpuint); +} +void sortRbNode(rbNode* arr, idx_t len){ + qsort(arr,len,sizeof(*arr),cmpRbNode); +} + +////Tests AUX +inline void assertArrNoRepetitions(idx_t* arrSorted, idx_t arrLen){ + if (arrLen > 0 ) return; + for (idx_t i=1,last=arrSorted[0]; i DOUBLE_DIFF_THREASH ){ + out = EXIT_FAILURE; + ERRPRINTS("DOUBLE VECTORS DIFF: DOUBLE_DIFF_THREASH=%lf\t<\t" + "|%13lg| = %lf %% of @a[%lu]\n", + DOUBLE_DIFF_THREASH,diff,100*diffAbs/ABS(a[i]),i); + #ifdef DOUBLE_VECT_DIFF_EARLY_EXIT + /*#pragma message("DOUBLE_VECT_DIFF_EARLY_EXIT: only first diff double reported")*/ + return EXIT_FAILURE; + #endif + } + if ( ABS(*diffMax) < diffAbs ) *diffMax = diff; + } + DEBUG{ + printf("\nchecked diff %s"CEND" between 2 double vector of %lu elements" + "\tmax diff: %le %s threash: %le\n", !out?CCC"OK":CCCERR"ERR!", + n,*diffMax,*diffMax=", + DOUBLE_DIFF_THREASH); + if (!*diffMax){ //self diff check uselss TODO REMOVE + if (!memcpy(a,b,n*sizeof(*a))) + printf("EXACT MATCHING AMONG THE 2 DOUBLE VECTORS\n!"); + } + } + return out; +} + +void printMatrix(double* mat,ulong m,ulong n,char justNZMarkers){ + printf("printing matrix: %lu x %lu\n",m,n); + ulong i,j; + for (i=0;i TRY REGULAR PATH + //search first 2 char after dot of ext in DECOMPRESS_CMDS to get the decompress cmd + if (strlen(ext) < 3 ){ + ERRPRINTS("NOT SUPPORTED DECOMPRESSION:\textension %s too short to be matched",ext); + return -1; + } + char extStart[3]; + extStart[0] = ext[1]; + extStart[1] = ext[2]; + extStart[2] = '\0'; + char* decompressCmdBase = searchPatternInStrs(extStart,DECOMPRESS_CMDS); + if (!decompressCmdBase){ + ERRPRINTS("NOT SUPPORTED DECOMPRESS FOR %s\n",ext); + return -1; + } + uint cmdLen = strlen(decompressCmdBase) + strlen(path) + 4 + strlen(tmpFsDecompressPath); + char* decompressCmd = alloca(cmdLen+1); + if (snprintf(decompressCmd,cmdLen+1,"%s %s > %s", + decompressCmdBase,path,tmpFsDecompressPath) < 0){ + ERRPRINT("extractInTmpFS, snprintf errd\n"); + } + VERBOSE printf("decompressing %s --> %s\ncmd:\t%s\n",path,tmpFsDecompressPath,decompressCmd); + return system(decompressCmd); +} diff --git a/base/serial/impl/sp3mm4amg/fbind/psb_f_spmm_ub.c b/base/serial/impl/sp3mm4amg/fbind/psb_f_spmm_ub.c new file mode 100644 index 
00000000..ae302346
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/fbind/psb_f_spmm_ub.c
@@ -0,0 +1,107 @@
+#include <stdlib.h>
+
+#include "../include/Sp3MM_CSR_OMP_Multi.h"
+#include "../include/utils.h"
+
+/**
+ * @brief performs the multiplication of two sparse matrices
+ * A and B stored in the CSR format. The resulting matrix is C
+ *
+ * @param[in] a_m number of rows of A
+ * @param[in] a_n number of columns of A
+ * @param[in] a_nz number of non zero elements of A
+ * @param[in] a_as array with the non zero coefficients of A
+ * @param[in] a_ja array with the column indices of the non
+ *                 zero coefficients of A
+ * @param[in] a_irp array with the indices of the beginning of
+ *                  each row in a_as and a_ja
+ * @param[in] a_rl array with the length of each row of A
+ * @param[in] a_max_row_nz maximum number of coefficients in a row of A
+ * @param[in] b_m number of rows of B
+ * @param[in] b_n number of columns of B
+ * @param[in] b_nz number of non zero elements of B
+ * @param[in] b_as array with the non zero coefficients of B
+ * @param[in] b_ja array with the column indices of the non
+ *                 zero coefficients of B
+ * @param[in] b_irp array with the indices of the beginning of
+ *                  each row in b_as and b_ja
+ * @param[in] b_rl array with the length of each row of B
+ * @param[in] b_max_row_nz maximum number of coefficients in a row of B
+ * @param[out] c_m number of rows of C
+ * @param[out] c_n number of columns of C
+ * @param[out] c_nz number of non zero elements in C
+ * @param[out] c_as array with the non zero coefficients of C
+ * @param[out] c_ja array with the column indices of the non
+ *                  zero coefficients of C
+ * @param[out] c_irp array with the indices of the beginning of
+ *                   each row in c_as and c_ja
+ * @param[out] c_rl array with the length of each row of C
+ * @param[out] c_max_row_nz maximum number of coefficients in a row of C
+ * @param[out] info return value to check if the operation was successful,
+ *                  passed by reference so the Fortran side can inspect it
+ */
+#ifdef ROWLENS
+void psb_f_spmm_row_by_row_ub_0(idx_t a_m, idx_t a_n, idx_t a_nz,
+				double *a_as, idx_t *a_ja,
+				idx_t *a_irp, idx_t *a_rl, idx_t a_max_row_nz,
+				idx_t b_m, idx_t b_n, idx_t b_nz,
+				double *b_as, idx_t *b_ja,
+				idx_t *b_irp, idx_t *b_rl, idx_t b_max_row_nz,
+				idx_t *c_m, idx_t *c_n, idx_t *c_nz,
+				double **c_as, idx_t **c_ja,
+				idx_t **c_irp, idx_t **c_rl, idx_t *c_max_row_nz,
+				int *info)
+#else
+void psb_f_spmm_row_by_row_ub_0(idx_t a_m, idx_t a_n, idx_t a_nz,
+				double *a_as, idx_t *a_ja,
+				idx_t *a_irp, idx_t a_max_row_nz,
+				idx_t b_m, idx_t b_n, idx_t b_nz,
+				double *b_as, idx_t *b_ja,
+				idx_t *b_irp, idx_t b_max_row_nz,
+				idx_t *c_m, idx_t *c_n, idx_t *c_nz,
+				double **c_as, idx_t **c_ja,
+				idx_t **c_irp, idx_t *c_max_row_nz,
+				int *info)
+#endif
+{
+	spmat a, b, *c;
+	CONFIG cfg = { .gridRows = 1, .gridCols = 1 }; //assumed sane defaults
+
+	// setting up cfg from the environment, if the related variables are set
+	// TODO : CHECK THAT THIS IS COMPATIBLE WITH PSB
+	getConfig(&cfg);
+
+	// wrapping the input arrays into spmat type matrices (no copies)
+	a.M  = a_m;
+	a.N  = a_n;
+	a.NZ = a_nz;
+	a.AS = a_as;
+	a.JA = a_ja;
+	a.IRP = a_irp;
+	a.MAX_ROW_NZ = a_max_row_nz;
+
+	b.M  = b_m;
+	b.N  = b_n;
+	b.NZ = b_nz;
+	b.AS = b_as;
+	b.JA = b_ja;
+	b.IRP = b_irp;
+	b.MAX_ROW_NZ = b_max_row_nz;
+
+	#ifdef ROWLENS
+	a.RL = a_rl;
+	b.RL = b_rl;
+	#endif //ROWLENS
+
+	// performing spmm
+	if (!(c = spmmRowByRow_0(&a, &b, &cfg))){
+		*info = EXIT_FAILURE;
+		return;
+	}
+
+	// output result, handing the ownership of C's arrays to the caller
+	*(c_m) = c->M;
+	*(c_n) = c->N;
+	*(c_nz)= c->NZ;
+	*(c_as)= c->AS;
+	*(c_ja)= c->JA;
+	*(c_irp)=c->IRP;
+	*(c_max_row_nz) = c->MAX_ROW_NZ;
+	#ifdef ROWLENS
+	*(c_rl)= c->RL;
+	#endif
+	free(c);	//only the spmat shell: its arrays now belong to the caller
+	*info = EXIT_SUCCESS;
+}
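+
+#if defined PSB_F_SPMM_UB_EXAMPLE && !defined ROWLENS
+/* Minimal sketch of a C-side call (illustrative only; PSBLAS reaches this
+ * wrapper through its Fortran interface): 2x2 identity times itself,
+ * 0-based CSR indexes. */
+int main(void){
+	double as[2] = {1.0, 1.0};
+	idx_t  ja[2] = {0, 1}, irp[3] = {0, 1, 2};
+	idx_t  cm, cn, cnz, cmaxrnz, *cja, *cirp;
+	double *cas;
+	int    info;
+	psb_f_spmm_row_by_row_ub_0(2,2,2, as,ja,irp,1,
+	                           2,2,2, as,ja,irp,1,
+	                           &cm,&cn,&cnz, &cas,&cja,&cirp, &cmaxrnz, &info);
+	return info;
+}
+#endif //PSB_F_SPMM_UB_EXAMPLE
diff --git a/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_Multi.h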
b/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_Multi.h new file mode 100644 index 00000000..57676b59 --- /dev/null +++ b/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_Multi.h @@ -0,0 +1,94 @@ +/* + * Sp3MM_for_AlgebraicMultiGrid + * (C) Copyright 2021-2022 + * Andrea Di Iorio + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ +//CORE IMPLEMENTATIONS HEADER +#ifndef SP3MM_CSR_OMP_MULTI_H +#define SP3MM_CSR_OMP_MULTI_H + +///commons single implementation stuff +#include "macros.h" +#include "sparseMatrix.h" + +///aux structures +//hold SPMM result over a unpartitionated space among threads-row[s' blocks] +typedef struct{ + //space to hold SPMM output + ulong* JA; + double* AS; + ulong size; //num of entries allocated -> only dbg checks + ulong lastAssigned; //last JA&AS assigned index to an accumulator(atom) + SPACC* accs; //SPARSIFIED ACC POINTERS + uint accsNum; +} SPMM_ACC; //accumulator for SPMM +///compute function interface and its pointer definitions +typedef spmat* ( SPMM) (spmat*,spmat*,CONFIG*); +typedef spmat* (*SPMM_INTERF) (spmat*,spmat*,CONFIG*); +typedef spmat* ( SP3MM) (spmat*,spmat*,spmat*,CONFIG*,SPMM_INTERF); +typedef spmat* (*SP3MM_INTERF) (spmat*,spmat*,spmat*,CONFIG*,SPMM_INTERF); + +typedef enum { + _1D_DIRECT, + _1D_BLOCKS, + _2D_OFFSET, + _2D_ALLOCD +} SPMM_IMPL_TYPE; +///-- commons single implementation stuff + +///includes +#include "linuxK_rbtree_minimalized.h" +#include "Sp3MM_CSR_OMP_SymbStep_Multi.h" + +//extern char TRGT_IMPL_START_IDX; //multi implementation switch +#include "sparseUtilsMulti.h" +#ifdef OFF_F //save "includer" OFF_F value before overwriting it + #pragma push_macro("OFF_F") + #define _OFF_F_OLD + #undef OFF_F +#endif + + +#define OFF_F 0 +#include "Sp3MM_CSR_OMP_UB_Generic.h" +#include "Sp3MM_CSR_OMP_Num_Generic.h" +#undef OFF_F + +#define OFF_F 1 +#include "Sp3MM_CSR_OMP_UB_Generic.h" +#include "Sp3MM_CSR_OMP_Num_Generic.h" +#undef OFF_F + + + +#ifdef _OFF_F_OLD + #pragma pop_macro("OFF_F") + #undef _OFF_F_OLD +#endif + + +#endif //SP3MM_CSR_OMP_MULTI_H diff --git a/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_Num_Generic.h b/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_Num_Generic.h new file mode 100644 index 00000000..91c20449 --- /dev/null +++ b/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_Num_Generic.h @@ -0,0 +1,95 @@ +/* + * Sp3MM_for_AlgebraicMultiGrid + * (C) Copyright 2021-2022 + * Andrea Di Iorio + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef OFF_F + /*#pragma message("generic implementation requires OFF_F defined")*/ + #error generic implementation requires OFF_F defined +#endif + +/////SYMBOLIC - NUMERIC IMPLEMENTATIONS +///SP3MM FUNCTIONS +/* + * triple matrix multiplication among @R * @AC * @P using gustavson parallel implementation + * implmented as a pair of subsequent spmm operations + * if @conf->spmm != NULL, it will be used as spmm function, otherwise euristics will be + * used to decide wich implementation to use + */ +SP3MM CAT(sp3mmRowByRowPair_SymbNum_,OFF_F); + +/* + * row-by-row-by-row implementation: forwarding @R*@AC rth row to P for row-by-row + * accumulation in preallocated space, TODO exactly determined + * basic parallelization: 1thread per @R's rows that will also forward the result to P + */ +SP3MM CAT(sp3mmRowByRowMerged_SymbNum_,OFF_F); + +///SUB FUNCTIONS +///SPMM FUNCTIONS +/* + * sparse parallel implementation of @A * @B parallelizing Gustavson row-by-row + * formulation using an aux dense vector @_auxDense + * return resulting product matrix + */ +SPMM CAT(spmmRowByRow_SymbNum_,OFF_F); +/* + * sparse parallel implementation of @A * @B parallelizing Gustavson + * with partitioning of @A in @conf->gridRows blocks of rows + * return resulting product matrix + */ +SPMM CAT(spmmRowByRow1DBlocks_SymbNum_,OFF_F); + +/* + * sparse parallel implementation of @A * @B as Gustavson parallelizzed in 2D + * with partitioning of + * @A into rows groups, uniform rows division + * @B into cols groups, uniform cols division, accessed by aux offsets + */ +SPMM CAT(spmmRowByRow2DBlocks_SymbNum_,OFF_F); + +/* + * sparse parallel implementation of @A * @B as Gustavson parallelizzed in 2D + * with partitioning of + * @A into rows groups, uniform rows division + * @B into cols groups, uniform cols division, ALLOCATED as CSR submatrixes + */ +SPMM CAT(spmmRowByRow2DBlocksAllocated_SymbNum_,OFF_F); + +///implementation wrappers as static array of function pointers +//sp3mm as pair of spmm +static SPMM_INTERF CAT(Spmm_SymbNum_Funcs_,OFF_F)[] = { + & CAT(spmmRowByRow_SymbNum_,OFF_F), + & CAT(spmmRowByRow1DBlocks_SymbNum_,OFF_F), + //& CAT(spmmRowByRow2DBlocks_SymbNum_,OFF_F), + //& CAT(spmmRowByRow2DBlocksAllocated_SymbNum_,OFF_F) +}; +//sp3mm as pair of spmm +static SP3MM_INTERF CAT(Sp3mm_SymbNum_Funcs_,OFF_F)[] = { + //& CAT(sp3mmRowByRowMerged_SymbNum_,OFF_F) +}; diff --git a/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_SymbStep_Generic.h b/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_SymbStep_Generic.h new file mode 100644 index 00000000..017257eb --- /dev/null +++ b/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_SymbStep_Generic.h @@ -0,0 +1,195 @@ +/* + * Sp3MM_for_AlgebraicMultiGrid + * (C) Copyright 2021-2022 + * Andrea Di Iorio + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +/* + * CSR Sp[3]MM Symbolic step implementations + * target: compute the output matrix size and the row lens for preallocation + * direct write out partial results + * See interfaces in respective header + * + * MultiImplementations functions with all parameters + * several functions (lower level) has config based on + * OUT_IDXS for returing or using also symbMul Output idxs somehow + * COL_PARTS for returing or using also distribution of symb out idxs in col partition + */ + +///MultiImplementations +///setup aux macros for different signatures implementation via #if arith expr +#ifdef OUT_IDXS + #define _OUT_IDXS TRUE +#else + #define _OUT_IDXS FALSE + #define OUT_IDXS _UNDEF +#endif +#ifdef COL_PARTS + #define _COL_PARTS TRUE +#else + #define _COL_PARTS FALSE + #define COL_PARTS _UNDEF +#endif +/// +#if !defined OFF_F + #error generic implementations requires OFF_F +#endif + + + + +/* + * Compute symbolic product of (nnz indexes of) row @aRowJA and matrix @b + * insert nnz indexes of the mul. result row as nodes in a rbtree rooted at @root + * with nodes in @nodes which have to be enough for the mul result row (use an UB) + * Retuns: multiplication result row NNZ number,se CONFIG_MACROS below for more + * + * CONFIG_MACROS: + * if _OUT_IDXS == TRUE return mul.result row nnz idxs in @outIdxs + * ifdef: OUT_IDXS_RBTREE_NODES: nnz indexes returned inplace sorting rbtree + * as nnz indexes(JA) of the mul result row + * else: stop at returning the mul. 
result row lenght + * if _COL_PARTS == TRUE return the number of nonzero elements in + * in each of the @gridCols column partitions inside @rowColPartsLens + * OFF_F: offset back indexes from fortran + * TODO also output indexes are shifted (see c_b ) + +idx_t CAT4(SpMM_Row_Symb_Rbtree,OUT_IDXS,COL_PARTS,OFF_F) (idx_t* aRowJA,idx_t aRowLen, + spmat* b,rbRoot* root,rbNode* nodes + #if _OUT_IDXS == TRUE + #ifndef OUT_IDXS_RBTREE_NODES + ,idx_t* outIdxs + #endif + #endif + #if _COL_PARTS == TRUE + ,ushort gridCols,idx_t* rowColPartsOffsets + #endif + ); + */ + +/* + * SPVECT_IDX_DENSE_MAP based, as SpMM_Row_Symb_Rbtree but with idxMap aux idx keeping + * CONFIG_MACROS (new) + * IDX_RMUL_SYMB_RBTREE && ( _OUT_IDXS == T || _COL_PARTS == T ): + * (tmp) symb mult out indexes will be kept via a rbtree + * otherwise directly in the out array appending them and then sorting them + * (potentially same n log n) + +idx_t CAT4(SpMM_Row_Symb_IdxMap,OUT_IDXS,COL_PARTS,OFF_F) + ( + idx_t* aRowJA, idx_t aRowLen, spmat* b, SPVECT_IDX_DENSE_MAP* idxsMapAcc + #if _OUT_IDXS == TRUE + ,idx_t* outIdxs + #if IDX_RMUL_SYMB_RBTREE == TRUE + ,rbRoot* root, rbNode* nodes + #endif + #endif // _OUT_IDXS == TRUE + #if _COL_PARTS == TRUE + ,ushort gridCols,idx_t* rowColPartsLens + #endif + ); + */ + +/* + * Compute symbolic product of sparse matrixes @a * @b + * Alloc aux structures based on a upper bounded allocation + * Returns array of exact row lens of the result mul matrix c=@a*@b + * plus an extra entry for the cumulative num of matrix nnz + * (interoperability with upper bound implementations) + * + * CONFIG_MACROS: + * if _OUT_IDXS == TRUE: + * return in *@outIdxs pointers to overallocd JAs nnz indexes + * *outIdxs[0] --> start of first row (its size in 0th of returned array) + * also is the malloc returned address for later free + * [NB: rows are not contiguos in the allocation] + * if _COL_PARTS == TRUE: return in *@rowColPartsLens + * a matrix @a->M * @gridCols of offsets + * for each column partition in each row + */ +idx_t* CAT4(SpMM_Symb_,OUT_IDXS,COL_PARTS,OFF_F) + ( + ROW_MMSYM_IMPL_MODE symbRowImplID, spmat* a, spmat* b + #if _OUT_IDXS == TRUE + ,idx_t*** outIdxs + #endif + #if _COL_PARTS == TRUE + ,ushort gridCols, idx_t** rowColPartsLens + #endif + ); + +//////Sp3MM - rowByrowByrow +#define SP3MM_UB_HEURISTIC 2 +#if !defined COL_PARTS && defined OUT_IDXS +///MultiImplementations functions without COL_PARTS +//NB required OUT_IDXS for initial row-by-row step... 
+/* + * as earlier but meant for work with + * triple product as rob-by-row-by-row forwarding: + * @abRowJATmp is a tmp storage for first row-by-row sym product + * has to be big enough to store all the possible nonzero elements + * @nodes has to be big enough to store all possible nnz of ab and abc row + * (not only ab row as earlier version) + * + */ +idx_t CAT3(Sp3MM_Row_Symb_,OUT_IDXS,OFF_F) + ( + ROW_MMSYM_IMPL_MODE symbMMRowImplID, idx_t* aRowJA,idx_t aRowLen, + spmat* b,spmat* c, rbRoot* root,rbNode* nodes, idx_t* abRowJATmp + #if _OUT_IDXS == TRUE + #ifndef OUT_IDXS_RBTREE_NODES + ,idx_t* outIdxs + #endif + #endif + ); + +/* + * triple product @a*@b*@c as rob-by-row-by-row forwarding: + * Returns: resulting matrix rows' sizes exactly in an array + * plus an extra element for the res.matrix's total size + * CONFIG_MACROS: + * if _OUT_IDXS == TRUE: + * return in *@outIdxs pointers to overallocd JAs nnz indexes + * *outIdxs[0] --> start of first row (its size in 0th of returned array) + * also is the malloc returned address for later free + * [NB: rows are not contiguos in the allocation] + */ +//TODO HEURISTICS IN _OUT_IDXS to avoid serializing after first sym.product +// see HEURISTICS_UB +idx_t* CAT3(Sp3MM_Symb_,OUT_IDXS,OFF_F) + ( + ROW_MMSYM_IMPL_MODE symbMMRowImplID, spmat* a, spmat* b, spmat* c + #if _OUT_IDXS == TRUE + ,idx_t*** outIdxs + #endif + ); + +#endif + +#undef OUT_IDXS +#undef _OUT_IDXS +#undef COL_PARTS +#undef _COL_PARTS diff --git a/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_SymbStep_Multi.h b/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_SymbStep_Multi.h new file mode 100644 index 00000000..f46a2779 --- /dev/null +++ b/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_SymbStep_Multi.h @@ -0,0 +1,80 @@ +/* + * Sp3MM_for_AlgebraicMultiGrid + * (C) Copyright 2021-2022 + * Andrea Di Iorio + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SP3MM_CSR_OMP_SYMB_MULTI +#define SP3MM_CSR_OMP_SYMB_MULTI + +/* Multi implementation of symbolic product of sparse matrixes, config macros + * OFF_F: C-Fortran spmat indexing + * OUT_IDXS: indexes output + * COL_PARTS: partitioning columns output... + */ + +#define OUT_IDXS_ON OutIdxs_ +#define COL_PARTS_ON ColParts_ +#undef OUT_IDXS +#undef COL_PARTS + +#define OFF_F 0 + ///generate basic versions + #include "Sp3MM_CSR_OMP_SymbStep_Generic.h" + ///generate outIdxs versions + #define OUT_IDXS OUT_IDXS_ON + #include "Sp3MM_CSR_OMP_SymbStep_Generic.h" + #undef OUT_IDXS + ///generate colParts versions + #define COL_PARTS COL_PARTS_ON + #include "Sp3MM_CSR_OMP_SymbStep_Generic.h" + //generate outIdxs AND colParts ve sions + #define OUT_IDXS OUT_IDXS_ON + #include "Sp3MM_CSR_OMP_SymbStep_Generic.h" + + #undef OUT_IDXS + #undef COL_PARTS +#undef OFF_F +#define OFF_F 1 + ///generate basic versions + #include "Sp3MM_CSR_OMP_SymbStep_Generic.h" + ///generate outIdxs versions + #define OUT_IDXS OUT_IDXS_ON + #include "Sp3MM_CSR_OMP_SymbStep_Generic.h" + #undef OUT_IDXS + ///generate colParts versions + #define COL_PARTS COL_PARTS_ON + #include "Sp3MM_CSR_OMP_SymbStep_Generic.h" + //generate outIdxs AND colParts ve sions + #define OUT_IDXS OUT_IDXS_ON + #include "Sp3MM_CSR_OMP_SymbStep_Generic.h" + + #undef OUT_IDXS + #undef COL_PARTS +#undef OFF_F + +#endif diff --git a/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_UB_Generic.h b/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_UB_Generic.h new file mode 100644 index 00000000..dc59a6e7 --- /dev/null +++ b/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_UB_Generic.h @@ -0,0 +1,95 @@ +/* + * Sp3MM_for_AlgebraicMultiGrid + * (C) Copyright 2021-2022 + * Andrea Di Iorio + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
diff --git a/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_UB_Generic.h b/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_UB_Generic.h
new file mode 100644
index 00000000..dc59a6e7
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_UB_Generic.h
@@ -0,0 +1,95 @@
+/*
+ * Sp3MM_for_AlgebraicMultiGrid
+ * (C) Copyright 2021-2022
+ * Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions, and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
+ * not be used to endorse or promote products derived from this
+ * software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef OFF_F
+ /*#pragma message("generic implementation requires OFF_F defined")*/
+ #error generic implementation requires OFF_F defined
+#endif
+
+///SP3MM FUNCTIONS
+/*
+ * triple matrix multiplication among @R * @AC * @P using Gustavson's parallel implementation,
+ * implemented as a pair of subsequent spmm operations
+ * if @conf->spmm != NULL, it will be used as the spmm function, otherwise heuristics will be
+ * used to decide which implementation to use
+ */
+SP3MM CAT(sp3mmRowByRowPair_,OFF_F);
+
+/*
+ * row-by-row-by-row implementation: forwarding the rth row of @R*@AC to P for row-by-row
+ * accumulation in preallocated space (TODO: exactly determined)
+ * basic parallelization: one thread per @R row, which also forwards the result to P
+ */
+SP3MM CAT(sp3mmRowByRowMerged_,OFF_F);
+
+///SUB FUNCTIONS
+///SPMM FUNCTIONS
+SPMM CAT(spmmSerial_,OFF_F); //single-threaded version for debugging (oracle-less)
+/*
+ * sparse parallel implementation of @A * @B parallelizing Gustavson's row-by-row
+ * formulation using an aux dense vector @_auxDense
+ * return resulting product matrix
+ */
+SPMM CAT(spmmRowByRow_,OFF_F);
+/*
+ * sparse parallel implementation of @A * @B parallelizing Gustavson
+ * with partitioning of @A in @conf->gridRows blocks of rows
+ * return resulting product matrix
+ */
+SPMM CAT(spmmRowByRow1DBlocks_,OFF_F);
+
+/*
+ * sparse parallel implementation of @A * @B as Gustavson parallelized in 2D
+ * with partitioning of
+ * @A into rows groups, uniform rows division
+ * @B into cols groups, uniform cols division, accessed by aux offsets
+ */
+SPMM CAT(spmmRowByRow2DBlocks_,OFF_F);
+
+/*
+ * sparse parallel implementation of @A * @B as Gustavson parallelized in 2D
+ * with partitioning of
+ * @A into rows groups, uniform rows division
+ * @B into cols groups, uniform cols division, ALLOCATED as CSR submatrices
+ */
+SPMM CAT(spmmRowByRow2DBlocksAllocated_,OFF_F);
+
+///implementation wrappers as static array of function pointers
+//sp3mm as pair of spmm
+static SPMM_INTERF CAT(Spmm_UB_Funcs_,OFF_F)[] = {
+ & CAT(spmmRowByRow_,OFF_F),
+ & CAT(spmmRowByRow1DBlocks_,OFF_F),
+ & CAT(spmmRowByRow2DBlocks_,OFF_F),
+ & CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)
+};
+//sp3mm as direct product
+static SP3MM_INTERF CAT(Sp3mm_UB_Funcs_,OFF_F)[] = {
+ & CAT(sp3mmRowByRowMerged_,OFF_F)
+};
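+
+/*
+ * usage sketch (illustration only; the exact SPMM signature is assumed):
+ * with OFF_F == 0 this header generates the C-indexed tables, so an
+ * implementation can be picked by index at runtime, e.g.:
+ * SPMM_INTERF spmm = Spmm_UB_Funcs_0[2]; // -> spmmRowByRow2DBlocks_0
+ * spmat* AB = spmm(A, B, &conf);
+ */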
diff --git a/base/serial/impl/sp3mm4amg/include/SpMMUtilsGeneric.h b/base/serial/impl/sp3mm4amg/include/SpMMUtilsGeneric.h
new file mode 100644
index 00000000..1eb40410
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/SpMMUtilsGeneric.h
@@ -0,0 +1,406 @@
+/*
+ * Sp3MM_for_AlgebraicMultiGrid
+ * (C) Copyright 2021-2022
+ * Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions, and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
+ * not be used to endorse or promote products derived from this
+ * software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+///scalar-vector multiply
+//TODO what gain is there in using only the generic version of the next 2 functions?
+/*
+ * Sparse vector part <-> scalar multiplication in a dense output
+ * the sparse vector part will hold nnz values in @vectVals (AS subvector)
+ * with corresponding indexes in @vectIdxs in range [0,@aux->vLen) (JA subvector)
+ * resulting vector accumulated in a dense array in @aux->v, along with nnzIdx
+ * both the accumulator's dense array and nnzIdx in @aux have to be big enough for the index range
+ */
+inline void CAT(scSparseVectMul_,OFF_F)
+ (double scalar,double* vectVals,ulong* vectIdxs,ulong vectLen, ACC_DENSE* aux){
+ for (ulong i=0,j; i<vectLen; i++){
+ j = vectIdxs[i]-OFF_F;
+ DEBUGCHECKS{
+ if (j>=aux->vLen){
+ fprintf(stderr,"index %lu outside vLen %lu\n",j,aux->vLen);
+ assert(j < aux->vLen);
+ }
+ }
+ aux->v[j] += vectVals[i] * scalar; //accumulate
+ //append new nonzero index to auxVNNZeroIdxs for quick sparsify
+ //if (!(aux->v[j])) aux->nnzIdx[ aux->nnzIdxMap.len++ ] = j; //TODO numerical zero may then cause readd
+ if(!spVect_idx_in(j,&aux->nnzIdxMap))
+ aux->nnzIdx[ aux->nnzIdxMap.len-1 ] = j;
+ }
+}
+
+/* Same as scSparseVectMul_ above, but considering an initial offset to remove from each idx
+ * Sparse vector part <-> scalar multiplication in a dense output
+ * @vectVals: sparse vector part values ( from spmat.AS )
+ * with [at least] @vectLen corresponding
+ * target idxs in @vectIdxs (from spmat.JA ) starting from @startIdx
+ *
+ * Resulting vector accumulated in a dense array in @aux->v, along with nnzIdx
+ * all nnz values' indexes will be shifted back by @startIdx in @aux
+ * both the accumulator's dense array and nnzIdx in @aux have to be big enough for the index range
+ */
+inline void CAT(scSparseVectMulPart_,OFF_F)(double scalar,double* vectVals,
+ ulong* vectIdxs,ulong vectLen,ulong startIdx,ACC_DENSE* aux){
+ for (ulong i=0,j; i<vectLen; i++){
+ j = vectIdxs[i]-OFF_F-startIdx; //shift back the nnz index by @startIdx
+ DEBUGCHECKS{
+ if (j>=aux->vLen){
+ fprintf(stderr,"index %lu outside vLen %lu\n",j,aux->vLen);
+ assert(j < aux->vLen);
+ }
+ }
+ aux->v[j] += vectVals[i] * scalar; //accumulate
+ //append new nonzero index to auxVNNZeroIdxs for quick sparsify
+ //if (!(aux->v[j])) aux->nnzIdx[ aux->nnzIdxMap.len++ ] = j;
+ ////TODO numerical zero may then cause readd
+ if(!spVect_idx_in(j,&aux->nnzIdxMap))
+ aux->nnzIdx[ aux->nnzIdxMap.len-1 ] = j;
+ }
+}
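+
+//a minimal standalone sketch (NOT built) of the dense-accumulator pattern
+//implemented above, using hypothetical plain CSR arrays instead of
+//ACC_DENSE/spmat; the nnzIdx gathering here ignores the numerical-zero
+//caveat that spVect_idx_in handles in the real code
+#if 0
+static void spaRowSketch(ulong aLen,double* aVal,ulong* aCol,
+ ulong* bIRP,ulong* bJA,double* bAS, double* v,ulong* nnzIdx,ulong* nnz){
+ for (ulong k=0; k<aLen; k++){ //for each nnz a_ik of A.row(i)
+ double scalar = aVal[k];
+ for (ulong jj=bIRP[aCol[k]]; jj<bIRP[aCol[k]+1]; jj++){
+ ulong j = bJA[jj];
+ if (!v[j]) nnzIdx[(*nnz)++] = j; //first touch of column j
+ v[j] += scalar * bAS[jj]; //accumulate a_ik * b_kj
+ }
+ } //then: sort nnzIdx, gather v[] into JA/AS, reset v for the next row
+}
+#endif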
+
+
+///MultiImplementations functions
+#ifndef OFF_F
+ #error generic implementation requires OFF_F defined
+#endif
+
+///TODO IMPLEMENT SCALAR<->ROW MUL AS GENERIC SPARSE VECTOR<->SCALAR MUL
+//inline void scSparseRowMul(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux){
+inline void CAT(scSparseRowMul_,OFF_F)(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux){
+ ulong rowStartIdx = mat->IRP[trgtR]-OFF_F,rowLen;
+ #ifdef ROWLENS
+ rowLen = mat->RL[trgtR];
+ #else
+ rowLen = mat->IRP[trgtR+1] - mat->IRP[trgtR];
+ #endif
+ CAT(scSparseVectMul_,OFF_F)(scalar,mat->AS+rowStartIdx,mat->JA+rowStartIdx,rowLen,aux);
+ //TODO check impact of generic version use
+}
+
+
+///OUTPUT SIZE PREDICTION
+
+/* O(A.NZ [+B.M] )
+ * return array of upper-bounded row sizes of the product @A * @B,
+ * with an extra element appended at the end for the cumulative total size of the matrix AB = A*B
+ */
+inline idx_t* CAT(spMMSizeUpperbound_,OFF_F)(spmat* A,spmat* B){
+ AUDIT_INTERNAL_TIMES Start = omp_get_wtime();
+ idx_t* rowSizes = calloc((A->M+1), sizeof(*rowSizes));
+ if (!rowSizes){
+ ERRPRINT("spMMSizeUpperbound: rowSizes calloc errd\n");
+ return NULL;
+ }
+ idx_t fullMatBound = 0;
+ #pragma omp parallel for schedule(static) reduction(+:fullMatBound)
+ for (idx_t r=0; r<A->M; r++){
+ for (idx_t jj=A->IRP[r] - OFF_F,j,rlen; jj<A->IRP[r+1] - OFF_F; jj++){
+ j = A->JA[jj] - OFF_F;
+ #ifdef ROWLENS
+ rlen = B->RL[j];
+ #else
+ rlen = B->IRP[j+1] - B->IRP[j]; //OFF_F: delta is offset independent
+ #endif
+ rowSizes[r] += rlen;
+ fullMatBound += rlen;
+ //rowSizes[A->M] += rlen; //just below omp reduction sum
+ }
+ }
+ rowSizes[A->M] = fullMatBound;
+ AUDIT_INTERNAL_TIMES End= omp_get_wtime();
+ VERBOSE
+ printf("spMMSizeUpperbound:%lu\t%le s\n",rowSizes[A->M],End-Start);
+ return rowSizes;
+}
+
+/* O(A.NZ * gridCols)
+ * return matrix @A.M x gridCols of upper bounds,
+ * one for each of the @gridCols col partitions of the output matrix AB = @A * @B,
+ * with an extra element appended at the end for the cumulative total size of the matrix AB
+ */
+inline idx_t* CAT(spMMSizeUpperboundColParts_,OFF_F)
+ (spmat* A,spmat* B,ushort gridCols,idx_t* bColPartOffsets){
+ AUDIT_INTERNAL_TIMES Start = omp_get_wtime();
+
+ idx_t* rowPartsSizes = calloc((A->M*gridCols +1), sizeof(*rowPartsSizes));
+ if (!rowPartsSizes){
+ ERRPRINT("spMMSizeUpperboundColParts: rowPartsSizes calloc errd\n");
+ return NULL;
+ }
+
+ idx_t fullMatBound = 0;
+ #pragma omp parallel for schedule(static) reduction(+:fullMatBound)
+ for (idx_t r=0; r<A->M; r++){
+ //for each A.row -> sum B.colParts lens
+ for (idx_t jj=A->IRP[r]-OFF_F,j,rlen; jj<A->IRP[r+1]-OFF_F; jj++){
+ j = A->JA[jj] - OFF_F;
+ for (idx_t gc=0,bPartID=IDX2D(j,gc,gridCols); gc < gridCols; gc++,bPartID++){
+ rlen = bColPartOffsets[bPartID+1] - bColPartOffsets[bPartID];
+ rowPartsSizes[ IDX2D(r,gc,gridCols) ] += rlen;
+ fullMatBound += rlen;
+ }
+ }
+ }
+ rowPartsSizes[ A->M*gridCols ] = fullMatBound;
+ AUDIT_INTERNAL_TIMES End= omp_get_wtime();
+ VERBOSE
+ printf("spMMSizeUpperboundColParts_:%lu\t%le s\n",rowPartsSizes[A->M*gridCols],End-Start);
+ return rowPartsSizes;
+}
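+
+/* worked example (illustration only): if A.row(i) has nnz at columns {0,2},
+ * B.row(0) has 3 nnz and B.row(2) has 2 nnz, then rowSizes[i] = 5; the true
+ * AB.row(i) size can only be <= 5, since products colliding on the same
+ * output column merge into a single nonzero. */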
+
+
+////Single implementations funcs
+#ifndef SPMMUTILS_H_SINGLE_IMPLEMENTATION
+#define SPMMUTILS_H_SINGLE_IMPLEMENTATION
+
+#include "utils.h"
+///Allocs - Free
+//SpMM holder of accumulators
+inline SPMM_ACC* initSpMMAcc(ulong entriesNum, ulong accumulatorsNum){
+ SPMM_ACC* out = calloc(1,sizeof(*out));
+ if (!out){
+ ERRPRINT("initSpMMAcc: out calloc errd\n");
+ return NULL;
+ }
+ out->size = entriesNum;
+ if (!(out->JA = malloc(entriesNum * sizeof(*(out->JA))))){
+ ERRPRINT("initSpMMAcc: JA malloc errd\n");
+ goto _err;
+ }
+ if (!(out->AS = malloc(entriesNum * sizeof(*(out->AS))))){
+ ERRPRINT("initSpMMAcc: AS malloc errd\n");
+ goto _err;
+ }
+ if (!(out->accs = malloc(accumulatorsNum * sizeof(*(out->accs))))){
+ ERRPRINT("initSpMMAcc: accs malloc errd\n");
+ goto _err;
+ }
+ return out;
+
+ _err:
+ if (out->JA) free(out->JA);
+ if (out->AS) free(out->AS);
+ if (out->accs) free(out->accs);
+ if (out) free(out);
+ return NULL;
+}
+inline void freeSpMMAcc(SPMM_ACC* acc){
+ free(acc->JA);
+ free(acc->AS);
+ free(acc->accs);
+ free(acc);
+}
+///// dense acc sparsify functions
+/*
+ * sparsify dense accumulated vector @accV (with indexes shifted by @startColAcc)
+ * into sparse accumulator @accSparse that'll use space for nnz entries from @acc
+*/
+///DIRECT OUT MATRIX SPARSIFY
+/*
+ * sparsify @accV directly inside row @row of matrix @m
+ * considering, if given not NULL, 2D partitioning with
+ * @gridCols cols groups and colGroups offsets per row matrix @colPartsOffsets
+ * :Returns inplace modify of @m
+ */
+static inline void sparsifyDirect(ACC_DENSE* accV,spmat* m,idx_t row){
+ idx_t nnz = accV->nnzIdxMap.len, sparsifyStart = m->IRP[row], sparsifyEnd = m->IRP[row+1];
+ sort_idx_t(accV->nnzIdx,nnz); //sort nnz idx for ordered write
+ DEBUGCHECKS assertArrNoRepetitions(accV->nnzIdx,nnz);
+ DEBUGCHECKS assert(nnz <= (sparsifyEnd - sparsifyStart));
+
+ for (idx_t i=0,j; i < nnz; i++){
+ j = accV->nnzIdx[i];
+ m -> JA[sparsifyStart + i] = j; //+ startColAcc;
+ m -> AS[sparsifyStart + i] = accV->v[j];
+ }
+}
+//TODO 2D partitioning - colParts version of above...best to use multi impl trick of symbStep
+static inline void sparsifyDirectColParts(ACC_DENSE* accV,spmat* m,idx_t row,
+ ushort colGroupId,ushort gridCols, idx_t* colPartsOffsets,idx_t startCol){
+ idx_t nnz = accV->nnzIdxMap.len;
+ ushort partID = IDX2D(row,colGroupId,gridCols);
+ idx_t sparsifyStart = colPartsOffsets[partID], sparsifyEnd = colPartsOffsets[partID+1];
+ sort_idx_t(accV->nnzIdx,nnz); //sort nnz idx for ordered write
+ DEBUGCHECKS assertArrNoRepetitions(accV->nnzIdx,nnz);
+ DEBUGCHECKS assert( nnz == sparsifyEnd - sparsifyStart );
+
+ for (idx_t i=0,j; i < nnz; i++){
+ j = accV->nnzIdx[i];
+ m -> JA[sparsifyStart + i] = j + startCol;
+ m -> AS[sparsifyStart + i] = accV->v[j];
+ }
+
+}
+
+///UB SPACE SPARSIFY
+//internal sparsify of dense acc inside a (prepared) sparse acc struct
+static inline void _sparsifyUB(ACC_DENSE* accV,SPACC* accSparse,idx_t startColAcc){
+ idx_t nnz = accV->nnzIdxMap.len;
+ sort_idx_t(accV->nnzIdx,nnz); //sort nnz idx for ordered write
+ DEBUGCHECKS assertArrNoRepetitions(accV->nnzIdx,nnz);
+ for (idx_t i=0,j; i < nnz; i++){
+ j = accV->nnzIdx[i];
+ accSparse -> JA[i] = j + startColAcc;
+ accSparse -> AS[i] = accV->v[j];
+ }
+ accSparse -> len = nnz;
+}
+
+//row[Part] sparsified in a thread-safe, exactly-sized reserved area using atomics
+static inline void sparsifyUBNoPartsBounds
+ (SPMM_ACC* acc,ACC_DENSE* accV,SPACC* accSparse, ulong startColAcc){
+ //sort nnz indexes of dense accumulator
+ idx_t nnz = accV->nnzIdxMap.len;
+ idx_t sparsifyStartV; //start index(inside @accSparse) of @accV to sparsify
+ //sparsifyStartV = __atomic_fetch_add(&(acc->lastAssigned),nnz,__ATOMIC_ACQ_REL);
+ #pragma omp atomic capture
+ { //fetch-and-add
+ sparsifyStartV = acc->lastAssigned;
+ acc->lastAssigned += nnz;
+ }
+ DEBUGCHECKS{
+ if (acc->lastAssigned >= acc->size){
+ ERRPRINT("OMP ATOMIC OR SG ERRD IN SPACE ASSIGNMENTS...\n");
+ assert(acc->lastAssigned < acc->size);
+ }
+ }
+ //
+ accSparse -> AS = acc->AS + sparsifyStartV;
+ accSparse -> JA = acc->JA + sparsifyStartV;
+ _sparsifyUB(accV,accSparse,startColAcc);
+}
+////output-gather functions
+/*
+ * merge @conf->gridCols*@mat->M sparse rows partitions into @mat
+ * EXPECTED @rowsParts to be sorted according to the
+ * 2D rowMajor computing grid given in @conf
+ * allocates arrays to hold nonzero values and indexes into @mat
+ */
+inline int mergeRowsPartitions(SPACC* rowsParts,spmat* mat,
+ CONFIG* conf){
+ ulong nzNum=0,j,rLen,idx,partsNum = mat->M * conf->gridCols;
+ //TODO PARALLEL MERGE - SAVE STARTING offset OF EACH PARTITION IN THE OUT MATRIX
+ ulong* rowsPartsOffsets=alloca(partsNum*sizeof(*rowsPartsOffsets));
+ ///count nnz entries and alloc arrays for them
+ for (ulong r=0; r<mat->M; r++){
+ //for each partition ->get len -> outMat.IRP and aux offsets
+ for (j=0,rLen=0; j<conf->gridCols; j++){
+ idx = IDX2D(r,j,conf->gridCols);
+ rowsPartsOffsets[idx]=nzNum+rLen;//part start=prev accumulated end
+ rLen += rowsParts[idx].len;
+ }
+ nzNum += rLen;
+ mat->IRP[r+1] = nzNum;
+ #ifdef ROWLENS
+ mat->RL[r] = rLen;
+ #endif
+ }
+ mat->NZ = nzNum;
+ if (!(mat->AS = malloc(nzNum * sizeof(*(mat->AS))))){
+ ERRPRINT("merged sparse matrix AS alloc errd\n");
+ return EXIT_FAILURE;
+ }
+ if (!(mat->JA = malloc(nzNum * sizeof(*(mat->JA))))){
+ ERRPRINT("merged sparse matrix JA alloc errd\n");
+ return EXIT_FAILURE;
+ }
+ ///populate with rows nnz values and indexes
+ ulong pLen; //omp for aux vars
+ #pragma omp parallel for schedule(static) private(pLen)
+ for (ulong i=0; i<partsNum; i++){
+ pLen = rowsParts[i].len;
+ memcpy(mat->AS + rowsPartsOffsets[i],rowsParts[i].AS,pLen*sizeof(*(mat->AS)));
+ memcpy(mat->JA + rowsPartsOffsets[i],rowsParts[i].JA,pLen*sizeof(*(mat->JA)));
+ }
+ CONSISTENCY_CHECKS{ //TODO REMOVE written nnz check manually
+ for (ulong i=0,w=0; i<mat->M; i++){
+ if (mat->IRP[i] != w)
+ {ERRPRINT("MERGE ROW ERR IRP\n");return -1;}
+ for (j=0; j<conf->gridCols; j++){
+ SPACC r = rowsParts[IDX2D(i,j,conf->gridCols)];
+ for (ulong jj=0; jj<r.len; jj++,w++){
+ if (mat->AS[w]!= r.AS[jj]){
+ ERRPRINT("MERGE ROW ERR AS\n"); return -1;}
+ if (mat->JA[w]!= r.JA[jj]){
+ ERRPRINT("MERGE ROW ERR JA\n"); return -1;}
+ }
+ }
+ }
+ }
+ return EXIT_SUCCESS;
+}
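+
+/* worked example (illustration only): with gridCols=2 and row r split into
+ * partitions of len 3 and 2, rowsPartsOffsets = {IRP[r], IRP[r]+3} and
+ * IRP[r+1] = IRP[r]+5; each parallel memcpy then lands its partition at a
+ * precomputed, disjoint offset, so no synchronization is needed. */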
+
+/*
+ * merge @mat->M sparse rows @rows in sparse matrix @mat
+ * EXPECTED @rows to be sorted according to the trgt matrix row index @r
+ * allocates arrays to hold nonzero values and indexes into @mat
+ */
+inline int mergeRows(SPACC* rows,spmat* mat){
+ ulong nzNum=0;
+ //count nnz entries and alloc arrays for them
+ for (ulong r=0; r<mat->M; ++r){
+ nzNum += rows[r].len;
+ mat->IRP[r+1] = nzNum;
+ #ifdef ROWLENS
+ mat->RL[r] = rows[r].len;
+ #endif
+ }
+ mat->NZ = nzNum;
+ if (!(mat->AS = malloc(nzNum * sizeof(*(mat->AS))))){
+ ERRPRINT("merged sparse matrix AS alloc errd\n");
+ return EXIT_FAILURE;
+ }
+ if (!(mat->JA = malloc(nzNum * sizeof(*(mat->JA))))){
+ ERRPRINT("merged sparse matrix JA alloc errd\n");
+ return EXIT_FAILURE;
+ }
+ ///POPULATE WITH ROWS NNZ VALUES AND INDEXES
+ //TODO PARALLEL COPY
+ #pragma omp parallel for schedule(static)
+ for (ulong r=0; r<mat->M; r++){
+ memcpy(mat->AS+mat->IRP[r], rows[r].AS, rows[r].len*sizeof(*(mat->AS)));
+ memcpy(mat->JA+mat->IRP[r], rows[r].JA, rows[r].len*sizeof(*(mat->JA)));
+ }
+ CONSISTENCY_CHECKS{ //TODO REMOVE written nnz check manually
+ for (ulong r=0,i=0; r<mat->M; r++){
+ if (i != mat->IRP[r])
+ {ERRPRINT("MERGE ROW ERR IRP\n");return -1;}
+ for (ulong j=0; j<rows[r].len; j++,i++){
+ if (mat->AS[i]!= rows[r].AS[j]){
+ ERRPRINT("MERGE ROW ERR AS\n"); return -1;}
+ if (mat->JA[i] != rows[r].JA[j]){
+ ERRPRINT("MERGE ROW ERR JA\n"); return -1;}
+ }
+ }
+ }
+ return EXIT_SUCCESS;
+}
+
+#endif //SPMMUTILS_H_SINGLE_IMPLEMENTATION
+
diff --git a/base/serial/impl/sp3mm4amg/include/SpMMUtilsMulti.h b/base/serial/impl/sp3mm4amg/include/SpMMUtilsMulti.h
new file mode 100644
index 00000000..cd40c494
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/SpMMUtilsMulti.h
@@ -0,0 +1,56 @@
+/*
+ * Sp3MM_for_AlgebraicMultiGrid
+ * (C) Copyright 2021-2022
+ * Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions, and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
+ * not be used to endorse or promote products derived from this
+ * software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPMMUTILSMULTI_H
+#define SPMMUTILSMULTI_H
+
+extern char TRGT_IMPL_START_IDX; //multi implementation switch
+
+#ifdef OFF_F //save "includer" OFF_F value before overwriting it
+ #pragma push_macro("OFF_F")
+ #define _OFF_F_OLD
+ #undef OFF_F
+#endif
+
+#define OFF_F 0
+#include "SpMMUtilsGeneric.h"
+#undef OFF_F
+
+#define OFF_F 1
+#include "SpMMUtilsGeneric.h"
+#undef OFF_F
+
+#ifdef _OFF_F_OLD
+ #pragma pop_macro("OFF_F")
+ #undef _OFF_F_OLD
+#endif
+
+#endif
+
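+/* illustration (not part of the build) of the push/pop_macro save-restore
+ * used above, with a hypothetical macro X:
+ * #define X 7
+ * #pragma push_macro("X") // saves the current definition X=7
+ * #undef X
+ * #define X 0 // temporary redefinition for the re-include
+ * #pragma pop_macro("X") // X expands to 7 again
+ */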
diff --git a/base/serial/impl/sp3mm4amg/include/config.h b/base/serial/impl/sp3mm4amg/include/config.h
new file mode 100644
index 00000000..a11f6d21
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/config.h
@@ -0,0 +1,155 @@
+/*
+ * Sp3MM_for_AlgebraicMultiGrid
+ * (C) Copyright 2021-2022
+ * Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions, and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
+ * not be used to endorse or promote products derived from this
+ * software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef CONFIG_H
+#define CONFIG_H
+//CONFIG STRUCT DEPENDENCIES
+//switch among different symb row -> rows implementations
+typedef enum{
+ RBTREE,
+ IDXMAP
+} ROW_MMSYM_IMPL_MODE;
+
+//
+typedef struct{
+ ushort gridRows;
+ ushort gridCols;
+ //TODO FULL CONFIG DOCCED HERE
+ ROW_MMSYM_IMPL_MODE symbMMRowImplID; //how to compute symb mul (if required)
+ uint threadNum; //thread num to use in an OMP parallel region ...
+ void* chunkDistrbFunc; //CHUNKS_DISTR_INTERF func pntr
+} CONFIG;
+///Smart controls
+typedef size_t idx_t; //spmat indexes
+typedef unsigned __int128 uint128;
+
+#include "macros.h"
+#ifndef SPARSIFY_PRE_PARTITIONING
+ #define SPARSIFY_PRE_PARTITIONING TRUE //u.b. implementation will sparsify dense acc in a pre-split mem area
+#endif
+///AUDIT&CHECKS
+//debug checks and tmp stuff
+#ifndef DEBUG
+ #define DEBUG if( TRUE )
+#endif
+//long prints
+#ifndef DEBUGPRINT
+ #define DEBUGPRINT if( FALSE )
+#endif
+//heavy impact debug checks
+#ifndef DEBUGCHECKS
+ #define DEBUGCHECKS if( FALSE )
+#endif
+//extra print in the normal output
+#ifndef AUDIT_INTERNAL_TIMES
+ #define AUDIT_INTERNAL_TIMES if( TRUE )
+#endif
+#ifndef VERBOSE
+ #define VERBOSE if( FALSE )
+#endif
+//extra checks over the input and correct partials
+#ifndef CONSISTENCY_CHECKS
+ #define CONSISTENCY_CHECKS if( TRUE )
+#endif
+#ifndef SPVECT_IDX_BITWISE //SPVECT_IDX_DENSE_ACC.nnzIdxsFlags will be an array of bitflags
+ #define SPVECT_IDX_BITWISE TRUE
+#endif
+#if SPVECT_IDX_BITWISE == TRUE
+ #ifndef LIMB_T
+ #define LIMB_T uint128
+ #endif
+ typedef LIMB_T limb_t;
+ typedef limb_t* nnz_idxs_flags_t;
+ #define LIMB_SIZE_BIT ( sizeof(limb_t) * 8 )
+#else //nnz idxs are flags in a byte array
+ typedef uchar* nnz_idxs_flags_t;
+ #define LIMB_SIZE_BIT ( sizeof(uchar) * 8 )
+#endif
+
+///AUDIT extra configuration
+//#define ROWLENS
+#ifdef ROWLENS
+/*#pragma message("ROW_LENS ARRAY ENABLED")*/
+#endif
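+
+/* illustration (assumed semantics, not part of the build): with
+ * SPVECT_IDX_BITWISE == TRUE above, presence of a column index j is
+ * tracked as one bit inside an array of limb_t limbs:
+ * limb_t* flags; // ceil(vLen/LIMB_SIZE_BIT) limbs, zero-initialized
+ * int seen = ( flags[j/LIMB_SIZE_BIT] >> (j%LIMB_SIZE_BIT) ) & 1;
+ * flags[j/LIMB_SIZE_BIT] |= ((limb_t)1) << (j%LIMB_SIZE_BIT);
+ */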
+
+/*
+ * idxsMapAcc based symb row*rows: outIdxs and colParts carried with an aux rbtree,
+ * otherwise carried through an append array
+ * (hypothesis: same computational cost, ~n + n log n)
+ */
+#ifndef IDX_RMUL_SYMB_RBTREE
+ #define IDX_RMUL_SYMB_RBTREE FALSE
+#endif
+#ifndef RB_CACHED_INSERT
+ #define RB_CACHED_INSERT TRUE //use cached insert
+#endif
+//#define USE_RB_ROOT_CACHE_LMOST //use leftmost leaf cached in rbtree in sym mul
+///CONSTS
+#define ELL_MAX_ENTRIES ( 6l << 27 ) //2*6GB of ell (padded) entries maxSupport in a matrix
+#define LIMIT_ELL_SIZE //enable above threshold
+#define ELL_AS_FILLER (0 ) //handled with calloc
+//TODO NOW FILLED WITH LAST NNPADDED COL #define ELL_JA_FILLER (-1)
+//#define DOUBLE_VECT_DIFF_EARLY_EXIT 1
+//#define RNDVECTMIN 222222
+#define VECTOR_STEP_REALLOC 25
+#define VECTOR_READ_BLOCK 50 //file (raw) vector read block
+#define RNDVECTORSIZE 100000
+#define RNDVECTORDUMP TMPDIR "rndVectorDump"
+#define RNDVECTORDUMPRAW TMPDIR "rndVectorDumpRaw"
+#define OUTVECTORDUMP TMPDIR "outVectorDump"
+#define OUTVECTORDUMPRAW TMPDIR "outVectorDumpRaw"
+//#define FLOAT_DIFF_ABS
+#ifndef AVG_TIMES_ITERATION
+ #define AVG_TIMES_ITERATION 5
+#endif
+//ompChunksDivide.h -> chunksFairFolded()
+#ifndef FAIR_CHUNKS_FOLDING
+ #define FAIR_CHUNKS_FOLDING 4
+#endif
+//SPMV specific
+//rows partitions for dotProduct SIMD reduction enable
+#ifndef SIMD_ROWS_REDUCTION
+ #define SIMD_ROWS_REDUCTION TRUE
+#endif
+/*#if SIMD_ROWS_REDUCTION == TRUE
+ #pragma message("SIMD_ROWS_REDUCTION enabled")
+ //TODO SOME TRICK TO HAVE 1! PRINT
+#endif*/
+extern double Start,End,Elapsed,ElapsedInternal;
+#define DOUBLE_DIFF_THREASH 7e-4
+#define DRNG_DEVFILE "/dev/urandom"
+#define MAXRND 3e-5
+#ifndef TMPDIR
+ #define TMPDIR "/tmp/"
+#endif
+#define TMP_EXTRACTED_MARTIX TMPDIR "extractedMatrix"
+
+#endif //CONFIG_H
diff --git a/base/serial/impl/sp3mm4amg/include/inlineExports.h b/base/serial/impl/sp3mm4amg/include/inlineExports.h
new file mode 100644
index 00000000..cc3e0a83
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/inlineExports.h
@@ -0,0 +1,49 @@
+/*
+ * Sp3MM_for_AlgebraicMultiGrid
+ * (C) Copyright 2021-2022
+ * Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions, and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
+ * not be used to endorse or promote products derived from this
+ * software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+//inline export - single implementation functions
+void cleanRbNodes(rbRoot* root,rbNode* nodes,idx_t nodesNum);
+int rbInsertNewKey(rbRoot *root,rbNode *node, idx_t key);
+//void C_FortranShiftIdxs(spmat* outMat);
+//void Fortran_C_ShiftIdxs(spmat* m);
+ACC_DENSE* _initAccVectors(ulong num,ulong size);
+ACC_DENSE* _initAccVectors_monoalloc(ulong num,ulong size); //TODO PERF WITH NEXT
+SPMM_ACC* initSpMMAcc(ulong entriesNum, ulong accumulatorsNum);
+idx_t reductionMaxSeq(idx_t* arr,idx_t arrLen);
+int _allocAccDense(ACC_DENSE* v,ulong size);
+int mergeRows(SPACC* rows,spmat* mat);
+int mergeRowsPartitions(SPACC* rowsParts,spmat* mat,CONFIG* conf);
+void _freeAccsDenseChecks(ACC_DENSE* vectors,ulong num);
+void _resetAccVect(ACC_DENSE* acc);
+void _resetIdxMap(SPVECT_IDX_DENSE_MAP* acc);
+void assertArrNoRepetitions(idx_t* arrSorted, idx_t arrLen);
+void freeAccsDense(ACC_DENSE* vectors,ulong num);
+void freeSpMMAcc(SPMM_ACC* acc);
+void sparsifyDenseVect(SPMM_ACC* acc,ACC_DENSE* accV,SPACC* accSparse, ulong startColAcc);
diff --git a/base/serial/impl/sp3mm4amg/include/inlineExports_Generic.h b/base/serial/impl/sp3mm4amg/include/inlineExports_Generic.h
new file mode 100644
index 00000000..eba852f2
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/inlineExports_Generic.h
@@ -0,0 +1,43 @@
+/*
+ * Sp3MM_for_AlgebraicMultiGrid
+ * (C) Copyright 2021-2022
+ * Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions, and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
+ * not be used to endorse or promote products derived from this
+ * software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "macros.h"
+
+#ifndef OFF_F
+ #error generic implementation requires OFF_F defined
+#endif
+
+////inline exports
+//multi implementation functions
+void CAT(scSparseVectMul_,OFF_F)(double scalar,double* vectVals,ulong* vectIdxs,ulong vectLen, ACC_DENSE* aux);
+void CAT(scSparseVectMulPart_,OFF_F)(double scalar,double* vectVals,ulong* vectIdxs,ulong vectLen,ulong startIdx,ACC_DENSE* aux);
+void CAT(_scRowMul_,OFF_F)(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux);
+void CAT(scSparseRowMul_,OFF_F)(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux);
+idx_t* CAT(spMMSizeUpperbound_,OFF_F)(spmat* A,spmat* B);
+idx_t* CAT(spMMSizeUpperboundColParts_,OFF_F)(spmat* A,spmat* B,ushort gridCols,idx_t* bColPartOffsets);
diff --git a/base/serial/impl/sp3mm4amg/include/linuxK_rbtree_minimalized.h b/base/serial/impl/sp3mm4amg/include/linuxK_rbtree_minimalized.h
new file mode 100644
index 00000000..187bfe28
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/linuxK_rbtree_minimalized.h
@@ -0,0 +1,335 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli
+
+
+ Userspace GNUC porting, removal of augmented deps and minimization to a few OPs only: Andrea Di Iorio
+ linux/include/linux/rbtree.h
+
+ To use rbtrees you'll have to implement your own insert and search cores.
+ This will avoid us to use callbacks and to drop drammatically performances.
+ I know it's not the cleaner way, but in C (not in C++) to get
+ performances and genericity...
+
+ See Documentation/core-api/rbtree.rst for documentation and samples.
+*/
+/*
+ * RedBlackTree_linux_userspace
+ * (C) Copyright 2021-2022
+ * Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions, and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the RedBlackTree_linux_userspace or the names of its contributors may
+ * not be used to endorse or promote products derived from this
+ * software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE RedBlackTree_linux_userspace GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ /*
+ * https://bitbucket.org/andysnake96/redblacktree_linux_userspace
+ */
+
+
+#ifndef _LINUX_RBTREE_H
+#define _LINUX_RBTREE_H
+
+//#include <linux/kernel.h> //TODO LESS_DEPENDENCIES
+//#include <linux/stddef.h> //TODO LESS_DEPENDENCIES
+//#include <linux/rcupdate.h> //TODO LESS_DEPENDENCIES
+#include <stddef.h>
+#include <assert.h>
+
+#include <macros.h> ///my usefull macros :)
+#include <macrosLinuxMock.h> ///my usefull macros :)
+
+struct rb_node {
+ unsigned long __rb_parent_color;
+ struct rb_node *rb_right;
+ struct rb_node *rb_left;
+} __attribute__((aligned(sizeof(long))));
+ /* The alignment might seem pointless, but allegedly CRIS needs it */
+
+struct rb_root {
+ struct rb_node *rb_node;
+};
+/////MINIMALIZE fulfilling rbtree_augmented deps TODO
+///rb_node colors labels
+#define RB_RED 0
+#define RB_BLACK 1
+#define __rb_parent(pc) ((struct rb_node *)(pc & ~3))
+
+#define __rb_color(pc) ((pc) & 1)
+#define __rb_is_black(pc) __rb_color(pc)
+#define __rb_is_red(pc) (!__rb_color(pc))
+#define rb_color(rb) __rb_color((rb)->__rb_parent_color)
+#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color)
+#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color)
+
+
+static inline void rb_set_parent_color(struct rb_node *rb,
+ struct rb_node *p, int color)
+{
+ rb->__rb_parent_color = (unsigned long)p | color;
+}
+///rb_set_parent //TODO GENERIC ADDONLY
+
+static inline void
+__rb_change_child(struct rb_node *old, struct rb_node *new,
+ struct rb_node *parent, struct rb_root *root)
+{
+ if (parent) {
+ if (parent->rb_left == old)
+ WRITE_ONCE(parent->rb_left, new);
+ else
+ WRITE_ONCE(parent->rb_right, new);
+ } else
+ WRITE_ONCE(root->rb_node, new);
+}
+
+/////////////////////////////
+
+#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3))
+
+#define RB_ROOT (struct rb_root) { NULL, }
+#define rb_entry(ptr, type, member) container_of(ptr, type, member)
+
+#define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL)
+
+/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */
+#define RB_EMPTY_NODE(node) \
+ ((node)->__rb_parent_color == (unsigned long)(node))
+#define RB_CLEAR_NODE(node) \
+ ((node)->__rb_parent_color = (unsigned long)(node))
+
+
+extern void rb_insert_color(struct rb_node *, struct rb_root *);
+extern void rb_erase(struct rb_node *, struct rb_root *);
+
+
+/* Find logical next and previous nodes in a tree */
+extern struct rb_node *rb_next(const struct rb_node *);
+extern struct rb_node *rb_prev(const struct rb_node *);
+extern struct rb_node *rb_first(const struct rb_root *);
+extern struct rb_node *rb_last(const struct rb_root *);
+
+/* Postorder iteration - always visit the parent after its children */
+extern struct rb_node *rb_first_postorder(const struct rb_root *);
+extern struct rb_node *rb_next_postorder(const struct rb_node *);
+
+/* Fast replacement of a single node without remove/rebalance/add/rebalance */
+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+ struct rb_root *root);
+extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
+ struct rb_root *root);
+
+
+static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
+ struct rb_node **rb_link)
+{
+ node->__rb_parent_color = (unsigned long)parent;
+ node->rb_left = node->rb_right = NULL;
+
+ *rb_link = node;
+}
+
+/*static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
+ struct rb_node **rb_link)
+{
+ node->__rb_parent_color = (unsigned long)parent;
+ node->rb_left = node->rb_right = NULL;
+
+ rcu_assign_pointer(*rb_link, node);
+}**/ //TODO LESS_DEPENDENCIES
+
+#define rb_entry_safe(ptr, type, member) \
+ ({ typeof(ptr) ____ptr = (ptr); \
+ ____ptr ? rb_entry(____ptr, type, member) : NULL; \
+ })
+
+/**
+ * rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of
+ * given type allowing the backing memory of @pos to be invalidated
+ *
+ * @pos: the 'type *' to use as a loop cursor.
+ * @n: another 'type *' to use as temporary storage
+ * @root: 'rb_root *' of the rbtree.
+ * @field: the name of the rb_node field within 'type'.
+ *
+ * rbtree_postorder_for_each_entry_safe() provides a similar guarantee as
+ * list_for_each_entry_safe() and allows the iteration to continue independent
+ * of changes to @pos by the body of the loop.
+ *
+ * Note, however, that it cannot handle other modifications that re-order the
+ * rbtree it is iterating over. This includes calling rb_erase() on @pos, as
+ * rb_erase() may rebalance the tree, causing us to miss some nodes.
+ */
+#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \
+ for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \
+ pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \
+ typeof(*pos), field); 1; }); \
+ pos = n)
+
+/*
+ * Leftmost-cached rbtrees.
+ *
+ * We do not cache the rightmost node based on footprint
+ * size vs number of potential users that could benefit
+ * from O(1) rb_last(). Just not worth it, users that want
+ * this feature can always implement the logic explicitly.
+ * Furthermore, users that want to cache both pointers may
+ * find it a bit asymmetric, but that's ok.
+ */
+struct rb_root_cached {
+ struct rb_root rb_root;
+ struct rb_node *rb_leftmost;
+};
+
+#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
+
+/* Same as rb_first(), but O(1) */
+#define rb_first_cached(root) (root)->rb_leftmost
+
+static inline void rb_insert_color_cached(struct rb_node *node,
+ struct rb_root_cached *root,
+ bool leftmost)
+{
+ if (leftmost)
+ root->rb_leftmost = node;
+ rb_insert_color(node, &root->rb_root);
+}
+
+static inline void rb_erase_cached(struct rb_node *node,
+ struct rb_root_cached *root)
+{
+ if (root->rb_leftmost == node)
+ root->rb_leftmost = rb_next(node);
+ rb_erase(node, &root->rb_root);
+}
+
+static inline void rb_replace_node_cached(struct rb_node *victim,
+ struct rb_node *new,
+ struct rb_root_cached *root)
+{
+ if (root->rb_leftmost == victim)
+ root->rb_leftmost = new;
+ rb_replace_node(victim, new, &root->rb_root);
+}
+///////////////////////////////////////////////////////////////////////////////
+/***END OF ORIGINAL LINUX KERNEL HEADER MINIMALIZED
+ ***FEW AUX FUNCS TO SUPPORT SpMM-Symbolic
+ ***/
+#include <string.h>
+#include "config.h"
+typedef struct{
+ idx_t key;
+ struct rb_node rb;
+
+ /* following fields used for testing augmented rbtree functionality
+ u32 val;
+ u32 augmented; ///only for AUGMENTED_TEST
+ */
+} rbNode;
+//static struct rb_root_cached root = RB_ROOT_CACHED;
+
+typedef struct rb_root_cached rbRoot;
+
+
+/*
+ * return 1 if @node with the given key @key
+ * has been inserted in rbtree rooted at @root; 0 otherwise
+ */
+static inline int rbInsertStdNewKey(rbRoot *root,rbNode *node, idx_t key)
+{
+ struct rb_node **new = &root->rb_root.rb_node, *parent = NULL;
+ idx_t parentK;
+
+ while (*new) {
+ parent = *new;
+ parentK = rb_entry(parent, rbNode, rb)->key;
+ if (key < parentK) new = &parent->rb_left;
+ else if (key > parentK) new = &parent->rb_right;
+ else return 0; //already in
+ DEBUGCHECKS assert( *new != parent );
+ }
+ //insert the node in the correct position, assigning the key
+ /*DEBUGCHECKS{ //check for double insertion
+ rbNode testNode;
+ memset(&testNode,0,sizeof(testNode));
+ assert( !memcmp(node,&testNode,sizeof(*node)) );
+ for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n))
+ assert( n != *new );
+ }*/
+
+ node->key = key;
+ rb_link_node(&node->rb, parent, new);
+ rb_insert_color(&node->rb, &root->rb_root);
+ return 1;
+}
+
+static inline int rbInsertCachedNewKey(rbRoot *root,rbNode *node, idx_t key)
+{
+ struct rb_node **new = &root->rb_root.rb_node, *parent = NULL;
+ idx_t parentK;
+ bool leftmost = true;
+
+ while (*new) {
+ parent = *new;
+ parentK = rb_entry(parent, rbNode, rb)->key;
+ if (key < parentK) new = &parent->rb_left;
+ else if (key > parentK) {
+ new = &parent->rb_right;
+ leftmost = false;
+ }
+ else return 0;
+ }
+
+ //insert the node in the correct position, assigning the key
+ /*DEBUGCHECKS{ //check for double insertion
+ rbNode testNode;
+ memset(&testNode,0,sizeof(testNode));
+ assert( !memcmp(node,&testNode,sizeof(*node)) );
+ for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n))
+ assert( n != *new );
+ }*/
+
+ node->key = key;
+ rb_link_node(&node->rb, parent, new);
+ rb_insert_color_cached(&node->rb, root, leftmost);
+ return 1;
+}
+
+static inline int rbInsertNewKey(rbRoot *root,rbNode *node, idx_t key){
+ #if RB_CACHED_INSERT == TRUE
+ return rbInsertCachedNewKey(root,node,key);
+ #else
+ return rbInsertStdNewKey(root,node,key);
+ #endif
+}
+
+#define rbNodeOrderedVisit(n,root) \
+ for (n = rb_first(&root->rb_root); n; n = rb_next(n));
+
+inline void cleanRbNodes(rbRoot* root,rbNode* nodes,idx_t nodesNum){
+ memset(nodes,0,nodesNum * sizeof(*nodes));
+ memset(root,0,sizeof(*root));
+}
+#endif /* _LINUX_RBTREE_H */
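+
+/* minimal usage sketch (illustration only, not part of this header):
+ * rbRoot root = RB_ROOT_CACHED;
+ * rbNode nodes[8] = {0}; idx_t added = 0;
+ * idx_t keys[] = {5,3,5,9}; // the duplicate 5 is rejected
+ * for (int i = 0; i < 4; i++)
+ * added += rbInsertNewKey(&root, nodes + added, keys[i]);
+ * // added == 3; rb_first()/rb_next() then visit keys in order: 3,5,9
+ * cleanRbNodes(&root, nodes, 8); // memset-reset root and pool for reuse
+ */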
diff --git a/base/serial/impl/sp3mm4amg/include/macros.h b/base/serial/impl/sp3mm4amg/include/macros.h
new file mode 100644
index 00000000..c75017ee
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/macros.h
@@ -0,0 +1,118 @@
+/*
+ * Sp3MM_for_AlgebraicMultiGrid
+ * (C) Copyright 2021-2022
+ * Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions, and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
+ * not be used to endorse or promote products derived from this
+ * software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef MACROS
+#define MACROS
+
+#include <stdio.h>
+#include <stdlib.h>
+
+///aux macro-functions
+#define ABS(a) ((a) > 0 ? (a) : -(a))
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#define AVG(a,b) ((a)/2 + (b)/2 + ((a)%2+(b)%2)/2)
+#define SWAP(a,b) (a)=(a)^(b);(b)=(b)^(a);(a)=(a)^(b)
+#define IN_RANGE(i,start,end) ( (start) <= (i) && (i) <= (end) )
+
+//ceil(x/y) with integers
+#define INT_DIV_CEIL(x,y) ( ( (x) - 1) / (y) + 1 )
+//2D ROW MAJOR indexing wrap compute
+#define IDX2D(i,j,nCols) ((j) + (i)*(nCols))
+
+///distribute remainder @rem in groups, giving an extra +1 to the first @rem of them
+#define UNIF_REMINDER_DISTRI(i,div,rem) \
+ ( (div) + ( (i) < (rem) ? 1 : 0 ) )
+#define UNIF_REMINDER_DISTRI_STARTIDX(i,div,rem) \
+ ( (i) * (div) + MIN( (i),(rem) ) )
+#define UNIF_REMINDER_DISTRI_ENDIDX(i,div,rem) \
+ ( ((i)+1) * (div) + MIN( (i)+1,(rem) ) )
+//shorter name alias
+#define unifRemShare(i,div,rem) UNIF_REMINDER_DISTRI(i,div,rem)
+#define unifRemShareStart(i,div,rem) UNIF_REMINDER_DISTRI_STARTIDX(i,div,rem)
+#define unifRemShareEnd(i,div,rem) UNIF_REMINDER_DISTRI_ENDIDX( (i) , (div) , (rem) )
+#define unifRemShareBlock(i,div,rem) unifRemShareStart(i,div,rem), unifRemShare(i,div,rem)
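+
+/* worked example (illustration only): distributing 11 items over 3 groups,
+ * div = 11/3 = 3, rem = 11%3 = 2 -> shares 4,4,3 (group i gets div+1 iff
+ * i < rem); unifRemShareStart yields 0,4,8 and unifRemShareEnd 4,8,11,
+ * so each group's end matches the next group's start. */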
+
+#define STATIC_ARR_ELEMENTS_N(arr) (sizeof( (arr) ) / (sizeof(*(arr))))
+////STRING UTILS
+#define _STR(s) #s
+#define STR(s) _STR(s)
+///CONCATENATE
+//Concatenate preprocessor tokens A and B WITHOUT expanding macro definitions
+#define _CAT(a,b) a ## b
+#define _CAT3(a,b,c) a ## b ## c
+#define _CAT4(a,b,c,d) a ## b ## c ## d
+//Concatenate preprocessor tokens A and B EXPANDING macro definitions
+#define CAT(a,b) _CAT(a,b)
+#define CAT3(a,b,c) _CAT3(a,b,c)
+#define CAT4(a,b,c,d) _CAT4(a,b,c,d)
+
+#define _UNDEF _
+////PRINTS
+#define CHIGHLIGHT "\33[1m\33[92m"
+#define CCC CHIGHLIGHT
+#define CHIGHLIGHTERR "\33[31m\33[1m\33[44m"
+#define CCCERR CHIGHLIGHTERR
+#define CEND "\33[0m"
+#define hprintsf(str,...) printf( CHIGHLIGHT str CEND,__VA_ARGS__ )
+#define hprintf(str) printf( CHIGHLIGHT str CEND)
+#define ERRPRINTS(str,...) fprintf( stderr, CHIGHLIGHTERR str CEND,__VA_ARGS__ )
+#define ERRPRINT(str) fprintf( stderr, CHIGHLIGHTERR str CEND )
+
+#include
+
+///aux types
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+typedef char bool;
+#define TRUE (!0)
+#define FALSE 0
+#define true TRUE
+#define false FALSE
+#define T TRUE
+#define F FALSE
+//smart decimal type custom precision def build macro _DECIMAL_TRGT_PREC
+#ifndef _DECIMAL_TRGT_PREC
+//dflt floating point precision & formatting chars
+ #define _DECIMAL_TRGT_PREC double
+ #define _DECIMAL_TRGT_PREC_PR "%lf"
+#else
+ //TODO SELECT WITH RESPECT TO THE EXPORTED TARGET DECIMAL TYPE
+ #define _DECIMAL_TRGT_PREC_PR "%f"
+#endif
+typedef _DECIMAL_TRGT_PREC decimal;
+
+
+///EXTRA INCLUDE --- cuda
+///assertions are disabled at compile time by defining the NDEBUG preprocessor macro before including assert.h
+//#ifdef ASSERT #include <assert.h> #endif
+
+#endif //MACROS
diff --git a/base/serial/impl/sp3mm4amg/include/macrosLinuxMock.h b/base/serial/impl/sp3mm4amg/include/macrosLinuxMock.h
new file mode 100644
index 00000000..6b323330
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/macrosLinuxMock.h
@@ -0,0 +1,140 @@
+/*
+ * RedBlackTree_linux_userspace
+ * (C) Copyright 2021-2022
+ * Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions, and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the RedBlackTree_linux_userspace or the names of its contributors may
+ * not be used to endorse or promote products derived from this
+ * software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE RedBlackTree_linux_userspace GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MACROSLINUXMOCK
+#define MACROSLINUXMOCK
+
+////////////////////////// LINUX KERNEL Userspace RBTree //////////////////////////////
+typedef unsigned int u32;
+typedef unsigned int cycles_t;
+
+#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
+
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr: the pointer to the member.
+ * @type: the type of the container struct this is embedded in.
+ * @member: the name of the member within the struct.
+ * TODO LESS_DEPENDENCIES
+ removed:
+ BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \
+ !__same_type(*(ptr), void), \
+ "pointer type mismatch in container_of()"); \
+
+ */
+#define container_of(ptr, type, member) ({ \
+ void *__mptr = (void *)(ptr); \
+ ((type *)(__mptr - offsetof(type, member))); })
+
+#define NOOP(x) x
+//#define NOOP do {} while(0)
+#undef unlikely
+#define unlikely(x) NOOP(x)
+/*
+ * Yes, this permits 64-bit accesses on 32-bit architectures. These will
+ * actually be atomic in some cases (namely Armv7 + LPAE), but for others we
+ * rely on the access being split into 2x32-bit accesses for a 32-bit quantity
+ * (e.g. a virtual address) and a strong prevailing wind.
+ */
+#define compiletime_assert_rwonce_type(t) \
+ compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
+ "Unsupported access size for {READ,WRITE}_ONCE().")
+///TODO OVERWRITTEN FOR PORTING
+#undef compiletime_assert_rwonce_type
+#define compiletime_assert_rwonce_type(t) NOOP(t)
+/*
+ * Use __READ_ONCE() instead of READ_ONCE() if you do not require any
+ * atomicity. Note that this may result in tears!
+ */
+#ifndef __READ_ONCE
+#define __READ_ONCE(x) (*(const volatile __unqual_scalar_typeof(x) *)&(x))
+#endif
+
+#define READ_ONCE(x) \
+({ \
+ compiletime_assert_rwonce_type(x); \
+ __READ_ONCE(x); \
+})
+
+#define __WRITE_ONCE(x, val) \
+do { \
+ *(volatile typeof(x) *)&(x) = (val); \
+} while (0)
+
+/*//TODO ORIGINAL VERSION
+#define WRITE_ONCE(x, val) \
+do { \
+ compiletime_assert_rwonce_type(x); \
+ __WRITE_ONCE(x, val); \
+} while (0)
+*/
+#define WRITE_ONCE(x,val) x = val
+
+/*
+ * Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need to load a
+ * word from memory atomically but without telling KASAN/KCSAN. This is
+ * usually used by unwinding code when walking the stack of a running process.
+ */
+#define READ_ONCE_NOCHECK(x) \
+({ \
+ compiletime_assert(sizeof(x) == sizeof(unsigned long), \
+ "Unsupported access size for READ_ONCE_NOCHECK()."); \
+ (typeof(x))__read_once_word_nocheck(&(x)); \
+})
+
+#include <x86intrin.h> //rdtsc
+static inline cycles_t get_cycles(void)
+{
+
+ /**#ifndef CONFIG_X86_TSC
+ if (!boot_cpu_has(X86_FEATURE_TSC))
+ return 0;
+ #endif */ ///TODO LESS_DEPENDENCIES
+
+ return __rdtsc();
+}
+
+/**#define WARN_ON_ONCE(condition) ({ \
+ static bool __section(".data.once") __warned; \
+ int __ret_warn_once = !!(condition); \
+ \
+ if (unlikely(__ret_warn_once && !__warned)) { \
+ __warned = true; \
+ WARN_ON(1); \
+ } \
+ unlikely(__ret_warn_once); \
+}) */ //TODO LESS_DEPENDENCIES
+#include <assert.h>
+#define WARN_ON_ONCE(condition) assert( !(condition) )
+
+#define div_u64(a,b) ( a / b )
+#define kfree(x) free(x)
+
+#endif //MACROSLINUXMOCK
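+
+/* usage sketch (illustration only): container_of recovers the enclosing
+ * struct from a pointer to one of its members, which is exactly how
+ * rb_entry is used with the rbNode type of linuxK_rbtree_minimalized.h:
+ * struct rb_node* n = rb_first(&root->rb_root);
+ * rbNode* entry = container_of(n, rbNode, rb); // entry->key is usable
+ */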
diff --git a/base/serial/impl/sp3mm4amg/include/mmio.h b/base/serial/impl/sp3mm4amg/include/mmio.h
new file mode 100644
index 00000000..7cfd0a1b
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/mmio.h
@@ -0,0 +1,133 @@
+/*
+* Matrix Market I/O library for ANSI C
+*
+* See http://math.nist.gov/MatrixMarket for details.
+*
+*
+*/
+
+#ifndef MM_IO_H
+#define MM_IO_H
+
+#define MM_MAX_LINE_LENGTH 1025
+#define MatrixMarketBanner "%%MatrixMarket"
+#define MM_MAX_TOKEN_LENGTH 64
+
+typedef char MM_typecode[4];
+
+char *mm_typecode_to_str(MM_typecode matcode);
+
+int mm_read_banner(FILE *f, MM_typecode *matcode);
+int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz);
+int mm_read_mtx_array_size(FILE *f, int *M, int *N);
+
+int mm_write_banner(FILE *f, MM_typecode matcode);
+int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz);
+int mm_write_mtx_array_size(FILE *f, int M, int N);
+
+
+/********************* MM_typecode query functions ***************************/
+
+#define mm_is_matrix(typecode) ((typecode)[0]=='M')
+
+#define mm_is_sparse(typecode) ((typecode)[1]=='C')
+#define mm_is_coordinate(typecode)((typecode)[1]=='C')
+#define mm_is_dense(typecode) ((typecode)[1]=='A')
+#define mm_is_array(typecode) ((typecode)[1]=='A')
+
+#define mm_is_complex(typecode) ((typecode)[2]=='C')
+#define mm_is_real(typecode) ((typecode)[2]=='R')
+#define mm_is_pattern(typecode) ((typecode)[2]=='P')
+#define mm_is_integer(typecode) ((typecode)[2]=='I')
+
+#define mm_is_symmetric(typecode)((typecode)[3]=='S')
+#define mm_is_general(typecode) ((typecode)[3]=='G')
+#define mm_is_skew(typecode) ((typecode)[3]=='K')
+#define mm_is_hermitian(typecode)((typecode)[3]=='H')
+
+int mm_is_valid(MM_typecode matcode); /* too complex for a macro */
+
+
+/********************* MM_typecode modify functions ***************************/
+
+#define mm_set_matrix(typecode) ((*typecode)[0]='M')
+#define mm_set_coordinate(typecode) ((*typecode)[1]='C')
+#define mm_set_array(typecode) ((*typecode)[1]='A')
+#define mm_set_dense(typecode) mm_set_array(typecode)
+#define mm_set_sparse(typecode) mm_set_coordinate(typecode)
+
+#define mm_set_complex(typecode)((*typecode)[2]='C')
+#define mm_set_real(typecode) ((*typecode)[2]='R')
+#define mm_set_pattern(typecode)((*typecode)[2]='P')
+#define mm_set_integer(typecode)((*typecode)[2]='I')
+
+
+#define mm_set_symmetric(typecode)((*typecode)[3]='S')
+#define mm_set_general(typecode)((*typecode)[3]='G')
+#define mm_set_skew(typecode) ((*typecode)[3]='K')
+#define mm_set_hermitian(typecode)((*typecode)[3]='H')
+
+#define mm_clear_typecode(typecode) ((*typecode)[0]=(*typecode)[1]= \
+ (*typecode)[2]=' ',(*typecode)[3]='G')
+
+#define mm_initialize_typecode(typecode) mm_clear_typecode(typecode)
+
+
+/********************* Matrix Market error codes ***************************/
+
+
+#define MM_COULD_NOT_READ_FILE 11
+#define MM_PREMATURE_EOF 12
+#define MM_NOT_MTX 13
+#define MM_NO_HEADER 14
+#define MM_UNSUPPORTED_TYPE 15
+#define MM_LINE_TOO_LONG 16
+#define MM_COULD_NOT_WRITE_FILE 17
+
+
+/******************** Matrix Market internal definitions ********************
+
+ MM_matrix_typecode: 4-character sequence
+
+ object sparse/ data storage
+ dense type scheme
+
+ string position: [0] [1] [2] [3]
+
+ Matrix typecode: M(atrix) C(oord) R(eal) G(eneral)
+ A(array) C(omplex) H(ermitian) P(attern)
+ S(ymmetric)
+ I(nteger) K(skew)
+
+ ***********************************************************************/
+
+#define MM_MTX_STR "matrix"
+#define MM_ARRAY_STR "array"
+#define MM_DENSE_STR "array"
+#define MM_COORDINATE_STR "coordinate"
+#define MM_SPARSE_STR "coordinate"
+#define MM_COMPLEX_STR "complex"
+#define MM_REAL_STR "real"
+#define MM_INT_STR "integer"
+#define MM_GENERAL_STR "general"
+#define MM_SYMM_STR "symmetric"
+#define MM_HERM_STR "hermitian"
+#define MM_SKEW_STR "skew-symmetric"
+#define MM_PATTERN_STR "pattern"
+
+
+/* high level routines */
+
+int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
+ double val[], MM_typecode matcode);
+int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
+ double val[], MM_typecode matcode);
+int mm_read_mtx_crd_entry(FILE *f, int *I, int *J, double *real, double *img,
+ MM_typecode matcode);
+
+int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
+ double **val_, int **I_, int **J_);
+
+
+
+#endif
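+
+/* usage sketch (illustration only): reading a coordinate-format header
+ * with the routines declared above:
+ * MM_typecode tc; int m, n, nz;
+ * FILE* f = fopen("A.mtx", "r"); // hypothetical file name
+ * if (mm_read_banner(f, &tc) || !mm_is_coordinate(tc)) { fail } ;
+ * mm_read_mtx_crd_size(f, &m, &n, &nz); // nz (i,j[,val]) entries follow
+ */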
+ * configuration is expected to have a valid threadNum set
+ */
+#include "config.h"
+//distribution of @rows|blocks of @matrix, exploiting @config
+typedef void (CHUNKS_DISTR )         (ulong,spmat*,CONFIG*);
+typedef void (*CHUNKS_DISTR_INTERF ) (ulong,spmat*,CONFIG*);
+
+//NOOP chunks division for manual configuration via export OMP_SCHEDULE
+inline void chunksNOOP(ulong r,spmat* mat,CONFIG* cfg){ return; }
+#include "ompGetICV.h"
+//fair division of @r elements from matrix @mat with threads num in @cfg
+inline void chunksFair(ulong r,spmat* mat,CONFIG* cfg){
+    assert(cfg->threadNum > 0); //configured target thread num
+    omp_sched_t k,kind; int chunk_size,chunk_size_new=0,monotonic;
+    omp_get_schedule(&kind,&chunk_size);
+    k = kind;
+    #if _OPENMP >= 201811   //OMP_SCHED_SCHEDULE modifier from 5.0
+    monotonic = omp_sched_monotonic & kind;
+    if(monotonic)   k = kind - omp_sched_monotonic;
+    #endif
+    (void) monotonic;   //else no unused warning
+    switch(k){
+        case omp_sched_static :
+            DEBUG   printf("static is already fair\n");
+            return;
+        case omp_sched_dynamic:
+            chunk_size_new = MAX(1,r/cfg->threadNum);
+            break;
+        //case omp_sched_guided :
+        //case omp_sched_auto   :
+        //default:
+    }
+    if(chunk_size == chunk_size_new)    return;
+    omp_set_schedule(kind,chunk_size_new);
+    VERBOSE printf("chunksFair:\tchunk adapted to %d\n",chunk_size_new);
+    DEBUG   ompGetRuntimeSchedule(NULL);
+}
+//fair division of @r elements from matrix @mat with threads num in @cfg,
+//subdividing the fair share by a factor of @FAIR_CHUNKS_FOLDING
+inline void chunksFairFolded(ulong r,spmat* mat,CONFIG* cfg){
+    assert(cfg->threadNum > 0); //configured target thread num
+    omp_sched_t k,kind; int chunk_size,chunk_size_new=0,monotonic;
+    omp_get_schedule(&kind,&chunk_size);
+    k = kind;
+    #if _OPENMP >= 201811   //OMP_SCHED_SCHEDULE modifier from 5.0
+    monotonic = omp_sched_monotonic & kind;
+    if(monotonic)   k = kind - omp_sched_monotonic;
+    #endif
+    (void) monotonic;   //else no unused warning
+    switch(k){
+        case omp_sched_static :
+            DEBUG   printf("static is already fair\n");
+            return;
+        case omp_sched_dynamic:
+            chunk_size_new = MAX(1,r/(cfg->threadNum*FAIR_CHUNKS_FOLDING));
+            break;
+        //case omp_sched_guided :
+        //case omp_sched_auto   :
+        //default:
+    }
+    if(chunk_size == chunk_size_new)    return;
+    omp_set_schedule(kind,chunk_size_new);
+    DEBUG{  //check with ICV get
+        printf("chunksFairFolded:\tchunk adapted to %d\n",chunk_size_new);
+        ompGetRuntimeSchedule(NULL);
+    }
+}
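+
+/*
+ * Illustrative sketch (not in the original header): how a caller is expected
+ * to plug one of the distribution functions above in front of a
+ * runtime-scheduled loop; the helper name is hypothetical.
+ */
+static inline void chunksDistrExample(spmat* mat, CONFIG* cfg){
+    CHUNKS_DISTR_INTERF distr = &chunksFair;  //or chunksNOOP / chunksFairFolded
+    distr(mat->M, mat, cfg);                  //retunes omp_set_schedule() if needed
+    #pragma omp parallel for schedule(runtime)
+    for (ulong r = 0; r < mat->M; r++){
+        (void)r;    //per-row work would go here
+    }
+}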
+#endif
diff --git a/base/serial/impl/sp3mm4amg/include/ompGetICV.h b/base/serial/impl/sp3mm4amg/include/ompGetICV.h
new file mode 100644
index 00000000..6e545a1c
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/ompGetICV.h
@@ -0,0 +1,41 @@
+/*
+ * Sp3MM_for_AlgebraicMultiGrid
+ * (C) Copyright 2021-2022
+ *     Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions, and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
+ *    not be used to endorse or promote products derived from this
+ *    software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef OMPGETICV_H
+#define OMPGETICV_H
+//only header definitions
+/*
+ * log sched configuration on stdout
+ * return kind,monotonic,chunkSize if arg0 not NULL
+ */
+void ompGetRuntimeSchedule(int* );
+void ompGetAllICV();        //only if not OMP_GET_ICV_MAIN
+float ompVersionMacroMap(); //version number as float using API dates mappings
+#endif
diff --git a/base/serial/impl/sp3mm4amg/include/ompi_config_minimal.h b/base/serial/impl/sp3mm4amg/include/ompi_config_minimal.h
new file mode 100644
index 00000000..88e4ccb9
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/ompi_config_minimal.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2021 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2007-2021 Cisco Systems, Inc.  All rights reserved
+ * Copyright (c) 2008-2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright (c) 2009-2012 Oak Rigde National Laboratory.  All rights reserved.
+ * Copyright (c) 2011-2020 Sandia National Laboratories. All rights reserved.
+ * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights
+ *                         reserved.
+ * Copyright (c) 2011-2013 INRIA.  All rights reserved.
+ * Copyright (c) 2015      University of Houston. All rights reserved.
+ * Copyright (c) 2015-2021 Research Organization for Information Science
+ *                         and Technology (RIST).  All rights reserved.
+ * Copyright (c) 2017-2019 IBM Corporation.  All rights reserved.
+ * Copyright (c) 2018      FUJITSU LIMITED.  All rights reserved.
+ * Copyright (c) 2021-2022 Google, LLC. All rights reserved.
+ * Copyright (c) 2021-2022 Amazon.com, Inc. or its affiliates.  All Rights
+ *                         reserved.
+ * Copyright (c) 2021      Bull S.A.S. All rights reserved.
+ * Copyright (c) 2018      Triad National Security, LLC. All rights
+ * Copyright (c) 2018-2021 Triad National Security, LLC. All rights
+ *                         reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/*
+ * Error classes and codes
+ * Do not change the values of these without also modifying mpif.h.in.
+ */
+#define MPI_SUCCESS                   0
+#define MPI_ERR_BUFFER                1
+#define MPI_ERR_COUNT                 2
+#define MPI_ERR_TYPE                  3
+#define MPI_ERR_TAG                   4
+#define MPI_ERR_COMM                  5
+#define MPI_ERR_RANK                  6
+#define MPI_ERR_REQUEST               7
+#define MPI_ERR_ROOT                  8
+#define MPI_ERR_GROUP                 9
+#define MPI_ERR_OP                    10
+#define MPI_ERR_TOPOLOGY              11
+#define MPI_ERR_DIMS                  12
+#define MPI_ERR_ARG                   13
+#define MPI_ERR_UNKNOWN               14
+#define MPI_ERR_TRUNCATE              15
+#define MPI_ERR_OTHER                 16
+#define MPI_ERR_INTERN                17
+#define MPI_ERR_IN_STATUS             18
+#define MPI_ERR_PENDING               19
+#define MPI_ERR_ACCESS                20
+#define MPI_ERR_AMODE                 21
+#define MPI_ERR_ASSERT                22
+#define MPI_ERR_BAD_FILE              23
+#define MPI_ERR_BASE                  24
+#define MPI_ERR_CONVERSION            25
+#define MPI_ERR_DISP                  26
+#define MPI_ERR_DUP_DATAREP           27
+#define MPI_ERR_FILE_EXISTS           28
+#define MPI_ERR_FILE_IN_USE           29
+#define MPI_ERR_FILE                  30
+#define MPI_ERR_INFO_KEY              31
+#define MPI_ERR_INFO_NOKEY            32
+#define MPI_ERR_INFO_VALUE            33
+#define MPI_ERR_INFO                  34
+#define MPI_ERR_IO                    35
+#define MPI_ERR_KEYVAL                36
+#define MPI_ERR_LOCKTYPE              37
+#define MPI_ERR_NAME                  38
+#define MPI_ERR_NO_MEM                39
+#define MPI_ERR_NOT_SAME              40
+#define MPI_ERR_NO_SPACE              41
+#define MPI_ERR_NO_SUCH_FILE          42
+#define MPI_ERR_PORT                  43
+#define MPI_ERR_QUOTA                 44
+#define MPI_ERR_READ_ONLY             45
+#define MPI_ERR_RMA_CONFLICT          46
+#define MPI_ERR_RMA_SYNC              47
+#define MPI_ERR_SERVICE               48
+#define MPI_ERR_SIZE                  49
+#define MPI_ERR_SPAWN                 50
+#define MPI_ERR_UNSUPPORTED_DATAREP   51
+#define MPI_ERR_UNSUPPORTED_OPERATION 52
+#define MPI_ERR_WIN                   53
+
+#define MPI_T_ERR_MEMORY              54
+#define MPI_T_ERR_NOT_INITIALIZED     55
+#define MPI_T_ERR_CANNOT_INIT         56
+#define MPI_T_ERR_INVALID_INDEX       57
+#define MPI_T_ERR_INVALID_ITEM        58
+#define MPI_T_ERR_INVALID_HANDLE      59
+#define MPI_T_ERR_OUT_OF_HANDLES      60
+#define MPI_T_ERR_OUT_OF_SESSIONS     61
+#define MPI_T_ERR_INVALID_SESSION     62
+#define MPI_T_ERR_CVAR_SET_NOT_NOW    63
+#define MPI_T_ERR_CVAR_SET_NEVER      64
+#define MPI_T_ERR_PVAR_NO_STARTSTOP   65
+#define MPI_T_ERR_PVAR_NO_WRITE       66
+#define MPI_T_ERR_PVAR_NO_ATOMIC      67
+#define MPI_ERR_RMA_RANGE             68
+#define MPI_ERR_RMA_ATTACH            69
+#define MPI_ERR_RMA_FLAVOR            70
+#define MPI_ERR_RMA_SHARED            71
+#define MPI_T_ERR_INVALID             72
+#define MPI_T_ERR_INVALID_NAME        73
+#define MPI_ERR_PROC_ABORTED          74
+
+/* not #if conditional on OPAL_ENABLE_FT_MPI for ABI */
+#define MPI_ERR_PROC_FAILED           75
+#define MPI_ERR_PROC_FAILED_PENDING   76
+#define MPI_ERR_REVOKED               77
+
+/* Per MPI-3 p349 47, MPI_ERR_LASTCODE must be >= the last predefined
+   MPI_ERR_ code. Set the last code to allow some room for adding
+   error codes without breaking ABI. */
+#define MPI_ERR_LASTCODE              92
+
+/*
+ * Comparison results.  Don't change the order of these, the group
+ * comparison functions rely on it.
+ * Do not change the order of these without also modifying mpif.h.in.
+ */
+enum {
+  MPI_IDENT,
+  MPI_CONGRUENT,
+  MPI_SIMILAR,
+  MPI_UNEQUAL
+};
+
+/*
+ * MPI_Init_thread constants
+ * Do not change the order of these without also modifying mpif.h.in.
+ */
+enum {
+  MPI_THREAD_SINGLE,
+  MPI_THREAD_FUNNELED,
+  MPI_THREAD_SERIALIZED,
+  MPI_THREAD_MULTIPLE
+};
+
+/*
+ * Datatype combiners.
+ * Do not change the order of these without also modifying mpif.h.in.
+ * (see also mpif-common.h.fin).
+ */
+enum {
+  MPI_COMBINER_NAMED,
+  MPI_COMBINER_DUP,
+  MPI_COMBINER_CONTIGUOUS,
+  MPI_COMBINER_VECTOR,
+#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
+  MPI_COMBINER_HVECTOR_INTEGER,
+#else
+  OMPI_WAS_MPI_COMBINER_HVECTOR_INTEGER, /* preserve ABI compatibility */
+#endif
+  MPI_COMBINER_HVECTOR,
+  MPI_COMBINER_INDEXED,
+#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
+  MPI_COMBINER_HINDEXED_INTEGER,
+#else
+  OMPI_WAS_MPI_COMBINER_HINDEXED_INTEGER, /* preserve ABI compatibility */
+#endif
+  MPI_COMBINER_HINDEXED,
+  MPI_COMBINER_INDEXED_BLOCK,
+#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
+  MPI_COMBINER_STRUCT_INTEGER,
+#else
+  OMPI_WAS_MPI_COMBINER_STRUCT_INTEGER, /* preserve ABI compatibility */
+#endif
+  MPI_COMBINER_STRUCT,
+  MPI_COMBINER_SUBARRAY,
+  MPI_COMBINER_DARRAY,
+  MPI_COMBINER_F90_REAL,
+  MPI_COMBINER_F90_COMPLEX,
+  MPI_COMBINER_F90_INTEGER,
+  MPI_COMBINER_RESIZED,
+  MPI_COMBINER_HINDEXED_BLOCK
+};
+
+////MOCK ERRHANDLERS
+#define OMPI_ERRHANDLER_INVOKE(comm,err,fname)     err
+#define OMPI_ERRHANDLER_NOHANDLE_INVOKE(err,fname) err
+
diff --git a/base/serial/impl/sp3mm4amg/include/parser.h b/base/serial/impl/sp3mm4amg/include/parser.h
new file mode 100644
index 00000000..c482f7e1
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/parser.h
@@ -0,0 +1,90 @@
+/*
+ * Sp3MM_for_AlgebraicMultiGrid
+ * (C) Copyright 2021-2022
+ *     Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions, and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
+ *    not be used to endorse or promote products derived from this
+ *    software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef PARSER
+#define PARSER
+
+#include "mmio.h"
+#include "sparseMatrix.h"
+
+typedef struct{
+    ulong row;
+    ulong col;
+    double val;
+} entry;    //MatrixMarket COO entry
+
+typedef struct{
+    MM_typecode mcode;
+    entry* entries;
+    ulong* rowLens;
+    ulong M,N,NZ;   //spmat sizes
+} MatrixMarket;
+
+////COO PARSE
+//parse and check MatrixMarket mat in @matPath file
+MatrixMarket* MMRead(char* matPath);
+void freeMatrixMarket(MatrixMarket* mm);
+//basic check for sparse matrix compliance to the app, return posix bool
+int MMCheck(MM_typecode typecode);
+/*
+ * parse MatrixMarket matrix entries in @fp, of type @mcode,
+ * into a COOrdinate list of entries
+ * -> expand symmetric matrices into a normal matrix with both parts,
+ *    so NZ will be doubled in place
+ * return allocated and filled COO entries, with the NNZ number written into @NZ
+ * NO SORT CHECKING HERE
+ */
+entry* MMtoCOO(ulong* NZ, FILE *fp, MM_typecode mcode,ulong* rowLens);
+
+////COO -> ANYTHING ELSE CONVERSION
+/*
+ * write COO entries in @entries inside sparse matrix @mat in CSR format
+ * EXPECTED: CSR arrays allocated, @entries column sorted within (not necessarily consecutive) rows
+ * [symmetric parts explicitly represented --> not important here]
+ */
+int COOtoCSR(entry* entries, spmat* mat,ulong* rowLens);
+/*
+ * write COO entries in @entries inside sparse matrix @mat in ELL format
+ * EXPECTED: @entries column sorted within (not necessarily consecutive) rows
+ * ELL internal arrays allocated in this function, not freed in case of error
+ */
+int COOtoELL(entry* entries, spmat* mat, ulong* rowLens);
+////wrapper MM -> specialized target
+/*
+ * Parse MatrixMarket matrix stored in file at @matPath
+ * IMPLEMENTED WRAPPING: MMtoCOO -> COOtoCSR
+ * Returns: allocated spmat sparse matrix with all fields allocated
+ * symmetric matrices are expanded into a full matrix
+ */
+spmat* MMtoCSR(char* matPath);
+spmat* MMtoELL(char* matPath);
+
+
+#endif
diff --git a/base/serial/impl/sp3mm4amg/include/sparseMatrix.h b/base/serial/impl/sp3mm4amg/include/sparseMatrix.h
new file mode 100644
index 00000000..8fc769e8
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/sparseMatrix.h
@@ -0,0 +1,216 @@
+/*
+ * Sp3MM_for_AlgebraicMultiGrid
+ * (C) Copyright 2021-2022
+ *     Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions, and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
+ *    not be used to endorse or promote products derived from this
+ *    software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+//sparse matrix def & aux
+#ifndef SPARSEMATRIX
+#define SPARSEMATRIX
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "macros.h"
+#include "config.h"
+
+typedef struct {
+    idx_t NZ,M,N;
+    double *AS;
+    idx_t* JA;
+    //CSR SPECIFIC
+    idx_t* IRP;
+    #ifdef ROWLENS
+    idx_t* RL;      //row lengths
+    #endif
+    //CUDA SPECIFIC
+    idx_t MAX_ROW_NZ;
+
+} spmat;    //describe a sparse matrix
+
+//smart index keeping in a dense map
+typedef struct{
+    idx_t len;      //num of nnz idx accumulated
+    /* nnz index presence packing, implicit space enough for all possible indexes*/
+    nnz_idxs_flags_t idxsMap;
+    uint idxsMapN;  //either num of limbs or len of char flag array
+} SPVECT_IDX_DENSE_MAP;
+int initSpVectIdxDenseAcc(idx_t idxMax, SPVECT_IDX_DENSE_MAP* );
+inline void _resetIdxMap(SPVECT_IDX_DENSE_MAP* acc){
+    acc->len = 0;
+    memset(acc->idxsMap,0,sizeof(*acc->idxsMap)*acc->idxsMapN);
+}
+inline void _freeIdxMap(SPVECT_IDX_DENSE_MAP* acc){
+    free(acc->idxsMap);
+    free(acc);
+}
+//aux struct for sparse vector-scalar product accumulation
+typedef struct{
+    double* v;          //aux accumulating dense vector (sparse)
+    idx_t*  nnzIdx;     //v nnz value's indexes (contiguous)
+    idx_t   vLen;       //size of the aux dense vector //TODO USELESS?
+    SPVECT_IDX_DENSE_MAP nnzIdxMap;
+} ACC_DENSE;
+int allocAccDense(ACC_DENSE* v, ulong size);
+
+/*
+ **** NNZ IDXS PRESENCE FLAGS ACCESS INTERFACE: ***
+ * spVect_idx_in(idx,SPVECT_IDX_DENSE_MAP*)
+ * -> return 0 if idx isn't already set, and in that case set it
+ */
+inline int spVect_idx_in(idx_t idx, SPVECT_IDX_DENSE_MAP* idxsMapAcc){
+    #if SPVECT_IDX_BITWISE == TRUE
+    uint limbID     = idx / LIMB_SIZE_BIT;  //idx's limb id
+    uint limbIdxID  = idx % LIMB_SIZE_BIT;  //idx's pos in limb
+    DEBUGCHECKS assert( limbID < idxsMapAcc->idxsMapN );
+    limb_t idxPos = ((limb_t) 1) << limbIdxID;
+    if (!(idxsMapAcc->idxsMap[limbID] & idxPos) ){
+        idxsMapAcc->idxsMap[limbID] |= idxPos;
+        idxsMapAcc->len++;
+        return 0;
+    }
+    #else
+    //assert( idx < idxsMapAcc->idxsMapN );     //TODO
+    if (!( idxsMapAcc->idxsMap[idx] )){ //TODO an atomic cmp&swap primitive could be used, merging ops at HW level?
+        idxsMapAcc->idxsMap[idx] = 1;
+        idxsMapAcc->len++;
+        return 0;
+    }
+    #endif //SPVECT_IDX_BITWISE == TRUE
+    return 1;
+}
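+
+/*
+ * Illustrative sketch (not in the original header): deduplicate candidate
+ * column indexes of a symbolic product row; @candCols and @n are caller
+ * data, and the helper name is hypothetical.
+ */
+static inline idx_t spVectIdxDedupExample(idx_t* candCols, idx_t n,
+                                          SPVECT_IDX_DENSE_MAP* acc){
+    for (idx_t k = 0; k < n; k++)
+        spVect_idx_in(candCols[k], acc);    //returns 0 on first occurrence only
+    idx_t distinct = acc->len;              //symbolic row length
+    _resetIdxMap(acc);                      //ready for the next row
+    return distinct;
+}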
+inline void _resetAccVect(ACC_DENSE* acc){
+    memset(acc->v, 0, acc->vLen * sizeof(*(acc->v)));
+    memset(acc->nnzIdx, 0, acc->vLen * sizeof(*(acc->nnzIdx)));
+    _resetIdxMap(&acc->nnzIdxMap);
+}
+////Sparse vector accumulator -- corresponding to a matrix portion
+typedef struct{
+    //idx_t r;  //row index in the corresponding matrix
+    //idx_t c;  //col index in the corresponding matrix
+    idx_t   len;    //rowLen
+    double* AS;     //row nnz values
+    idx_t*  JA;     //row nnz colIndexes
+} SPACC;
+
+
+/*
+ * ARRAY BISECTION - RECURSIVE VERSION
+ * TODO ASSERT LEN>0 omitted
+ */
+inline int BISECT_ARRAY(idx_t target, idx_t* arr, idx_t len){
+    //if (len == 0)   return FALSE;
+    if (len <= 1)     return *arr == target;
+    idx_t middleIdx = (len-1) / 2;  //len=5-->2, len=4-->1
+    idx_t middle    = arr[ middleIdx ];
+    if      (target == middle)  return TRUE;
+    else if (target <  middle)  return BISECT_ARRAY(target,arr,middleIdx);
+    else    return BISECT_ARRAY(target,arr+middleIdx+1,middleIdx + (len-1)%2);
+}
+
+/*
+ * return !0 if col @j idx is in row @i of sparse mat @smat
+ * bisection used --> O(log_2(ROW LENGTH))
+ */
+inline int IS_NNZ(spmat* smat,idx_t i,idx_t j){
+    idx_t rStart = smat->IRP[i];
+    idx_t rLen   = smat->IRP[i+1] - rStart;
+    if (!rLen)  return FALSE;
+    return BISECT_ARRAY(j,smat->JA + rStart,rLen);
+}
+inline int IS_NNZ_linear(spmat* smat,idx_t i,idx_t j){  //linear -> O(ROW LENGTH)
+    int out = 0;
+    for (idx_t x=smat->IRP[i]; x<smat->IRP[i+1] && !out; x++){
+        out = (j == smat->JA[x]);
+    }
+    return out;
+}
+////aux functions
+//free sparse matrix
+inline void freeSpmatInternal(spmat* mat){
+    if(!mat)    return;
+    free(mat->AS);
+    free(mat->JA);
+    free(mat->IRP);
+#ifdef ROWLENS
+    free(mat->RL);
+#endif
+}
+
+inline void freeSpmat(spmat* mat){
+    if (!mat)   return;
+    freeSpmatInternal(mat);
+    free(mat);
+}
+
+static inline void zeroSpmat(spmat* m) {
+    memset(m->AS, 0, sizeof(*(m->AS)) * m->NZ);
+    memset(m->JA, 0, sizeof(*(m->JA)) * m->NZ);
+    memset(m->IRP, 0, sizeof(*(m->IRP)) * (m->M + 1));
+}
+
+//free max aux structs not NULL pointed
+inline void freeSpAcc(SPACC* r){
+    free(r->AS);
+    free(r->JA);
+}
+////alloc&init functions
+//alloc&init internal structures only dependent of dimensions @rows,@cols
+inline int allocSpMatrixInternal(idx_t rows, idx_t cols, spmat* mat){
+    mat -> M = rows;
+    mat -> N = cols;
+    if (!(mat->IRP=calloc(mat->M+1,sizeof(*(mat->IRP))))){ //calloc only for 0th
+        ERRPRINT("IRP calloc err\n");
+        freeSpmatInternal(mat);
+        return EXIT_FAILURE;
+    }
+#ifdef ROWLENS
+    if (!(mat->RL = malloc(mat->M*sizeof(*(mat->RL))))){
+        ERRPRINT("RL malloc err\n");
+        freeSpmatInternal(mat);
+        return EXIT_FAILURE;
+    }
+#endif
+    return EXIT_SUCCESS;
+}
+
+//alloc a sparse matrix of @rows rows and @cols cols
+inline spmat* allocSpMatrix(idx_t rows, idx_t cols){
+
+    spmat* mat;
+    if (!(mat = calloc(1,sizeof(*mat)))) {
+        ERRPRINT("mat calloc failed\n");
+        return NULL;
+    }
+    if (allocSpMatrixInternal(rows,cols,mat)){
+        free(mat);
+        return NULL;
+    }
+    return mat;
+}
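+
+/*
+ * Illustrative sketch (not in the original header): allocate an MxN CSR
+ * shell, then probe (i,j) membership once IRP/JA have been filled; the
+ * helper name is hypothetical.
+ */
+static inline int spmatProbeExample(idx_t rows, idx_t cols, idx_t i, idx_t j){
+    spmat* m = allocSpMatrix(rows, cols);
+    if (!m) return EXIT_FAILURE;
+    //... fill m->NZ, m->IRP, m->JA, m->AS here ...
+    int present = IS_NNZ(m, i, j);  //O(log rowLen) via BISECT_ARRAY
+    freeSpmat(m);
+    return present;
+}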
+
+#endif
diff --git a/base/serial/impl/sp3mm4amg/include/sparseUtilsGeneric.h b/base/serial/impl/sp3mm4amg/include/sparseUtilsGeneric.h
new file mode 100644
index 00000000..fbcab700
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/sparseUtilsGeneric.h
@@ -0,0 +1,138 @@
+/*
+ * Sp3MM_for_AlgebraicMultiGrid
+ * (C) Copyright 2021-2022
+ *     Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions, and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
+ *    not be used to endorse or promote products derived from this
+ *    software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "sparseMatrix.h"
+#include "config.h"
+
+#ifndef OFF_F
+    #pragma error OFF_F required
+#endif
+//////////////////////////////// CSR SPECIFIC /////////////////////////////////
+///SPARSE MATRIX PARTITIONING
+/*
+ * partition CSR sparse matrix @A in @gridCols columns partitions,
+ * returning an offsets matrix out[i][j] = start of jth colPartition of row i
+ * subdivide @A columns in uniform cols ranges in the output
+ */
+idx_t* CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols);
+
+/*
+ * partition CSR sparse matrix @A in @gridCols columns partitions as
+ * independent allocated sparse matrices and return them
+ * subdivide @A columns in uniform cols ranges in the output
+ */
+spmat* CAT(colsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols);
+//same as above but with (aux) use of offsets partitioning (also returned if colPartsOffsets!=NULL)
+spmat* CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)(spmat* A,uint gridCols,idx_t** colPartsOffsets);
+
+//same as checkOverallocPercent but with 2D partitioning - CSR col partitioning
+void CAT(checkOverallocRowPartsPercent_,OFF_F)(ulong* forecastedSizes,spmat* AB,
+          idx_t gridCols,idx_t* bColOffsets);
+///////////////////////////////////////////////////////////////////////////////
+
+///Single implementations headers
+#ifndef SPARSEUTILS_H_COMMON_IDX_IMPLS
+#define SPARSEUTILS_H_COMMON_IDX_IMPLS
+
+//shift every index in the sparse data to use the matrix in a Fortran app
+inline void C_FortranShiftIdxs(spmat* m){
+    for(ulong r=0; r<m->M+1; m -> IRP[r]++,r++);
+    for(ulong i=0; i<m->NZ;  m -> JA[i]++, i++);
+}
+//shift every index in the sparse data to use the matrix in a C app
+inline void Fortran_C_ShiftIdxs(spmat* m){  //TODO DBG ONLY and completeness
+    for(ulong r=0; r<m->M+1; m -> IRP[r]--,r++);
+    for(ulong i=0; i<m->NZ;  m -> JA[i]--, i++);
+}
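+
+/*
+ * Round-trip sketch (illustrative, not in the original header): a CSR built
+ * by this C code is 0-based; shift it before handing it to a 1-based Fortran
+ * routine, and back afterwards. The helper name is hypothetical.
+ */
+static inline void shiftRoundTripExample(spmat* m){
+    C_FortranShiftIdxs(m);      //IRP/JA now 1-based for the Fortran side
+    //... call the Fortran routine here ...
+    Fortran_C_ShiftIdxs(m);     //back to 0-based for C use
+}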
+
+/*
+ * check SpMM resulting matrix @AB = A * B nnz distribution in rows
+ * against the preallocated, forecasted size in @forecastedSizes
+ * in @forecastedSizes there's for each row -> forecasted size
+ * and in the last entry the cumulative of the whole matrix
+ */
+void checkOverallocPercent(ulong* forecastedSizes,spmat* AB);
+/*
+ check if sparse matrices A<->B differ up to
+ DOUBLE_DIFF_THREASH per element
+*/
+int spmatDiff(spmat* A, spmat* B);
+////dyn alloc of spMM output matrix
+/*
+///size prediction of AB = @A * @B
+inline ulong SpMMPreAlloc(spmat* A,spmat* B){
+    //TODO BETTER PREALLOC HEURISTICS HERE
+    return MAX(A->NZ,B->NZ);
+}
+//init a sparse matrix AB=@A * @B with an initial allocated space by a heuristic
+inline spmat* initSpMatrixSpMM(spmat* A, spmat* B){
+    spmat* out;
+    if (!(out = allocSpMatrix(A->M,B->N)))  return NULL;
+    out -> NZ = SpMMPreAlloc(A,B);
+    if (!(out->AS = malloc(out->NZ*sizeof(*(out->AS))))){
+        ERRPRINT("initSpMatrix: out->AS malloc errd\n");
+        free(out);
+        return NULL;
+    }
+    if (!(out->JA = malloc(out->NZ*sizeof(*(out->JA))))){
+        ERRPRINT("initSpMatrix: out->JA malloc errd\n");
+        freeSpmat(out);
+        return NULL;
+    }
+    return out;
+}
+
+#define REALLOC_FACTOR  1.5
+//realloc sparse matrix NZ arrays
+inline int reallocSpMatrix(spmat* mat,ulong newSize){
+    mat->NZ *= newSize;
+    void* tmp;
+    if (!(tmp = realloc(mat->AS,mat->NZ * sizeof(*(mat->AS))))){
+        ERRPRINT("reallocSpMatrix: realloc AS errd\n");
+        return EXIT_FAILURE;
+    }
+    mat->AS = tmp;
+    if (!(tmp = realloc(mat->JA,mat->NZ * sizeof(*(mat->JA))))){
+        ERRPRINT("reallocSpMatrix: realloc JA errd\n");
+        return EXIT_FAILURE;
+    }
+    mat->JA = tmp;
+    return EXIT_SUCCESS;
+}
+*/
+////MISC
+//print useful information about the 3SPMM about to be computed
+void print3SPMMCore(spmat* R,spmat* AC,spmat* P,CONFIG* conf);
+void printSparseMatrix(spmat* sparseMat,char justNZMarkers);
+/*convert @sparseMat sparse matrix in dense matrix returned*/
+double* CSRToDense(spmat* sparseMat);
+
+void freeAccsDense(ACC_DENSE* vectors,ulong num);
+
+#endif //SPARSEUTILS_H_COMMON_IDX_IMPLS
diff --git a/base/serial/impl/sp3mm4amg/include/sparseUtilsMulti.h b/base/serial/impl/sp3mm4amg/include/sparseUtilsMulti.h
new file mode 100644
index 00000000..13478f35
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/sparseUtilsMulti.h
@@ -0,0 +1,52 @@
+/*
+ * Sp3MM_for_AlgebraicMultiGrid
+ * (C) Copyright 2021-2022
+ *     Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions, and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
+ *    not be used to endorse or promote products derived from this
+ *    software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef SPARSEUTILSMULTI_H
+#define SPARSEUTILSMULTI_H
+
+#ifdef OFF_F    //save "includer" OFF_F value before overwriting it
+    #pragma push_macro("OFF_F")
+    #define _OFF_F_OLD
+    #undef OFF_F
+#endif
+
+#define OFF_F 0
+#include "sparseUtilsGeneric.h"
+#undef OFF_F
+
+#define OFF_F 1
+#include "sparseUtilsGeneric.h"
+#undef OFF_F
+
+#ifdef _OFF_F_OLD
+    #pragma pop_macro("OFF_F")
+    #undef _OFF_F_OLD
+#endif
+
+#endif
diff --git a/base/serial/impl/sp3mm4amg/include/utils.h b/base/serial/impl/sp3mm4amg/include/utils.h
new file mode 100644
index 00000000..8ef47492
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/include/utils.h
@@ -0,0 +1,181 @@
+/*
+ * Sp3MM_for_AlgebraicMultiGrid
+ * (C) Copyright 2021-2022
+ *     Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions, and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
+ *    not be used to endorse or promote products derived from this
+ *    software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef UTILS
+#define UTILS
+
+//#include
+#include "macros.h"
+
+#include "linuxK_rbtree_minimalized.h"
+extern int urndFd;  //file descriptor to DRNG_DEVFILE, opened O_RDONLY
+int init_urndfd();  // wrap init urndFd
+
+///IO
+//UNBUFFERED IO
+/*
+ * urndFd usage template to populate random timeout
+    if(_read_wrap(urndFd,(char*)&timeout,sizeof(timeout))<0){
+        fprintf(stderr,"rnd read for thread's timeout failed");
+        ret=EXIT_FAILURE;
+        goto end;
+    }
+ */
+//wrap read cycle over @fd
+int read_wrap(int fd,void* dst,size_t count);
+int fread_wrap(FILE* fp,void* dst,size_t count);
+//dual of read_wrap
+int write_wrap(int fd,void* src,size_t count);
+//create or open file at @outFpath for write
+int createNewFile(char* const outFpath);
+///STRUCTURED DATA IO
+#define DOUBLE_STR_FORMAT "%25le\n"
+//write double vector @v as row sequence of double at @fpath
+//e.g. read with od -tf8 -w8 fpath : OCTALOFFSET: DOUBLE FULL DIGITS
+int writeDoubleVector(char* fpath,double* v,ulong size);
+/*
+ * read vector of double [Str] of arbitrary size from @fpath, true length in *size
+ * if *size points to a nonzero value, the initial allocation will be of *size
+ * any successive reallocation is done multiplying *size by VECTOR_STEP_REALLOC
+ */
+double* readDoubleVector(char* fpath,ulong* size);
+double* readDoubleVectorStr(char* fpath,ulong* size);
+
+///STRUCTURED DATA IO -- BUFFERED: FSCANF - FPRINTF
+//dual of readDoubleVectorStr
+int writeDoubleVectorAsStr(char* fpath,double* v,ulong size);
+int MPI_Dims_create(int nnodes, int ndims, int dims[]); //commons/ompi_dims_create/ompi_dims_create.c
+
+#include "config.h"
+///config from ENV
+#define GRID_ROWS   "GRID_ROWS"
+#define GRID_COLS   "GRID_COLS"
+//parse configuration from env
+int getConfig(CONFIG* conf);
+
+//append only list implemented with a reallocated array
+typedef struct{
+    ulong* a;
+    ulong  size;
+    ulong  lastIdx;
+} APPENDARRAY;
+//append @val to @list, reallocating if reached end
+//TODO inline int appendArr(ulong val,APPENDARRAY* list);
+
+void sortuint(uint* arr, uint len); //sort uint array @arr of @len elements
+void sort_idx_t(idx_t* arr, idx_t len);
+void sortulong(ulong* arr, ulong len);
+void sortRbNode(rbNode* arr,idx_t len);
+
+///ranges functions
+/*
+ * considering @rangesN uniform ranges covering [0,rangesEnd)
+ * return the rangeId that element @i is in
+ */
+inline ushort matchingUnifRangeIdxLinear(idx_t i,idx_t rangesEnd,ushort rangesN){
+    double rangeW = rangesEnd / (double) rangesN;
+    for(ushort j=0; j < rangesN; j++){
+        if( i < rangeW * (j+1) )    return j;
+    }
+    assert( FALSE );    //i should be in a range!
+    return EXIT_FAILURE;
+}
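+
+/*
+ * Worked example (illustrative): rangesEnd=10, rangesN=4 gives ranges
+ * [0,2.5) [2.5,5) [5,7.5) [7.5,10),
+ * so matchingUnifRangeIdxLinear(6,10,4) == 2.
+ */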
+/*
+ * find which range @idx matches among
+ * a uniform range division of @size elements in @rangesN ranges with carried remainder
+ * return 0based idx of matched range
+ */
+inline ushort matchingUnifRangeIdx(idx_t idx, idx_t size, ushort rangesN){
+    idx_t rangeW = size/rangesN, rangeRem = size%rangesN;
+    idx_t searchStart,searchEnd;
+    //!IN_RANGE(idx,unifRemShareBlock(idx,rangeW,rangeRem)) && IN_RANGE(r,0,rangesN);){
+    for(ushort r=rangesN/2-1, hMoveWidth=rangesN/2; hMoveWidth>0;hMoveWidth/=2){
+        //bounds of the candidate range @r
+        searchStart = unifRemShareStart(r,rangeW,rangeRem);
+        searchEnd   = unifRemShareEnd(r,rangeW,rangeRem);
+        if (IN_RANGE(idx,searchStart,searchEnd))    return r;
+        else if (idx < searchStart) r = AVG(0,r);
+        else                        r = AVG(r,rangesN-1);
+    }
+    assert(FALSE);
+    return EXIT_FAILURE;
+}
+
+///reductionUtils
+inline idx_t reductionSumSeq(idx_t* arr,idx_t arrLen){
+    idx_t i,out;
+    for(i=0,out=0; i<arrLen; out += arr[i++]);
+    return out;
+}
+/*
+ * decompress file in @path -> @tmpFsDecompressPath
+ * e.g. decompress  xz -d -c @path > @tmpFsDecompressPath
+ * Returns: -1 if decompression wasn't possible otherwise decompress command exit status
+ */
+int extractInTmpFS(char* path, char* tmpFsDecompressPath);
+//compute E[@values] in @out[0] and VAR[@values] in @out[1] of @numVals values
+void statsAvgVar(double* values,uint numVals, double* out);
+void printMatrix(double* mat,ulong m,ulong n,char justNZMarkers);
+void printVector(double* v,ulong size);
+
+void assertArrNoRepetitions(idx_t* arrSorted, idx_t arrLen);
+#endif
diff --git a/base/serial/impl/sp3mm4amg/lib/linuxK_rbtree_minimalized.c b/base/serial/impl/sp3mm4amg/lib/linuxK_rbtree_minimalized.c
new file mode 100644
index 00000000..7fe3bd58
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/lib/linuxK_rbtree_minimalized.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli
+  (C) 2002  David Woodhouse
+  (C) 2012  Michel Lespinasse
+
+
+  linux/lib/rbtree.c
+*/
+/*
+ * RedBlackTree_linux_userspace
+ * (C) Copyright 2021-2022
+ *     Andrea Di Iorio
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions, and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the RedBlackTree_linux_userspace or the names of its contributors may
+ *    not be used to endorse or promote products derived from this
+ *    software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE RedBlackTree_linux_userspace GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+///#include <linux/rbtree_augmented.h> //TODO LESS_DEPENDENCIES
+#include "linuxK_rbtree_minimalized.h" ///fulfill embedded deps needed //TODO LESS_DEPENDENCIES
+//#include
+
+/* MINIMALIZED VERSION WITHOUT THE IMPORT OF rbtree_augmented
+ * red-black trees properties:  https://en.wikipedia.org/wiki/Rbtree
+ *
+ *  1) A node is either red or black
+ *  2) The root is black
+ *  3) All leaves (NULL) are black
+ *  4) Both children of every red node are black
+ *  5) Every simple path from root to leaves contains the same number
+ *     of black nodes.
+ *
+ *  4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two
+ *  consecutive red nodes in a path and every red node is therefore followed by
+ *  a black. So if B is the number of black nodes on every simple path (as per
+ *  5), then the longest possible path due to 4 is 2B.
+ *
+ *  We shall indicate color with case, where black nodes are uppercase and red
+ *  nodes will be lowercase. Unknown color nodes shall be drawn as red within
+ *  parentheses and have some accompanying text comment.
+ */
+
+/*
+ * Notes on lockless lookups:
+ *
+ * All stores to the tree structure (rb_left and rb_right) must be done using
+ * WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the
+ * tree structure as seen in program order.
+ *
+ * These two requirements will allow lockless iteration of the tree -- not
+ * correct iteration mind you, tree rotations are not atomic so a lookup might
+ * miss entire subtrees.
+ *
+ * But they do guarantee that any such traversal will only see valid elements
+ * and that it will indeed complete -- does not get stuck in a loop.
+ *
+ * It also guarantees that if the lookup returns an element it is the 'correct'
+ * one. But not returning an element does _NOT_ mean it's not present.
+ *
+ * NOTE:
+ *
+ * Stores to __rb_parent_color are not important for simple lookups so those
+ * are left undone as of now. Nor did I check for loops involving parent
+ * pointers.
+ */
+
+///rb_set_black   //TODO ADDONLY_GENERIC
+
+static inline struct rb_node *rb_red_parent(struct rb_node *red)
+{
+    return (struct rb_node *)red->__rb_parent_color;
+}
+
+/*
+ * Helper function for rotations:
+ * - old's parent and color get assigned to new
+ * - old gets assigned new as a parent and 'color' as a color.
+ */
+static inline void
+__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new,
+            struct rb_root *root, int color)
+{
+    struct rb_node *parent = rb_parent(old);
+    new->__rb_parent_color = old->__rb_parent_color;
+    rb_set_parent_color(old, new, color);
+    __rb_change_child(old, new, parent, root);
+}
+
+///TODO REMOVE AUGMENT
+static __always_inline void
+__rb_insert(struct rb_node *node, struct rb_root *root)
+{
+    struct rb_node *parent = rb_red_parent(node), *gparent, *tmp;
+
+    while (true) {
+        /*
+         * Loop invariant: node is red.
+         */
+        if (unlikely(!parent)) {
+            /*
+             * The inserted node is root. Either this is the
+             * first node, or we recursed at Case 1 below and
+             * are no longer violating 4).
+             */
+            rb_set_parent_color(node, NULL, RB_BLACK);
+            break;
+        }
+
+        /*
+         * If there is a black parent, we are done.
+         * Otherwise, take some corrective action as,
+         * per 4), we don't want a red root or two
+         * consecutive red nodes.
+         */
+        if(rb_is_black(parent))
+            break;
+
+        gparent = rb_red_parent(parent);
+
+        tmp = gparent->rb_right;
+        if (parent != tmp) {    /* parent == gparent->rb_left */
+            if (tmp && rb_is_red(tmp)) {
+                /*
+                 * Case 1 - node's uncle is red (color flips).
+                 *
+                 *       G            g
+                 *      / \          / \
+                 *     p   u  -->   P   U
+                 *    /            /
+                 *   n            n
+                 *
+                 * However, since g's parent might be red, and
+                 * 4) does not allow this, we need to recurse
+                 * at g.
+                 */
+                rb_set_parent_color(tmp, gparent, RB_BLACK);
+                rb_set_parent_color(parent, gparent, RB_BLACK);
+                node = gparent;
+                parent = rb_parent(node);
+                rb_set_parent_color(node, parent, RB_RED);
+                continue;
+            }
+
+            tmp = parent->rb_right;
+            if (node == tmp) {
+                /*
+                 * Case 2 - node's uncle is black and node is
+                 * the parent's right child (left rotate at parent).
+                 *
+                 *      G             G
+                 *     / \           / \
+                 *    p   U  -->    n   U
+                 *     \           /
+                 *      n         p
+                 *
+                 * This still leaves us in violation of 4), the
+                 * continuation into Case 3 will fix that.
+                 */
+                tmp = node->rb_left;
+                WRITE_ONCE(parent->rb_right, tmp);
+                WRITE_ONCE(node->rb_left, parent);
+                if (tmp)
+                    rb_set_parent_color(tmp, parent,
+                                RB_BLACK);
+                rb_set_parent_color(parent, node, RB_RED);
+                ///augment_rotate(parent, node);    //TODO DEL AUGMENTED DEP
+                parent = node;
+                tmp = node->rb_right;
+            }
+
+            /*
+             * Case 3 - node's uncle is black and node is
+             * the parent's left child (right rotate at gparent).
+             *
+             *        G           P
+             *       / \         / \
+             *      p   U  -->  n   g
+             *     /                 \
+             *    n                   U
+             */
+            WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */
+            WRITE_ONCE(parent->rb_right, gparent);
+            if (tmp)
+                rb_set_parent_color(tmp, gparent, RB_BLACK);
+            __rb_rotate_set_parents(gparent, parent, root, RB_RED);
+            ///augment_rotate(gparent, parent);    //TODO DEL_AUGMENTED_DEP
+            break;
+        } else {
+            tmp = gparent->rb_left;
+            if (tmp && rb_is_red(tmp)) {
+                /* Case 1 - color flips */
+                rb_set_parent_color(tmp, gparent, RB_BLACK);
+                rb_set_parent_color(parent, gparent, RB_BLACK);
+                node = gparent;
+                parent = rb_parent(node);
+                rb_set_parent_color(node, parent, RB_RED);
+                continue;
+            }
+
+            tmp = parent->rb_left;
+            if (node == tmp) {
+                /* Case 2 - right rotate at parent */
+                tmp = node->rb_right;
+                WRITE_ONCE(parent->rb_left, tmp);
+                WRITE_ONCE(node->rb_right, parent);
+                if (tmp)
+                    rb_set_parent_color(tmp, parent,
+                                RB_BLACK);
+                rb_set_parent_color(parent, node, RB_RED);
+                ///augment_rotate(parent, node);    //TODO DEL_AUGMENTED_DEP
+                parent = node;
+                tmp = node->rb_left;
+            }
+
+            /* Case 3 - left rotate at gparent */
+            WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */
+            WRITE_ONCE(parent->rb_left, gparent);
+            if (tmp)
+                rb_set_parent_color(tmp, gparent, RB_BLACK);
+            __rb_rotate_set_parents(gparent, parent, root, RB_RED);
+            ///augment_rotate(gparent, parent);    //TODO DEL_AUGMENTED_DEP
+            break;
+        }
+    }
+}
+
+/*
+ * Inline version for rb_erase() use - we want to be able to inline
+ * and eliminate the dummy_rotate callback there
+ */
+///____rb_erase_color    //TODO ADDONLY_GENERIC
+
+/* Non-inline version for rb_erase_augmented() use */
+///__rb_erase_color    //TODO ADDONLY_GENERIC
+
+/* TODO LESS_DEPENDENCIES MINIMALIZED
+ * Non-augmented rbtree manipulation functions.
+ *
+ * We use dummy augmented callbacks here, and have the compiler optimize them
+ * out of the rb_insert_color() and rb_erase() function definitions.
+
+static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {}
+static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {}
+static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {}
+
+static const struct rb_augment_callbacks dummy_callbacks = {
+    .propagate = dummy_propagate,
+    .copy = dummy_copy,
+    .rotate = dummy_rotate
+};*/
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+    __rb_insert(node, root);
+}
+
+///rb_erase    //TODO ADDONLY_GENERIC
+
+/** TODO LESS_DEPENDENCIES - AUGMENTED REMOVE
+ * Augmented rbtree manipulation functions.
+ *
+ * This instantiates the same __always_inline functions as in the non-augmented
+ * case, but this time with user-defined callbacks.
+ */
+///__rb_insert_augmented    todo LESS_DEPENDENCIES - AUGMENTED REMOVE
+
+//////  Traversing
+/*
+ * This function returns the first node (in sort order) of the tree.
+ */
+struct rb_node *rb_first(const struct rb_root *root)
+{
+    struct rb_node  *n;
+
+    n = root->rb_node;
+    if (!n)
+        return NULL;
+    while (n->rb_left)
+        n = n->rb_left;
+    return n;
+}
+
+struct rb_node *rb_last(const struct rb_root *root)
+{
+    struct rb_node  *n;
+
+    n = root->rb_node;
+    if (!n)
+        return NULL;
+    while (n->rb_right)
+        n = n->rb_right;
+    return n;
+}
+
+struct rb_node *rb_next(const struct rb_node *node)
+{
+    struct rb_node *parent;
+
+    if (RB_EMPTY_NODE(node))
+        return NULL;
+
+    /*
+     * If we have a right-hand child, go down and then left as far
+     * as we can.
+     */
+    if (node->rb_right) {
+        node = node->rb_right;
+        while (node->rb_left)
+            node = node->rb_left;
+        return (struct rb_node *)node;
+    }
+
+    /*
+     * No right-hand children. Everything down and left is smaller than us,
+     * so any 'next' node must be in the general direction of our parent.
+     * Go up the tree; any time the ancestor is a right-hand child of its
+     * parent, keep going up. First time it's a left-hand child of its
+     * parent, said parent is our 'next' node.
+     */
+    while ((parent = rb_parent(node)) && node == parent->rb_right)
+        node = parent;
+
+    return parent;
+}
+
+struct rb_node *rb_prev(const struct rb_node *node)
+{
+    struct rb_node *parent;
+
+    if (RB_EMPTY_NODE(node))
+        return NULL;
+
+    /*
+     * If we have a left-hand child, go down and then right as far
+     * as we can.
+     */
+    if (node->rb_left) {
+        node = node->rb_left;
+        while (node->rb_right)
+            node = node->rb_right;
+        return (struct rb_node *)node;
+    }
+
+    /*
+     * No left-hand children. Go up till we find an ancestor which
+     * is a right-hand child of its parent.
+     */
+    while ((parent = rb_parent(node)) && node == parent->rb_left)
+        node = parent;
+
+    return parent;
+}
+
+///rb_replace_node        //TODO ADDONLY_GENERIC
+///rb_replace_node_rcu    //TODO LESS_DEPENDENCIES
+
+static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
+{
+    for (;;) {
+        if (node->rb_left)
+            node = node->rb_left;
+        else if (node->rb_right)
+            node = node->rb_right;
+        else
+            return (struct rb_node *)node;
+    }
+}
+
+struct rb_node *rb_next_postorder(const struct rb_node *node)
+{
+    const struct rb_node *parent;
+    if (!node)
+        return NULL;
+    parent = rb_parent(node);
+
+    /* If we're sitting on node, we've already seen our children */
+    if (parent && node == parent->rb_left && parent->rb_right) {
+        /* If we are the parent's left node, go to the parent's right
+         * node then all the way down to the left */
+        return rb_left_deepest_node(parent->rb_right);
+    } else
+        /* Otherwise we are the parent's right node, and the parent
+         * should be next */
+        return (struct rb_node *)parent;
+}
+
+struct rb_node *rb_first_postorder(const struct rb_root *root)
+{
+    if (!root->rb_node)
+        return NULL;
+
+    return rb_left_deepest_node(root->rb_node);
+}
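+
+/*
+ * Traversal sketch (illustrative, not part of the kernel sources): in-order
+ * walk of a tree rooted at 'root', using the kernel's usual container_of
+ * pattern; the container struct and rb_entry() availability are assumptions.
+ *
+ *    struct myItem { struct rb_node node; idx_t key; };  //hypothetical container
+ *    for (struct rb_node *n = rb_first(&root); n; n = rb_next(n))
+ *        visit(rb_entry(n, struct myItem, node));
+ */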
diff --git a/base/serial/impl/sp3mm4amg/lib/mmio.c b/base/serial/impl/sp3mm4amg/lib/mmio.c
new file mode 100644
index 00000000..c250ff2a
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/lib/mmio.c
@@ -0,0 +1,511 @@
+/*
+*   Matrix Market I/O library for ANSI C
+*
+*   See http://math.nist.gov/MatrixMarket for details.
+*
+*
+*/
+
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "mmio.h"
+
+int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
+                double **val_, int **I_, int **J_)
+{
+    FILE *f;
+    MM_typecode matcode;
+    int M, N, nz;
+    int i;
+    double *val;
+    int *I, *J;
+
+    if ((f = fopen(fname, "r")) == NULL)
+            return -1;
+
+
+    if (mm_read_banner(f, &matcode) != 0)
+    {
+        printf("mm_read_unsymmetric: Could not process Matrix Market banner ");
+        printf(" in file [%s]\n", fname);
+        return -1;
+    }
+
+
+
+    if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) &&
+            mm_is_sparse(matcode)))
+    {
+        fprintf(stderr, "Sorry, this application does not support ");
+        fprintf(stderr, "Matrix Market type: [%s]\n",
+                mm_typecode_to_str(matcode));
+        return -1;
+    }
+
+    /* find out size of sparse matrix: M, N, nz .... */
+
+    if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0)
+    {
+        fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n");
+        return -1;
+    }
+
+    *M_ = M;
+    *N_ = N;
+    *nz_ = nz;
+
+    /* reserve memory for matrices */
+
+    I = (int *) malloc(nz * sizeof(int));
+    J = (int *) malloc(nz * sizeof(int));
+    val = (double *) malloc(nz * sizeof(double));
+
+    *val_ = val;
+    *I_ = I;
+    *J_ = J;
+
+    /* NOTE: when reading in doubles, ANSI C requires the use of the "l"  */
+    /*   specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
+    /*  (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15)            */
+
+    for (i=0; i<nz; i++)
+    {
+        fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i]);
+        I[i]--;  /* adjust from 1-based to 0-based */
+        J[i]--;
+    }
+    fclose(f);
+
+    return 0;
+}
diff --git a/base/serial/impl/sp3mm4amg/lib/parser.c b/base/serial/impl/sp3mm4amg/lib/parser.c
new file mode 100644
--- /dev/null
+++ b/base/serial/impl/sp3mm4amg/lib/parser.c
@@ -0,0 +1,391 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "sparseMatrix.h"
+#include "mmio.h"
+#include "parser.h"
+#include "macros.h"
+#include "utils.h"
+
+////COO PARSE
+int MMCheck(MM_typecode mcode) {
+    if (!mm_is_matrix(mcode)){  //consistency checks among flags in @mcode
+        ERRPRINT("invalid matrix: not a matrix\n");
+        return EXIT_FAILURE;
+    }
+    if (mm_is_dense(mcode) ){   //|| mm_is_array(mcode) ){
+        ERRPRINT("invalid matrix: not a supported sparse matrix\tDENSE MAT\n");
+        return EXIT_FAILURE;
+    }
+    return EXIT_SUCCESS;
+}
+
+entry* MMtoCOO(ulong* NZ, FILE *fp, MM_typecode mcode,ulong* rowLens){
+    int scanndRet=0;
+    ulong nzTrgt=*NZ,nzIdx=0;   //expanded num of nz (in case of sym matrix)
+    ulong diagEntries=0, row = 0, col = 0;  //current entry's row,col from MM -> 1 based
+    double val = 0;
+    entry* entries = NULL;      //COO parsed entries
+    ///init
+    if (mm_is_symmetric(mcode)){
+        nzTrgt = 2* (*NZ);      //upscale max num of nz in the matrix
+        VERBOSE printf("MMtoCOO:\tparsing a symmetric matrix\n");
+    }
+    if (!(entries = malloc(nzTrgt * sizeof(*entries)))){
+        ERRPRINT("MMtoCOO: entries malloc errd\n");
+        return NULL;
+    }
+    ///parse MM fp lines into COOrdinate entries
+    while (1) { // Reading the fp until EOF
+        if (mm_is_pattern(mcode)){
+            scanndRet = fscanf(fp, "%lu %lu\n", &row, &col);
+            val = 1.0;
+        } else if (mm_is_real(mcode) || (mm_is_integer(mcode))){
+            scanndRet = fscanf(fp, "%lu %lu %lf\n", &row, &col, &val);
+        }
+
+
+        if (scanndRet == EOF){  //TODO more strict check with type&ret?
+            if (ferror(fp)){
+                perror("fscanf EOF");
+                goto _err;
+            } else  break;
+        }
+        CONSISTENCY_CHECKS{
+            //TODO USELESS ? ? ?
+            if ((mm_is_pattern(mcode)  && scanndRet != 2) ||
+                (!mm_is_pattern(mcode) && scanndRet != 3)){
+                ERRPRINT("invalid matrix: entry with inconsistent number of fields\n");
+                goto _err;
+            }
+        }
+        ////ADD THE CURRENT MATRIX ENTRY
+        rowLens[row-1]++;
+        entries[nzIdx++]=(entry) { .row=row-1, .col=col-1, .val=val };
+        //also mirrored entry if sym.matrix with reflected idx inside matrix limits
+        if (mm_is_symmetric(mcode) && row != col ){
+            //TODO CONSTRAINED FORMAT ?&& row <= mat->N && col <= mat->M ){
+            SWAP(row,col);
+            rowLens[row-1]++;
+            entries[nzIdx++]=(entry) { .row=row-1, .col=col-1, .val=val };
+        }
+        else    diagEntries++;  //for CONSISTENCY_CHECKS only
+    }
+
+    //CONSISTENCY_CHECKS
+    nzTrgt = *NZ;
+    if(mm_is_symmetric(mcode))  nzTrgt = 2*(*NZ) - diagEntries;
+    assert( nzIdx == nzTrgt );
+
+    //update NZ
+    *NZ = nzIdx;
+    return entries;
+
+    _err:
+    free(entries);  return NULL;
+}
+
+void freeMatrixMarket(MatrixMarket* mm){
+    if (!mm)    return;
+    free(mm->entries);
+    free(mm->rowLens);
+    free(mm);
+}
+
+MatrixMarket* MMRead(char* matPath){
+    FILE* fp = fopen(matPath, "r");
+    if (!fp){
+        perror("fopen");
+        return NULL;
+    }
+    MatrixMarket* out = calloc(1,sizeof(*out));
+    if (!out){
+        ERRPRINT("MMRead out malloc errd\n");
+        goto err;
+    }
+    //banner -> parse matrix specs
+    if (mm_read_banner(fp, &out->mcode) != 0) {
+        fprintf(stderr,"mm_read_banner err at:%s\n",matPath);
+        goto err;
+    }
+    //assert matrix is compatible with this app scope
+    if (MMCheck(out->mcode))    goto err;
+
+    //parse sizes
+    //TODO OVERCOME uint limitation?
+    if(mm_read_mtx_crd_size(fp,(uint*) &out->M, (uint*) &out->N, (uint*) &out->NZ)){
+        fprintf(stderr,"mm_read_mtx_crd_size err at %s:\n",matPath);
+        goto err;
+    }
+    if (!(out->rowLens = calloc(out->M,sizeof(*(out->rowLens))))){
+        ERRPRINT("MMRead:\trowLens calloc errd\n");
+        goto err;
+    }
+    if (!(out->entries = MMtoCOO(&out->NZ, fp, out->mcode,out->rowLens))){
+        ERRPRINTS("MAT PARSE TO CSR ERR at:%s\n",matPath);
+        goto err;
+    }
+    goto _end;
+
+    err:
+    freeMatrixMarket(out);
+    out = NULL;
+    _end:
+    fclose(fp);
+    return out;
+}
+
+
+////COO -> ANYTHING ELSE CONVERSION
+int COOtoCSR(entry* entries, spmat* mat,ulong* rowLens){
+    int out = EXIT_FAILURE;
+    ulong idx;
+    long*  _rowsLastCol = NULL; //for each row -> last added entry's columnIdx
+    ulong* rowsNextIdx  = NULL; //for each row -> next entry progressive idx
+    if (!(rowsNextIdx = calloc(mat->M,sizeof(*rowsNextIdx)))){
+        ERRPRINT("COOtoCSR: rowsNextIdx calloc errd\n");
+        goto _end;
+    }
+    CONSISTENCY_CHECKS{ //alloc and init aux arr for entries sort check
+        if (!(_rowsLastCol = malloc(mat->M*sizeof(*_rowsLastCol)))){
+            ERRPRINT("COOtoCSR: _rowsLastCol malloc errd\n");
+            goto _end;
+        }
+        memset(_rowsLastCol,-1,mat->M*sizeof(*_rowsLastCol));
+    }
+    /*TODO OLD
+     * //get rowLens->IRP (partial), TODO moved MMtoCOO to avoid FULL rescan entries
+     * for (ulong i=0; i<mat->NZ; i++)  mat->IRP[entries[i].row+1]++;
+     * memcpy(mat->RL,mat->IRP + 1,sizeof(*mat->IRP) * mat->M); //TODO in next ifdef
+     * for (ulong i=2; i<mat->M+1; i++) mat->IRP[i] += mat->IRP[i-1];
+     * OLD2: rowLens memcpy ... no just moved the pointer
+     * #ifdef ROWLENS
+     * memcpy(mat->RL,rowLens,sizeof(*rowLens) * mat->M);   //TODO in next ifdef
+     * #endif
+     */
+    //IRP: transform row lens into increments to build the row index "pointer"
+    //0th -> 0 mandatory; 1th = 0th row len, ...., M+1th = end of Mth row
+    memcpy(mat->IRP+1,rowLens,sizeof(*rowLens) * mat->M);   //init IRP with rows lens
+    for (ulong i=2; i<mat->M+1; i++)    mat->IRP[i] += mat->IRP[i-1];
+    CONSISTENCY_CHECKS  assert(mat->IRP[mat->M] == mat->NZ);
+    ///FILL
+    //TODO EXPECTED entries with .col entries -> CONSISTENCY_CHECKS
+    //sorted for each row (even if not sequential in @entries)
+    //entries written in CSR format
+    entry* e;
+    for (ulong i=0; i<mat->NZ; i++) {
+        e = entries+i;
+        CONSISTENCY_CHECKS{ //TODO CHECK IF COO ENTRIES ARE SORTED
+            /*#pragma message("COO sorting check enabled")*/
+            if (_rowsLastCol[e->row] >= (long) e->col){
+                ERRPRINTS("not sorted entry:%ld,%ld,%lf",e->row,e->col,e->val);
+                goto _end;
+            }
+            _rowsLastCol[e->row] = e->col;
+        }
+        idx = mat -> IRP[e->row] + rowsNextIdx[e->row]++;
+        mat -> AS[idx] = e->val;
+        mat -> JA[idx] = e->col;
+    }
+
+    out = EXIT_SUCCESS;
+
+    _end:
+    if(rowsNextIdx)     free(rowsNextIdx);
+    if(_rowsLastCol)    free(_rowsLastCol);
+
+    return out;
+}
+
+int COOtoELL(entry* entries, spmat* mat, ulong* rowLens){
+    int out=EXIT_FAILURE;
+    ulong maxRow = 0, col, _ellEntriesTot, *rowsNextCol;
+    long* _rowsLastCol=NULL;
+    entry* e;
+    for (ulong i=0; i<mat->M; i++)  maxRow = MAX(maxRow,rowLens[i]);
+    _ellEntriesTot = 2*mat->M*maxRow;
+    #ifdef LIMIT_ELL_SIZE
+    if ( _ellEntriesTot > ELL_MAX_ENTRIES ){
+        ERRPRINTS("Required entries %lu -> %lu MB for the matrix exceed the "
+          "designated threshold of: %lu -> %lu MB for ellpack\n",
+          _ellEntriesTot,(sizeof(double)*_ellEntriesTot) >> 20,
+          ELL_MAX_ENTRIES,(sizeof(double)*ELL_MAX_ENTRIES) >> 20);
+        return EXIT_FAILURE;
+    }
+    #endif
+    //malloc aux vects
+    if (!(rowsNextCol = calloc(mat->M,sizeof(*rowsNextCol)))){
+        ERRPRINT("COOtoELL:\trowsNextCol calloc errd\n");
+        goto _end;
+    }
+    CONSISTENCY_CHECKS{ //alloc and init aux arr for entries SORT CHECK
+        if (!(_rowsLastCol = malloc(mat->M*sizeof(*_rowsLastCol)))){
+            ERRPRINT("COOtoELL:\t_rowsLastCol malloc errd\n");
+            goto _end;
+        }
+        memset(_rowsLastCol,-1,mat->M*sizeof(*_rowsLastCol));
+    }
+    ///malloc dependent on MAX ROW LEN, err free in the caller
+    if (!(mat->AS = calloc(mat->M * maxRow, sizeof(*(mat->AS))))){
+        ERRPRINT("COOtoELL:\tELL->AS calloc errd\n");
+        goto _end;
+    }   //zero init for auto rows residual fill with 0
+    if (!(mat->JA = calloc(mat->M * maxRow, sizeof(*(mat->JA))))){
+        ERRPRINT("COOtoELL:\tELL->JA calloc errd\n");
+        goto _end;
+    }
+
+    mat->MAX_ROW_NZ = maxRow;
+    /*#ifdef ROWLENS
+     *memcpy(mat->RL,rowLens,sizeof(*rowLens) * mat->M);    //TODO in next ifdef
+     *#endif
+     */
+    ///FILL NZ
+    //TODO EXPECTED entries with .col entries -> CONSISTENCY_CHECKS
+    //sorted for each row (even if not sequential in @entries)
+    for (ulong i=0; i<mat->NZ; i++){
+        e = entries + i;
+        CONSISTENCY_CHECKS{ //TODO CHECK IF COO ENTRIES ARE COL SORTED for successive rows
+            /*#pragma message("COO sorting check enabled")*/
+            if (_rowsLastCol[e->row] >= (long) e->col){
+                ERRPRINTS("not sorted entry:%ld,%ld,%lf",
+                  e->row,e->col,e->val);
+                goto _end;
+            }
+            _rowsLastCol[e->row] = e->col;
+        }
+        col = rowsNextCol[e->row]++;    //place entry in its row's sequent spot
+        mat->AS[ IDX2D(e->row,col,maxRow) ] = e->val;
+        mat->JA[ IDX2D(e->row,col,maxRow) ] = e->col;
+
+    }
+    ///FILL PAD
+    ulong padded = 0,paddedEntries = mat->M*mat->MAX_ROW_NZ;
+    for (ulong r=0; r<mat->M; r++){
+        for (ulong c=rowLens[r],j=IDX2D(r,c,maxRow); c<maxRow; c++,j++){
+            padded++;
+            mat->AS[j] = ELL_AS_FILLER; //TODO ALREADY DONE IN CALLOC
+            //mat->JA[j] = mat->JA[rowLens[r]-1]; //ELL_JA_FILLER; //TODO calloc CUDA benefit?
+        }
+    }
+    VERBOSE{
+        printf("padded %lu entries = %lf%% of NZ\n",padded,100*padded/(double) mat->NZ);
+        printf("ELL matrix of: %lu paddedEntries -> %lu MB of JA+AS\n",
+          paddedEntries,(paddedEntries*sizeof(*(mat->JA))+paddedEntries*sizeof(*(mat->AS))) >> 20);
+    }
+    out = EXIT_SUCCESS;
+    _end:
+    if(rowsNextCol)     free(rowsNextCol);
+    if(_rowsLastCol)    free(_rowsLastCol);
+    return out;
+}
+////wrapper MM -> specialized target
+spmat* MMtoCSR(char* matPath){
+    spmat* mat = NULL;
+    MatrixMarket* mm = MMRead(matPath);
+    if (!mm){
+        ERRPRINT("MMtoCSR parse err\n");
+        return NULL;
+    }
+    if (!(mat = calloc(1,sizeof(*mat)))){
+        ERRPRINT("MMtoCSR: mat struct alloc errd");
+        goto err;
+    }
+    mat -> M = mm->M;
+    mat -> N = mm->N;
+    mat -> NZ= mm->NZ;
+    //alloc sparse matrix components
+    if (!(mat->IRP = calloc(mat->M+1,sizeof(*(mat->IRP))))){
+        ERRPRINT("MMtoCSR: IRP calloc err\n");
+        goto err;
+    }
+    ////alloc core struct of CSR
+    if(!(mat->JA = malloc(mat->NZ*sizeof(*(mat->JA))))){
+        ERRPRINT("MMtoCSR: JA malloc err\n");
+        goto err;
+    }
+    if(!(mat->AS = malloc(mat->NZ*sizeof(*(mat->AS))))){
+        ERRPRINT("MMtoCSR: AS malloc err\n");
+        goto err;
+    }
+    if (COOtoCSR(mm->entries,mat,mm->rowLens))  goto err;
+    #ifdef ROWLENS
+    mat->RL = mm->rowLens;
+    mm->rowLens = NULL; //avoid free in @freeMatrixMarket
+    #endif
+
+    VERBOSE
+      printf("MMtoCSR: %lu NZ entries-> %lu MB of AS+JA+IRP\n",mat->NZ,
+        (mat->NZ*sizeof(*(mat->AS))+mat->NZ*sizeof(*(mat->JA))+((1+mat->M)*sizeof(*(mat->IRP))))>>20);
+    goto _free;
+
+
+    err:
+    if (mat)    freeSpmat(mat);
+    mat = NULL;
+    _free:
+    freeMatrixMarket(mm);
+    return mat;
+}
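+
+/*
+ * End-to-end sketch (illustrative, not part of the original parser): load a
+ * MatrixMarket file straight into CSR; the helper name is hypothetical and
+ * the path is caller-supplied.
+ */
+static int MMtoCSRExample(char* matPath){
+    spmat* a = MMtoCSR(matPath);
+    if (!a) return EXIT_FAILURE;
+    //... use a->M, a->IRP, a->JA, a->AS ...
+    freeSpmat(a);
+    return EXIT_SUCCESS;
+}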
+
+spmat* MMtoELL(char* matPath){
+    spmat* mat = NULL;
+    MatrixMarket* mm = MMRead(matPath);
+    if (!mm){
+        ERRPRINT("MMtoELL: parse err\n");
+        return NULL;
+    }
+    if (!(mat = calloc(1,sizeof(*mat)))){
+        ERRPRINT("MMtoELL: mat struct alloc errd");
+        goto err;
+    }
+    ////alloc core struct of ELL
+    mat -> M = mm->M;
+    mat -> N = mm->N;
+    mat -> NZ= mm->NZ;
+    if (COOtoELL(mm->entries,mat,mm->rowLens))  goto err;
+    #ifdef ROWLENS
+    mat->RL = mm->rowLens;
+    mm->rowLens = NULL; //avoid free in @freeMatrixMarket
+    #endif
+
+    goto _free;
+
+    err:
+    if(mat) freeSpmat(mat);
+    mat = NULL;
+    _free:
+    freeMatrixMarket(mm);
+    return mat;
+}
diff --git a/cbind/base/psb_objhandle_mod.F90 b/cbind/base/psb_objhandle_mod.F90
index d6200294..69ce1d0b 100644
--- a/cbind/base/psb_objhandle_mod.F90
+++ b/cbind/base/psb_objhandle_mod.F90
@@ -40,35 +40,4 @@ module psb_objhandle_mod
 
   type, bind(c) :: psb_c_zspmat
     type(c_ptr) :: item = c_null_ptr
-  end type psb_c_zspmat
-
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-! sp3mm c code structs
-! TODO : rename to conventions
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  type, bind(C, name=spmat) :: spmat_t
-    ! number of non zeros and dimensions
-    integer(c_size_t) :: nz, m, n
-    ! value array
-    real(c_double), allocatable :: as(:)
-    ! columns array
-    integer(c_size_t), allocatable :: ja(:)
-    ! row index pointers array
-    integer(c_size_t), allocatable :: irp(:)
-    ! lengths of the rows
-    integer(c_size_t), allocatable :: rl(:)
-    ! max value of rl
-    integer(c_size_t) :: max_row_nz
-  end type spmat_t
-
-  type, bind(C, name=CONFIG) :: config_t
-    ! dimensions of the grid
-    integer(c_short) :: grid_rows, grid_cols
-    ! how to compute symb mul (if required)
-    integer(c_int) :: symb_mm_row_impl_id
-    ! thread num to use in OMP parallel region
-    integer(c_int) :: thread_num
-    ! CHUNKS_DISTR_INTERF func pntr
-    type(c_ptr) :: chunk_distrb_func
-  end type config_t
-end module psb_objhandle_mod
+  end type psb_c_zspmat
\ No newline at end of file