Added sp3mm4amg source code to the repository and created a C wrapper routine to be used in the Fortran interface

sp3mm-interface
wlthr 2 years ago
parent 1a4ae1e973
commit 0c88352530

@ -0,0 +1,7 @@
{
"fortran.fortls.path": "/usr/bin/fortls",
"files.associations": {
"sp3mm_csr_omp_multi.h": "c",
"sp3mm_csr_omp_ub_generic.h": "c"
}
}

@ -0,0 +1,15 @@
Copyright Andrea Di Iorio 2022
This file is part of Sp3MM_4_AMG_OMP_CUDA_C_FORTRAN
Sp3MM_4_AMG_OMP_CUDA_C_FORTRAN is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Sp3MM_4_AMG_OMP_CUDA_C_FORTRAN is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Sp3MM_4_AMG_OMP_CUDA_C_FORTRAN. If not, see <http://www.gnu.org/licenses/>.

@ -0,0 +1,66 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
//Get multiple implementations for C/Fortran indexing by re-defining OFF_F & re-including the generic sources
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <alloca.h> //TODO quick hold few CSR cols partition sizes
#include <omp.h>
//UB version deps
#include "Sp3MM_CSR_OMP_Multi.h"
#include "SpMMUtilsMulti.h"
#include "sparseUtilsMulti.h"
#include "ompChunksDivide.h"
#include "parser.h"
#include "utils.h"
#include "macros.h"
#include "sparseMatrix.h"
#include "inlineExports.h"
//Symb version deps
#include "Sp3MM_CSR_OMP_Multi.h"
//global vars -> audit
double Start,End,Elapsed,ElapsedInternal;
#define OFF_F 0
#include "inlineExports_Generic.h"
#include "Sp3MM_CSR_OMP_UB_Generic.c"
#include "Sp3MM_CSR_OMP_Num_Generic.c"
#undef OFF_F
#define OFF_F 1
#include "inlineExports_Generic.h"
#include "Sp3MM_CSR_OMP_UB_Generic.c"
#include "Sp3MM_CSR_OMP_Num_Generic.c"
#undef OFF_F
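/* Illustrative note (hedged, not part of the original build logic): re-defining OFF_F
 * around each re-include makes the CAT()-based names in the generic sources expand into
 * two symbol families, e.g. (assuming CAT simply pastes its tokens):
 *   spmmRowByRow_0(A,B,cfg)   //0-based (C) indexing
 *   spmmRowByRow_1(A,B,cfg)   //1-based (Fortran) indexing, shifted back on output
 * so a hypothetical Fortran-facing wrapper would call the *_1 variants while C callers
 * use the *_0 ones. */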

@ -0,0 +1,249 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*#pragma message( "compiling SpMM_CSR_OMP_Generic.c with OFF_F as:" STR(OFF_F) )*/
#ifndef OFF_F
#error generic implementation requires OFF_F defined
#endif
#ifndef SP3MM_OMP_SYMB
#define SP3MM_OMP_SYMB
//aux functions for allocCSRSpMatSymbStep: set IRP from the row sizes produced by the symbolic step
static inline idx_t _setCSR_IRP_1DPartitioing(spmat* m, idx_t* rowSizes){
idx_t r,cumulSize;
for (r=0,cumulSize=0; r<m->M; cumulSize += rowSizes[r++])
m->IRP[r] = cumulSize;
DEBUGCHECKS assert(cumulSize == rowSizes[m->M]);
m->IRP[m->M] = cumulSize;
return cumulSize;
}
static inline idx_t _setCSR_IRP_2DPartitioing(spmat* m,idx_t* rowSizes,ushort gridCols){
idx_t cumulSize=0;
for (idx_t r=0,cumulSizeOld; r<m->M; r++){
m->IRP[r] = cumulSize;
for (ushort gc=0; gc<gridCols; gc++){
cumulSizeOld = cumulSize;
cumulSize += rowSizes[ IDX2D(r,gc,gridCols) ];
//inplace update 2D rowParts len matrix as IRP like start offsets
rowSizes[ IDX2D(r,gc,gridCols) ] = cumulSizeOld;
}
}
DEBUGCHECKS assert(cumulSize == rowSizes[m->M*gridCols]);
m->IRP[m->M] = cumulSize;
return cumulSize;
}
/*
 * allocate the CSR mat @m, setting up IRP and the other buffer allocations correctly
 * if @gridCols == 1: (1D partitioning of @m) -> @rowSizes is an array of row lengths
 * if @gridCols > 1: (2D partitioning of @m) -> @rowSizes is a matrix with
 * elem i,j = length of the j-th column partition (out of @gridCols) of the i-th row
 */
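/* Worked layout example (illustrative only): for a 3-row output with @gridCols == 2,
 * @rowSizes is laid out row-major as
 *   { len(r0,p0), len(r0,p1), len(r1,p0), len(r1,p1), len(r2,p0), len(r2,p1), total }
 * i.e. indexed as rowSizes[ IDX2D(r,gc,gridCols) ], with the grand total stored one
 * slot past the last partition, as the debug asserts above expect. */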
static inline int allocCSRSpMatSymbStep(spmat* m,idx_t* rowSizes,ushort gridCols){
//setup IRP and get cumul, whole size of @m
if (!gridCols) return EXIT_FAILURE;
idx_t cumulSize;
if (gridCols == 1) cumulSize = _setCSR_IRP_1DPartitioing(m,rowSizes);
/*if (gridCols > 1) -- static analyzer misses the unsignedness...*/
else cumulSize = _setCSR_IRP_2DPartitioing(m,rowSizes,gridCols);
m->IRP[m->M] = cumulSize;
m->NZ = cumulSize;
//use the cumulated total: valid for both the 1D and the (in-place updated) 2D layout
if (!(m->AS = malloc(cumulSize * sizeof(*m->AS))) ){
ERRPRINT("allocCSRSpMatSymbStep m->AS malloc errd\n");
return EXIT_FAILURE;
}
if (!(m->JA = malloc(cumulSize * sizeof(*m->JA))) ){
ERRPRINT("allocCSRSpMatSymbStep m->JA malloc errd\n");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
#endif
//////////////////// COMPUTE CORE Sp[3]MM SYMB-NUMB PHASE //////////////////////////
////////Sp3MM as 2 x SpMM
///1D
spmat* CAT(spmmRowByRow_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
ACC_DENSE *accVects = NULL,*acc;
SPMM_ACC* outAccumul=NULL;
idx_t* rowsSizes = NULL;
///init AB matrix with SPMM heuristic preallocation
spmat* AB = allocSpMatrix(A->M,B->N);
if (!AB) goto _err;
///aux structures alloc
if (!(accVects = _initAccVectors(cfg->threadNum,AB->N))){
ERRPRINT("accVects init failed\n");
goto _err;
}
///SYMBOLIC STEP
if (!(rowsSizes = CAT(SpMM_Symb___,OFF_F) (cfg->symbMMRowImplID, A,B)))
goto _err;
if (allocCSRSpMatSymbStep(AB,rowsSizes,1)) goto _err;
///NUMERIC STEP
((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (AB->M,AB,cfg);
AUDIT_INTERNAL_TIMES Start=omp_get_wtime();
#pragma omp parallel for schedule(runtime) private(acc)
for (idx_t r=0; r<A->M; r++){ //row-by-row formulation
acc = accVects + omp_get_thread_num();
_resetAccVect(acc); //rezero for the next A row
//for each A's row nnz, accumulate scalar vector product nnz.val * B[nnz.col]
/* direct use of sparse scalar vector multiplication
for (idx_t ja=A->IRP[r]-OFF_F,ca,jb,bRowLen; ja<A->IRP[r+1]-OFF_F; ja++){
ca = A->JA[ja] - OFF_F;
jb = B->IRP[ca] - OFF_F;
bRowLen = B->IRP[ca+1] - B->IRP[ca];
CAT(scSparseVectMul_,OFF_F)(A->AS[ja], B->AS+jb,B->JA+jb,bRowLen,acc);
} */
for (ulong ja=A->IRP[r]-OFF_F; ja<A->IRP[r+1]-OFF_F; ja++) //row-by-row formul
CAT(scSparseRowMul_,OFF_F)(A->AS[ja], B, A->JA[ja]-OFF_F, acc);
//direct sparsify: transform the accumulated dense vector into a CSR row
sparsifyDirect(acc,AB,r); //0,NULL);TODO COL PARTITIONING COMMON API
}
#if OFF_F != 0
C_FortranShiftIdxs(AB);
#endif
AUDIT_INTERNAL_TIMES End=omp_get_wtime();
DEBUG checkOverallocPercent(rowsSizes,AB);
goto _free;
_err:
if(AB) freeSpmat(AB);
AB=NULL; //nothing'll be returned
_free:
free(rowsSizes);
if(accVects) freeAccsDense(accVects,cfg->threadNum);
if(outAccumul) freeSpMMAcc(outAccumul);
return AB;
}
spmat* CAT(spmmRowByRow1DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
ACC_DENSE *accVects = NULL,*acc;
SPMM_ACC* outAccumul=NULL;
idx_t* rowsSizes = NULL;
///init AB matrix with SPMM heuristic preallocation
spmat* AB = allocSpMatrix(A->M,B->N);
if (!AB) goto _err;
///aux structures alloc
if (!(accVects = _initAccVectors(cfg->threadNum,AB->N))){
ERRPRINT("accVects init failed\n");
goto _err;
}
///SYMBOLIC STEP
if (!(rowsSizes = CAT(SpMM_Symb___,OFF_F) (cfg->symbMMRowImplID, A,B)))
goto _err;
if (allocCSRSpMatSymbStep(AB,rowsSizes,1))
goto _err;
///NUMERIC STEP
//perform Gustavson over rows blocks -> M / @cfg->gridRows
ulong rowBlock = AB->M/cfg->gridRows, rowBlockRem = AB->M%cfg->gridRows;
((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (cfg->gridRows,AB,cfg);
AUDIT_INTERNAL_TIMES Start=omp_get_wtime();
ulong b,startRow,block; //omp for aux vars
#pragma omp parallel for schedule(runtime) private(acc,startRow,block)
for (b=0; b < cfg->gridRows; b++){
block = UNIF_REMINDER_DISTRI(b,rowBlock,rowBlockRem);
startRow = UNIF_REMINDER_DISTRI_STARTIDX(b,rowBlock,rowBlockRem);
for (idx_t r=startRow; r<startRow+block; r++){
acc = accVects + omp_get_thread_num();
_resetAccVect(acc); //rezero for the next A row
//for each A's row nnz, accumulate scalar vector product nnz.val * B[nnz.col]
/* direct use of sparse scalar vector multiplication
for (idx_t ja=A->IRP[r]-OFF_F,ca,jb,bRowLen; ja<A->IRP[r+1]-OFF_F; ja++){
ca = A->JA[ja] - OFF_F;
jb = B->IRP[ca] - OFF_F;
bRowLen = B->IRP[ca+1] - B->IRP[ca];
CAT(scSparseVectMul_,OFF_F)(A->AS[ja], B->AS+jb,B->JA+jb,bRowLen,acc);
} */
for (ulong ja=A->IRP[r]-OFF_F; ja<A->IRP[r+1]-OFF_F; ja++) //row-by-row formul
CAT(scSparseRowMul_,OFF_F)(A->AS[ja], B, A->JA[ja]-OFF_F, acc);
//direct sparsify: transform the accumulated dense vector into a CSR row
sparsifyDirect(acc,AB,r); //0,NULL);TODO COL PARTITIONING COMMON API
}
}
#if OFF_F != 0
C_FortranShiftIdxs(AB);
#endif
AUDIT_INTERNAL_TIMES End=omp_get_wtime();
DEBUG checkOverallocPercent(rowsSizes,AB);
goto _free;
_err:
if(AB) freeSpmat(AB);
AB=NULL; //nothing'll be returned
_free:
if(rowsSizes) free(rowsSizes);
if(accVects) freeAccsDense(accVects,cfg->threadNum);
if(outAccumul) freeSpMMAcc(outAccumul);
return AB;
}
spmat* CAT(spmmRowByRow2DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
DEBUG printf("spmm\trowBlocks of A ,\tcolBlocks of B\tM=%luxN=%lu\n",A->M,B->N);
idx_t* bColOffsets = NULL; //B group columns starting offset for each row
ACC_DENSE *accVectors=NULL,*accV;
SPACC* accRowPart;
spmat* AB = allocSpMatrix(A->M,B->N);
SPMM_ACC* outAccumul=NULL;
idx_t *rowsPartsSizes=NULL, *rowSizes=NULL; //for rows' cols partition, correct len
if (!AB) goto _err;
//2D indexing aux vars
ulong gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols;
ulong _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows;
ulong _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols;
ulong startRow,startCol,rowBlock,colBlock; //data division aux variables
////get bColOffsets for B column groups
if (!(bColOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols)))
goto _err;
///TODO TODO
_err:
if (AB) freeSpmat(AB);
AB = NULL;
_free:
free(rowsPartsSizes);
free(bColOffsets);
if (accVectors) freeAccsDense(accVectors,gridSize);
if (outAccumul) freeSpMMAcc(outAccumul);
return AB;
}
/* TODO
///2D
//PARTITIONS NOT ALLOCATED
spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
///SP3MM
spmat* CAT(sp3mmRowByRowPair_,OFF_F)(spmat* R,spmat* AC,spmat* P,CONFIG* cfg,SPMM_INTERF spmm){
////////Sp3MM direct
///1D
spmat* CAT(sp3mmRowByRowMerged_,OFF_F)(spmat* R,spmat* AC,spmat* P,CONFIG* cfg,SPMM_INTERF spmm){
*/

@ -0,0 +1,594 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * CSR Sp[3]MM Symbolic step implementations
 * target: compute the output matrix size and the row lengths for preallocation,
 * directly writing out partial results
 * See interfaces in the respective header
 */
/*#pragma message( "compiling Sp3MM_CSR_OMP_Symb_Generic.c with config as:" \
STR(OFF_F) " - " STR(OUT_IDXS) " - " STR(COL_PARTS) )*/
#ifndef OFF_F
#error generic implementation requires OFF_F defined
#endif
///set up aux macros to generate the different signatures/implementations via #if arithmetic expressions
#pragma push_macro("OUT_IDXS")
#pragma push_macro("_OUT_IDXS")
#pragma push_macro("COL_PARTS")
#pragma push_macro("_COL_PARTS")
#ifdef OUT_IDXS
#define _OUT_IDXS TRUE
#else
#define _OUT_IDXS FALSE
#define OUT_IDXS _UNDEF
#endif
#ifdef COL_PARTS
#define _COL_PARTS TRUE
#else
#define _COL_PARTS FALSE
#define COL_PARTS _UNDEF
#endif
///
//////SpMM - rowByrow
///1row->matrix->outRow
//RBTREE based
/*
 * Compute the symbolic product of (the nnz indexes of) row @aRowJA and matrix @b:
 * insert the nnz indexes of the mul. result row as nodes in a rbtree rooted at @root,
 * with nodes taken from @nodes, which must be enough for the mul. result row (use an UB)
 * Returns: multiplication result row NNZ count; see CONFIG_MACROS below for more
 *
 * CONFIG_MACROS:
 * if _OUT_IDXS == TRUE return the mul. result row nnz idxs in @outIdxs
 * ifdef OUT_IDXS_RBTREE_NODES: nnz indexes returned by inplace sorting the rbtree
 * nodes as nnz indexes (JA) of the mul. result row
 * else: stop at returning the mul. result row length
 * if _COL_PARTS == TRUE return the number of nonzero elements in
 * each of the @gridCols column partitions inside @rowColPartsLens
 * OFF_F: offset back indexes from Fortran
 * TODO also output indexes are shifted (see c_b )
 */
static inline idx_t CAT4(SpMM_Row_Symb_Rbtree,OUT_IDXS,COL_PARTS,OFF_F)
(
idx_t* aRowJA, idx_t aRowLen, spmat* b,rbRoot* root, rbNode* nodes
#if _OUT_IDXS == TRUE && !defined OUT_IDXS_RBTREE_NODES
,idx_t* outIdxs
#endif
#if _COL_PARTS == TRUE
,ushort gridCols,idx_t* rowColPartsLens
#endif
)
{
//Compute the resulting ab row's nonzero indexes and total length
idx_t abRowLen = 0; //mul.result row len, return value
for ( idx_t i=0,c_a,inserted; i < aRowLen; i++ ){ //for each entry in a's row
c_a = aRowJA[i]-OFF_F;
//gather the distinct nnz indexes in b's corresponding `c_a`-th row
for ( idx_t j = b->IRP[c_a]-OFF_F,c_b; j < b->IRP[c_a+1]-OFF_F; j++ ){
c_b = b->JA[j]-OFF_F;
//check if c_b is nonzero index for mul.result row
inserted = rbInsertNewKey (root, nodes+abRowLen, c_b);
abRowLen += inserted; //inserted needed just after this
/* LESS EFFICIENT THAN BELOW (here no memory of the last colPart)
#if _COL_PARTS == TRUE //keep track of which col partition is c_b in
if (inserted)
rowColPartsLens[ matchingUnifRangeIdx(c_b, b->N, gridCols) ]++;
#endif */
}
}
#if _OUT_IDXS == T && defined OUT_IDXS_RBTREE_NODES
/* return the non zero indexes of the mul.result row
* sorting inplace the nodes inserted in the rbtree */
sortRbNode(nodes,abRowLen);
#elif _OUT_IDXS == T || _COL_PARTS == T
uint i=0;
idx_t k;
#if _COL_PARTS == T
//colParts aux vars
idx_t _colBlock = abRowLen / gridCols, _colBlockRem = abRowLen % gridCols;
ushort gc=0;
idx_t gcStartCol = unifRemShareStart(gc,_colBlock,_colBlockRem);
idx_t gcEndCol = unifRemShareEnd(gc,_colBlock,_colBlockRem);
#endif //_COL_PARTS == T
for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){
k = rb_entry(n,rbNode,rb)->key;
#if _OUT_IDXS == T
//return the mul.result nnz index inside the rbNodes
outIdxs[ i++ ] = k;
#endif
#if _COL_PARTS == T
while (k >= gcEndCol ){ //see if the idx is in another col partition
// TODO also = since gcEndCol as k is 0based
gcEndCol = unifRemShareEnd(gc ,_colBlock, _colBlockRem);
gc++;
DEBUGCHECKS{ assert( gc < gridCols ); }
}
rowColPartsLens[gc]++;
#endif //_COL_PARTS == T
}
#endif //_OUT_IDXS == T ... _COL_PARTS == T
/*DEBUGCHECKS{ //TODO PRINT NNZ INDEXES FOR MANUAL (PAINFUL CHECK)
idx_t k;
for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){
k = rb_entry(n,rbNode,rb)->key;
printf("%lu, ",k);
}
printf("\n");
}*/
return abRowLen;
}
//SPVECT_IDX_DENSE_MAP based TODO double implementation for trick syntax folding here...
/*
 * SPVECT_IDX_DENSE_MAP based: as SpMM_Row_Symb_Rbtree but keeping indexes via the aux idxMap
 * CONFIG_MACROS (new):
 * IDX_RMUL_SYMB_RBTREE && ( _OUT_IDXS == T || _COL_PARTS == T ):
 * (tmp) symb mult out indexes will be kept via a rbtree
 * otherwise directly in the out array, appending them and then sorting them
 * (potentially the same n log n)
 */
static inline idx_t CAT4(SpMM_Row_Symb_IdxMap,OUT_IDXS,COL_PARTS,OFF_F)
(
idx_t* aRowJA, idx_t aRowLen, spmat* b, SPVECT_IDX_DENSE_MAP* idxsMapAcc
#if _OUT_IDXS == TRUE
,idx_t* outIdxs
#endif
#if ( _OUT_IDXS == TRUE && IDX_RMUL_SYMB_RBTREE == T ) || _COL_PARTS == T
,rbRoot* root, rbNode* nodes
#endif // _OUT_IDXS == TRUE
#if _COL_PARTS == TRUE
,ushort gridCols,idx_t* rowColPartsLens
#endif
)
{
//Compute the resulting ab row's nonzero indexes and total length
idx_t abRowLen = 0; //mul.result row len, return value
for ( idx_t i=0,c_a,inserted; i < aRowLen; i++ ){ //for each entry in a's row
c_a = aRowJA[i]-OFF_F;
//gather the distinct nnz indexes in b's corresponding `c_a`-th row
for ( idx_t j = b->IRP[c_a]-OFF_F,c_b; j < b->IRP[c_a+1]-OFF_F; j++ ){
c_b = b->JA[j]-OFF_F;
//check if c_b is nonzero index for mul.result row
inserted = spVect_idx_in(c_b,idxsMapAcc);
#if _OUT_IDXS == T || _COL_PARTS == T //idxs HAS TO be accumulated
if (inserted)
#if IDX_RMUL_SYMB_RBTREE == T || _OUT_IDXS == F //add it in a RBTREE struct
rbInsertNewKey (root, nodes+idxsMapAcc->len, c_b);
#else //append it, then sort
outIdxs[idxsMapAcc->len] = c_b;
#endif //IDX_RMUL_SYMB_RBTREE == T //how accumulated key c_b
#endif //#if _OUT_IDXS == T || _COL_PARTS == T
}
}
abRowLen = idxsMapAcc->len;
//gather idxs or their sparsity struct in output row
#if _OUT_IDXS == T || _COL_PARTS == T
idx_t j = 0,k;
#if _COL_PARTS == T
//colParts aux vars
idx_t _colBlock = abRowLen / gridCols, _colBlockRem = abRowLen % gridCols;
ushort gc = 0;
idx_t gcStartCol = unifRemShareStart(gc,_colBlock,_colBlockRem);
idx_t gcEndCol = unifRemShareEnd(gc,_colBlock,_colBlockRem);
#endif
#if IDX_RMUL_SYMB_RBTREE == T || _OUT_IDXS == F ///idxs recorded in a aux rbtree
for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){
k = rb_entry(n,rbNode,rb)->key;
#if _OUT_IDXS == T
outIdxs[ j++ ] = k; //record ordered keys sorted via the aux rbtree
#endif
#else ///idxs recorded in aux append array
sort_idx_t(outIdxs,abRowLen);
for (; j < abRowLen; j++){
k = outIdxs[j]; //(OSS) already ordered in outIndexes arr
#endif //IDX_RMUL_SYMB_RBTREE == T
#if _COL_PARTS == T
while (k >= gcEndCol ){ //see if the idx is in another col partition
// TODO also = since gcEndCol as k is 0based
gcEndCol = unifRemShareEnd(gc ,_colBlock, _colBlockRem);
gc++;
DEBUGCHECKS{ assert( gc < gridCols ); }
}
rowColPartsLens[gc]++;
#endif //_COL_PARTS == T
}
#endif //_OUT_IDXS == T ... _COL_PARTS == T
return abRowLen;
}
//aux to switch between the 2 row_symb_XXX implementations
/*
 * SpMM single row symbolic computation
 * select one implementation via @implID,
 * among SpMM_Row_Symb_Rbtree and SpMM_Row_Symb_IdxMap;
 * args will be forwarded accordingly
 */
static inline idx_t CAT4(SpMM_Row_Symb_,OUT_IDXS,COL_PARTS,OFF_F)
(
ROW_MMSYM_IMPL_MODE implID, idx_t* aRowJA, idx_t aRowLen, spmat* b,
rbRoot* root, rbNode* nodes, SPVECT_IDX_DENSE_MAP* idxsMapAcc
#if _OUT_IDXS == TRUE
,idx_t* outIdxs
#endif
#if _COL_PARTS == TRUE
,ushort gridCols,idx_t* rowColPartsLens
#endif
)
{
if (implID == RBTREE) {
return CAT4(SpMM_Row_Symb_Rbtree,OUT_IDXS,COL_PARTS,OFF_F)
(
aRowJA,aRowLen,b,root,nodes
#if _OUT_IDXS == TRUE && !defined OUT_IDXS_RBTREE_NODES
,outIdxs
#endif
#if _COL_PARTS == TRUE
,gridCols,rowColPartsLens
#endif
);
}
else { //IDXMAP
return CAT4(SpMM_Row_Symb_IdxMap,OUT_IDXS,COL_PARTS,OFF_F)
(
aRowJA,aRowLen,b,idxsMapAcc
#if _OUT_IDXS == TRUE
,outIdxs
#endif
#if ( _OUT_IDXS == TRUE && IDX_RMUL_SYMB_RBTREE == T ) || _COL_PARTS == T
,root, nodes
#endif
#if _COL_PARTS == TRUE
,gridCols, rowColPartsLens
#endif
);
}
}
///SpMM row-by-row
idx_t* CAT4(SpMM_Symb_,OUT_IDXS,COL_PARTS,OFF_F)
(
ROW_MMSYM_IMPL_MODE symbRowImplID, spmat* a, spmat* b
#if _OUT_IDXS == TRUE
,idx_t*** outIdxs
#endif
#if _COL_PARTS == TRUE
,ushort gridCols, idx_t** rowColPartsLens
#endif
)
{
///initial allocations
rbRoot* rbRoots = NULL;
rbNode* rbNodes = NULL;
SPVECT_IDX_DENSE_MAP* idxsMapAccs = NULL;
idx_t *rowLens=NULL,*upperBoundedRowsLens=NULL,*upperBoundedSymMat=NULL;
idx_t maxRowLen=0;
int rbTreeUsed = (symbRowImplID == RBTREE ||
(IDX_RMUL_SYMB_RBTREE && (_COL_PARTS || _OUT_IDXS)) );
if ( !(rowLens = malloc(sizeof(*rowLens) * (a->M+1))) ){
ERRPRINT("SpMM_Symb_ rowLens malloc errd\n");
goto _err;
}
if (_OUT_IDXS == TRUE || rbTreeUsed ){
if (!(upperBoundedRowsLens = CAT(spMMSizeUpperbound_,OFF_F)(a,b)))
goto _err;
}
#if _OUT_IDXS == TRUE
if (!(*outIdxs = malloc(sizeof(**outIdxs) * a->M))){
ERRPRINT("SpMM_Symb_ outIdxs malloc errd\n");
goto _err;
}
if (!(upperBoundedSymMat = malloc(
sizeof(*upperBoundedSymMat)*upperBoundedRowsLens[a->M]))){
ERRPRINT("SpMM_Symb_ upperBoundedSymMat malloc errd\n");
goto _err;
}
//write each row's start pointer into the fully allocated symbolic JA buffer
for (idx_t i=0,cumul=0; i<a->M; cumul += upperBoundedRowsLens[i++])
(*outIdxs)[i] = upperBoundedSymMat + cumul;
#endif //#if _OUT_IDXS == TRUE
#if _COL_PARTS == TRUE
if (!(*rowColPartsLens = malloc(a->M * gridCols * sizeof(**rowColPartsLens)))){
ERRPRINT("SpMM_Symb_ rowColPartsLens malloc errd\n");
goto _err;
}
#endif //_COL_PARTS
uint maxThreads = omp_get_max_threads(); //TODO FROM CFG
//index keeping aux struct
//rbtree implementation or idxMap with aux of symbTree for tmp outIdx keeping
if ( rbTreeUsed ){
maxRowLen = reductionMaxSeq(upperBoundedRowsLens, a->M);
//rbTrees for index keeping
rbRoots = malloc(maxThreads * sizeof(*rbRoots));
rbNodes = calloc(maxThreads * maxRowLen, sizeof(*rbNodes));
if( !rbRoots || !rbNodes ){
ERRPRINT("SpMM_Symb_ threads' aux rbTree mallocs errd\n");
goto _err;
}
//init roots
for (uint i=0; i<maxThreads; i++)
rbRoots[i] = RB_ROOT_CACHED;
}
if (symbRowImplID == IDXMAP){ //idxMap implementation && not outIdxs via rbtree
idxsMapAccs = calloc(maxThreads, sizeof(*idxsMapAccs));
if (!idxsMapAccs){
ERRPRINT("SpMM_Symb_ idxsMapAccs calloc errd\n");
goto _err;
}
//init idxs maps
for (uint i=0; i<maxThreads; i++){
if(initSpVectIdxDenseAcc(b->N,idxsMapAccs+i)) goto _err;
}
}
///rows parallel compute
idx_t* aRow;
idx_t aRowLen,rLen,abCumulLen=0;
int tid;
rbRoot* tRoot; rbNode* tNodes;
SPVECT_IDX_DENSE_MAP* tIdxsMapAcc = NULL;
#pragma omp parallel for schedule(static) \
private(aRow,aRowLen,rLen, tRoot,tNodes,tid,tIdxsMapAcc) reduction(+:abCumulLen)
for(idx_t r=0; r<a->M; r++){
aRow = a->JA + a->IRP[r]-OFF_F;
aRowLen = a->IRP[r+1] - a->IRP[r];
tid = omp_get_thread_num();
//TODO the low-overhead pointer arith can be avoided with if (symbRowImplID .. && )
tIdxsMapAcc = idxsMapAccs + tid;
tRoot = rbRoots + tid;
tNodes = rbNodes + tid * maxRowLen;
rLen = CAT4(SpMM_Row_Symb_,OUT_IDXS,COL_PARTS,OFF_F)
(
symbRowImplID, aRow, aRowLen, b, tRoot,tNodes, tIdxsMapAcc
#if _OUT_IDXS == TRUE
,(*outIdxs)[r]
#endif
#if _COL_PARTS == TRUE
,gridCols, (*rowColPartsLens) + IDX2D(r,0,gridCols)
#endif
);
rowLens[r] = rLen;
abCumulLen += rLen;
///reset symb idxs keeping aux structs
if (symbRowImplID==RBTREE || (IDX_RMUL_SYMB_RBTREE && (_COL_PARTS || _OUT_IDXS))){
*tRoot = RB_ROOT_CACHED;
memset(tNodes,0,rLen * sizeof(*tNodes));
}
if (symbRowImplID == IDXMAP) _resetIdxMap(tIdxsMapAcc);
}
rowLens[a->M] = abCumulLen;
goto _free;
_err:
free(rowLens);
#if _OUT_IDXS == T
if (outIdxs) free(*outIdxs);
#endif
#if _COL_PARTS == T
if (rowColPartsLens) free(*rowColPartsLens);
#endif
rowLens = NULL;
_free:
free(upperBoundedRowsLens);
free(upperBoundedSymMat);
free(rbRoots);
free(rbNodes);
if (idxsMapAccs){
for (uint i=0; i<maxThreads; i++) free(idxsMapAccs[i].idxsMap);
free(idxsMapAccs);
}
return rowLens;
}
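/* Note (illustrative): the returned rowLens (grand total in rowLens[a->M]) is what the
 * numeric step feeds to allocCSRSpMatSymbStep(), e.g. for the basic 0-based variant:
 *   idx_t* rl = SpMM_Symb___0(cfg->symbMMRowImplID, A, B);
 *   if (rl && !allocCSRSpMatSymbStep(AB, rl, 1)){ ...numeric phase over AB... }
 * as done in spmmRowByRow_SymbNum_0(). */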
///Sp3MM - rowByrowByrow ////////////////////////////////////////////////////////
//TODO COL_PARTS VERSION?
#if _OUT_IDXS == TRUE && _COL_PARTS == FALSE
idx_t CAT3(Sp3MM_Row_Symb_,OUT_IDXS,OFF_F)
(
ROW_MMSYM_IMPL_MODE symbMMRowImplID,idx_t* aRowJA,idx_t aRowLen,spmat* b,spmat* c,
rbRoot* root,rbNode* nodes, SPVECT_IDX_DENSE_MAP* idxsMapAcc,idx_t* abRowJATmp
#if _OUT_IDXS == TRUE && !defined OUT_IDXS_RBTREE_NODES
,idx_t* outIdxs
#endif
)
{
struct rb_node* n;
idx_t abRowLen = CAT4(SpMM_Row_Symb_,OUT_IDXS,COL_PARTS,OFF_F)
(symbMMRowImplID,aRowJA,aRowLen,b,root,nodes,idxsMapAcc,abRowJATmp);
cleanRbNodes(root, nodes, abRowLen);
idx_t abcRowLen = CAT4(SpMM_Row_Symb_,OUT_IDXS,COL_PARTS,OFF_F)
(symbMMRowImplID,abRowJATmp,abRowLen,c,root,nodes,idxsMapAcc,abRowJATmp);
#if _OUT_IDXS == TRUE
#ifndef OUT_IDXS_RBTREE_NODES
//return the mul.result nnz index inside the rbNodes
uint i=0;
//rbNodeOrderedVisit(n,root) outIdxs[ i++ ] = rb_entry(n,rbNode,rb)->key;
for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){
outIdxs[ i++ ] = rb_entry(n,rbNode,rb)->key;
}
#else
/* return the non zero indexes of the mul.result row
* sorting inplace the nodes inserted in the rbtree */
sortRbNode(nodes,abcRowLen);
#endif
#endif
return abcRowLen;
}
idx_t* CAT3(Sp3MM_Symb_,OUT_IDXS,OFF_F)
(
ROW_MMSYM_IMPL_MODE symbMMRowImplID, spmat* a, spmat* b, spmat* c
#if _OUT_IDXS == TRUE
,idx_t*** outIdxs
#endif
)
{
//idxs keeping aux buffs
idx_t* abRowsJATmp = NULL;
rbRoot* rbRoots = NULL; rbNode* rbNodes = NULL;
SPVECT_IDX_DENSE_MAP* idxsMapAccs = NULL;
idx_t *abUpperBoundedRowsLens = NULL, *upperBoundedSymMat = NULL;
///initial allocations
idx_t* rowLens = malloc(sizeof(*rowLens) * (a->M +1) ); //to return
if (!rowLens){
ERRPRINT("SpMM_Symb_ rowLens malloc errd\n");
goto _err;
}
#if _OUT_IDXS == TRUE
if (!(*outIdxs = malloc(sizeof(**outIdxs) * a->M))){
ERRPRINT("SpMM_Symb_ outIdxs malloc errd\n");
goto _err;
}
if (!(abUpperBoundedRowsLens = CAT(spMMSizeUpperbound_,OFF_F)(a,b)))
goto _err;
/*TODO TODO instead of doing one sym product first to have a correct UB
* use a heuristic here to get the output matrix size
*/
idx_t abcUBSize = abUpperBoundedRowsLens[a->M] * SP3MM_UB_HEURISTIC;
if (!(upperBoundedSymMat=malloc(sizeof(*upperBoundedSymMat)*abcUBSize))){
ERRPRINT("SpMM_Symb_ upperBoundedSymMat malloc errd\n");
goto _err;
}
//TODO heuristic TO UB rows bounds ... require compacting copy
for (idx_t i=0,cumul=0; i<a->M;
cumul += SP3MM_UB_HEURISTIC * abUpperBoundedRowsLens[i++])
(*outIdxs)[i] = upperBoundedSymMat + cumul;
#endif //#if _OUT_IDXS == TRUE
//rbTrees for index keeping
uint maxThreads = omp_get_max_threads(); //TODO FROM CFG
idx_t abMaxRowLen = reductionMaxSeq(abUpperBoundedRowsLens, a->M);
#ifdef HEURISTICS_UB
idx_t maxRowLenUB = abMaxRowLen * SP3MM_UB_HEURISTIC; //TODO UB HEURISTC
#else
idx_t maxRowLenUB = c->N;
#endif //HEURISTICS_UB
if (!(abRowsJATmp = malloc(maxThreads*maxRowLenUB*sizeof(*abRowsJATmp)) ) ){
ERRPRINT("Sp3MM_Symb_ abRowsJATmp malloc errd\n");
goto _err;
}
if (symbMMRowImplID == RBTREE || IDX_RMUL_SYMB_RBTREE ){
rbNodes = malloc(maxThreads * maxRowLenUB * sizeof(*rbNodes));
rbRoots = malloc(maxThreads * sizeof(*rbRoots));
if (!rbRoots || !rbNodes ){
ERRPRINT("Sp3MM_Symb_ rbRoots || rbNodes malloc errd\n");
goto _err;
}
//init roots
for (uint i=0; i<maxThreads; i++) rbRoots[i] = RB_ROOT_CACHED;
}
if (symbMMRowImplID == IDXMAP){
if (!( idxsMapAccs = calloc(maxThreads,sizeof(*idxsMapAccs)) )){
ERRPRINT("Sp3MM_Symb_ idxsMapAccs calloc errd\n");
goto _err;
}
//init idxs maps
for (uint i=0; i<maxThreads; i++){
if(initSpVectIdxDenseAcc(c->N,idxsMapAccs+i)) goto _err;
}
}
///rows parallel compute
idx_t* aRow;
idx_t aRowLen,rLen,outCumulLen=0;
//threads local pointers
int tid;
rbRoot* tRoot;
rbNode* tNodes;
SPVECT_IDX_DENSE_MAP* tIdxsMapAcc;
idx_t* tABRowJATmp;
#pragma omp parallel for schedule(static) \
private(aRow,aRowLen,rLen, tRoot,tNodes,tid,tIdxsMapAcc,tABRowJATmp) reduction(+:outCumulLen)
for(idx_t r=0; r<a->M; r++){
aRow = a->JA + a->IRP[r]-OFF_F;
aRowLen = a->IRP[r+1] - a->IRP[r];
tid = omp_get_thread_num();
tRoot = rbRoots + tid;
tNodes = rbNodes + tid * maxRowLenUB;
tIdxsMapAcc = NULL; //TODO
tABRowJATmp = abRowsJATmp + tid * maxRowLenUB;
rLen = CAT3(Sp3MM_Row_Symb_,OUT_IDXS,OFF_F)
(symbMMRowImplID, aRow,aRowLen,b,c,tRoot,tNodes,tIdxsMapAcc,tABRowJATmp,
#if _OUT_IDXS == TRUE
(*outIdxs)[r]
#endif
);
outCumulLen += rLen;
rowLens[r] = rLen;
}
goto _free;
_err:
free(rowLens);
#if _OUT_IDXS == T
if (*outIdxs) free(*outIdxs);
#endif
rowLens = NULL;
_free:
free(abUpperBoundedRowsLens);
free(upperBoundedSymMat);
free(rbRoots);
free(rbNodes);
free(abRowsJATmp);
return rowLens;
}
#endif //#if !defined COL_PARTS && defined OUT_IDXS
///restore aux macros entry state
//#undef _OUT_ID
//#undef _COL_PARTS
#pragma pop_macro("OUT_IDXS")
#pragma pop_macro("_OUT_IDXS")
#pragma pop_macro("COL_PARTS")
#pragma pop_macro("_COL_PARTS")

@ -0,0 +1,92 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "macros.h"
#include "config.h"
#include "sparseMatrix.h"
#include "linuxK_rbtree_minimalized.h"
#include "Sp3MM_CSR_OMP_SymbStep_Multi.h"
#include "sparseUtilsMulti.h"
#include "utils.h"
#include <omp.h>
//inline exports
//ushort matchingUnifRangeIdx(idx_t idx, idx_t size, ushort rangesN);
idx_t* CAT(spMMSizeUpperbound_,0)(spmat* ,spmat* );
idx_t* CAT(spMMSizeUpperbound_,1)(spmat* ,spmat* );
idx_t* CAT(spMMSizeUpperboundColParts_,0)(spmat* ,spmat* ,ushort,idx_t*);
idx_t* CAT(spMMSizeUpperboundColParts_,1)(spmat* ,spmat* ,ushort,idx_t*);
/* Multi implementation of the symbolic product of sparse matrices; config macros:
 * OFF_F: C-Fortran spmat indexing
 * OUT_IDXS: indexes output
 * COL_PARTS: column-partitions output...
*/
#define OUT_IDXS_ON OutIdxs_
#define COL_PARTS_ON ColParts_
#undef OUT_IDXS
#undef COL_PARTS
#define OFF_F 0
///generate basic versions
#include "Sp3MM_CSR_OMP_SymbStep_Generic.c"
///generate outIdxs versions
#define OUT_IDXS OUT_IDXS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.c"
#undef OUT_IDXS
///generate colParts versions
#define COL_PARTS COL_PARTS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.c"
//generate outIdxs AND colParts versions
#define OUT_IDXS OUT_IDXS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.c"
#undef OUT_IDXS
#undef COL_PARTS
#undef OFF_F
#define OFF_F 1
///generate basic versions
#include "Sp3MM_CSR_OMP_SymbStep_Generic.c"
///generate outIdxs versions
#define OUT_IDXS OUT_IDXS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.c"
#undef OUT_IDXS
///generate colParts versions
#define COL_PARTS COL_PARTS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.c"
//generate outIdxs AND colParts versions
#define OUT_IDXS OUT_IDXS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.c"
#undef OUT_IDXS
#undef COL_PARTS
#undef OFF_F
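/* Illustrative naming note (hedged, not a definitive list): each re-include above
 * instantiates one CAT4(SpMM_Symb_,OUT_IDXS,COL_PARTS,OFF_F) driver plus its per-row
 * kernels; e.g. for OFF_F=0 the OutIdxs_+ColParts_ build yields
 * SpMM_Symb_OutIdxs_ColParts_0, while the basic build is invoked elsewhere as
 * SpMM_Symb___0 (which suggests _UNDEF expands to a lone underscore). */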

@ -0,0 +1,573 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*#pragma message( "compiling SpMM_CSR_OMP_Generic.c with OFF_F as:" STR(OFF_F) )*/
#ifndef OFF_F
#error generic implementation requires OFF_F defined
#endif
//////////////////// COMPUTE CORE Sp[3]MM Upperbound //////////////////////////
spmat* CAT(spmmSerial_,OFF_F)(spmat* A,spmat* B, CONFIG* _cfg){ //serial implementation
spmat* AB = NULL;
ACC_DENSE acc;
if ( allocAccDense(&acc,B->N) ) goto _free;
if (!(AB = allocSpMatrix(A->M,B->N))) goto _free;
for( idx_t r=0; r<A->M; r++ ){
for (ulong c=A->IRP[r]-OFF_F; c<A->IRP[r+1]-OFF_F; c++) //row-by-row formul
CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, &acc);
sparsifyDirect(&acc,AB,r); //0,NULL);TODO COL PARTITIONING COMMON API
}
_free:
freeAccsDense(&acc,1);
return AB;
}
////////Sp3MM as 2 x SpMM
///1D
spmat* CAT(spmmRowByRow_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
DEBUG printf("spmm\trows of A,\tfull B\tM=%lu x N=%lu\n",A->M,B->N);
///thread aux
ACC_DENSE *accVects = NULL,*acc;
SPMM_ACC* outAccumul=NULL;
idx_t* rowsSizes = NULL;
///init AB matrix with SPMM heuristic preallocation
spmat* AB = allocSpMatrix(A->M,B->N);
if (!AB) goto _err;
if (!(rowsSizes = CAT(spMMSizeUpperbound_,OFF_F) (A,B))) goto _err;
///aux structures alloc
if (!(accVects = _initAccVectors(cfg->threadNum,AB->N))){
ERRPRINT("accVects init failed\n");
goto _err;
}
if (!(outAccumul = initSpMMAcc(rowsSizes[AB->M],AB->M))) goto _err;
#if SPARSIFY_PRE_PARTITIONING == T
//prepare sparse accumulators with U.Bounded rows[parts] starts
SPACC* accSp;
for( idx_t r=0,rSizeCumul=0; r<AB->M; rSizeCumul += rowsSizes[r++]){
accSp = outAccumul->accs+r;
accSp->JA = outAccumul->JA + rSizeCumul;
accSp->AS = outAccumul->AS + rSizeCumul;
//accSp->len = rowsSizes[r];
}
#endif
((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (AB->M,AB,cfg);
AUDIT_INTERNAL_TIMES Start=omp_get_wtime();
#pragma omp parallel for schedule(runtime) private(acc)
for (ulong r=0; r<A->M; r++){ //row-by-row formulation
//iterate over nz entry index c inside current row r
acc = accVects + omp_get_thread_num();
/* direct use of sparse scalar vector multiplication
for (idx_t ja=A->IRP[r]-OFF_F,ca,jb,bRowLen; ja<A->IRP[r+1]-OFF_F; ja++){
ca = A->JA[ja] - OFF_F;
jb = B->IRP[ca] - OFF_F;
bRowLen = B->IRP[ca+1] - B->IRP[ca];
CAT(scSparseVectMul_,OFF_F)(A->AS[ja],B->AS+jb,B->JA+jb,bRowLen,acc);
}*/
for (ulong c=A->IRP[r]-OFF_F; c<A->IRP[r+1]-OFF_F; c++) //row-by-row formul
CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, acc);
//transform the accumulated dense vector into a CSR row
#if SPARSIFY_PRE_PARTITIONING == T
_sparsifyUB(acc,outAccumul->accs+r,0);
#else
sparsifyUBNoPartsBounds(outAccumul,acc,outAccumul->accs + r,0);
#endif
_resetAccVect(acc); //rezero for the next A row
}
///merge sparse row computed before
if (mergeRows(outAccumul->accs,AB)) goto _err;
#if OFF_F != 0
C_FortranShiftIdxs(AB);
#endif
AUDIT_INTERNAL_TIMES End=omp_get_wtime();
DEBUG checkOverallocPercent(rowsSizes,AB);
goto _free;
_err:
if(AB) freeSpmat(AB);
AB=NULL; //nothing'll be returned
_free:
if(rowsSizes) free(rowsSizes);
if(accVects) freeAccsDense(accVects,cfg->threadNum);
if(outAccumul) freeSpMMAcc(outAccumul);
return AB;
}
spmat* CAT(spmmRowByRow1DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
DEBUG printf("spmm\trowBlocks of A,\tfull B\tM=%lu x N=%lu\n",A->M,B->N);
DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols);
///thread aux
ACC_DENSE *accVects = NULL,*acc;
SPMM_ACC* outAccumul=NULL;
idx_t* rowsSizes = NULL;
///init AB matrix with SPMM heuristic preallocation
spmat* AB = allocSpMatrix(A->M,B->N);
if (!AB) goto _err;
if (!(rowsSizes = CAT(spMMSizeUpperbound_,OFF_F)(A,B))) goto _err;
///aux structures alloc
if (!(accVects = _initAccVectors(cfg->threadNum,AB->N))){
ERRPRINT("accVects init failed\n");
goto _err;
}
if (!(outAccumul = initSpMMAcc(rowsSizes[AB->M],AB->M))) goto _err;
#if SPARSIFY_PRE_PARTITIONING == T
//prepare sparse accumulators with U.Bounded rows[parts] starts
SPACC* accSp;
for( idx_t r=0,rSizeCumul=0; r<AB->M; rSizeCumul += rowsSizes[r++]){
accSp = outAccumul->accs+r;
accSp->JA = outAccumul->JA + rSizeCumul;
accSp->AS = outAccumul->AS + rSizeCumul;
}
#endif
//perform Gustavson over rows blocks -> M / @cfg->gridRows
ulong rowBlock = AB->M/cfg->gridRows, rowBlockRem = AB->M%cfg->gridRows;
((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (cfg->gridRows,AB,cfg);
AUDIT_INTERNAL_TIMES Start=omp_get_wtime();
ulong b,startRow,block; //omp for aux vars
#pragma omp parallel for schedule(runtime) private(acc,startRow,block)
for (b=0; b < cfg->gridRows; b++){
block = UNIF_REMINDER_DISTRI(b,rowBlock,rowBlockRem);
startRow= UNIF_REMINDER_DISTRI_STARTIDX(b,rowBlock,rowBlockRem);
acc = accVects + omp_get_thread_num();
DEBUGPRINT{
fflush(NULL);
printf("block %lu\t%lu:%lu(%lu)\n",b,startRow,startRow+block-1,block);
fflush(NULL);
}
//row-by-row formulation in the given row block
for (ulong r=startRow; r<startRow+block; r++){
//iterate over nz entry index c inside current row r
for (ulong c=A->IRP[r]-OFF_F; c<A->IRP[r+1]-OFF_F; c++)
CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, acc);
//transform the accumulated dense vector into a CSR row
#if SPARSIFY_PRE_PARTITIONING == T
_sparsifyUB(acc,outAccumul->accs+r,0);
#else
sparsifyUBNoPartsBounds(outAccumul,acc,outAccumul->accs + r,0);
#endif
_resetAccVect(acc); //rezero for the next A row
}
}
///merge sparse row computed before
if (mergeRows(outAccumul->accs,AB)) goto _err;
#if OFF_F != 0
C_FortranShiftIdxs(AB);
#endif
AUDIT_INTERNAL_TIMES End=omp_get_wtime();
DEBUG checkOverallocPercent(rowsSizes,AB);
goto _free;
_err:
if(AB) freeSpmat(AB);
AB=NULL; //nothing'll be returned
_free:
if(rowsSizes) free(rowsSizes);
if(accVects) freeAccsDense(accVects,cfg->threadNum);
if(outAccumul) freeSpMMAcc(outAccumul);
return AB;
}
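/* Illustrative arithmetic for the row-block split above (assuming UNIF_REMINDER_DISTRI
 * hands the remainder out as one extra row to each leading block): with AB->M = 10 and
 * cfg->gridRows = 3, rowBlock = 3 and rowBlockRem = 1, so the blocks are sized 4,3,3
 * and start at rows 0,4,7. */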
///2D
//PARTITIONS NOT ALLOCATED
spmat* CAT(spmmRowByRow2DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
DEBUG printf("spmm\trowBlocks of A ,\tcolBlocks of B\tM=%luxN=%lu\n",A->M,B->N);
DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols);
idx_t* bColOffsets = NULL; //B group columns starting offset for each row
ACC_DENSE *accVectors=NULL,*accV;
SPACC* accRowPart;
spmat* AB = allocSpMatrix(A->M,B->N);
SPMM_ACC* outAccumul=NULL;
idx_t* rowsPartsSizes=NULL;
if (!AB) goto _err;
//2D indexing aux vars
ulong gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols;
ulong _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows;
ulong _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols;
ulong startRow,startCol,rowBlock,colBlock; //data division aux variables
////get bColOffsets for B column groups
if (!(bColOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols)))
goto _err;
#if SPARSIFY_PRE_PARTITIONING == T
uint rowsPartsSizesN = aSubRowsN;
if (!(rowsPartsSizes = CAT(spMMSizeUpperboundColParts_,OFF_F)
(A,B,cfg->gridCols,bColOffsets)))
#else
uint rowsPartsSizesN = AB->M;
if (!(rowsPartsSizes = CAT(spMMSizeUpperbound_,OFF_F)(A,B)))
#endif
goto _err;
//aux vectors
///other AUX struct alloc
if (!(accVectors = _initAccVectors(gridSize,_colBlock+(_colBlockRem?1:0)))){
ERRPRINT("accVectors calloc failed\n");
goto _err;
}
if (!(outAccumul = initSpMMAcc(rowsPartsSizes[rowsPartsSizesN],aSubRowsN)))
goto _err;
#if SPARSIFY_PRE_PARTITIONING == T
//prepare sparse accumulators with U.Bounded rows[parts] starts
SPACC* accSp;
for( idx_t i=0,rSizeCumul=0; i<aSubRowsN; rSizeCumul += rowsPartsSizes[i++]){
accSp = outAccumul->accs+i;
accSp->JA = outAccumul->JA + rSizeCumul;
accSp->AS = outAccumul->AS + rSizeCumul;
}
//memset(outAccumul->AS,0,sizeof(double)*rowsSizes[AB->M]);memset(outAccumul->JA,0,sizeof(idx_t)*rowsSizes[AB->M]);
#endif
((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (gridSize,AB,cfg);
AUDIT_INTERNAL_TIMES Start=omp_get_wtime();
ulong tileID,t_i,t_j; //for aux vars
ulong bPartLen,bPartID,bPartOffset;//B partition acces aux vars
#pragma omp parallel for schedule(runtime) \
private(accV,accRowPart,rowBlock,colBlock,startRow,startCol,\
bPartLen,bPartID,bPartOffset,t_i,t_j)
for (tileID = 0; tileID < gridSize; tileID++){
///get iteration's indexing variables
//tile index in the 2D grid of AB computation TODO OMP HOW TO PARALLELIZE 2 FOR
t_i = tileID/cfg->gridCols; //i-th row block
t_j = tileID%cfg->gridCols; //j-th col block
//get tile row-cols group FAIR sizes
rowBlock = UNIF_REMINDER_DISTRI(t_i,_rowBlock,_rowBlockRem);
startRow = UNIF_REMINDER_DISTRI_STARTIDX(t_i,_rowBlock,_rowBlockRem);
startCol = UNIF_REMINDER_DISTRI_STARTIDX(t_j,_colBlock,_colBlockRem);
accV = accVectors + tileID;
DEBUGPRINT{
fflush(NULL);
colBlock = UNIF_REMINDER_DISTRI(t_j,_colBlock,_colBlockRem);
printf("rowBlock [%lu\t%lu:%lu(%lu)]\t",t_i,startRow,startRow+rowBlock-1,rowBlock);
printf("colBlock [%lu\t%lu:%lu(%lu)]\n",t_j,startCol,startCol+colBlock-1,colBlock);
fflush(NULL);
}
///AB[t_i][t_j] block compute
for (ulong r=startRow; r<startRow+rowBlock; r++){
//iterate over nz col index j inside current row r
//row-by-row restricted to colsubset of B to get AB[r][:colBlock:]
for (ulong j=A->IRP[r]-OFF_F,c; j<A->IRP[r+1]-OFF_F; j++){
//get start of B[A->JA[j]][:colBlock:]
c = A->JA[j]-OFF_F; // col of nnz in A[r][:] <-> target B row
bPartID = IDX2D(c,t_j,cfg->gridCols);
bPartOffset = bColOffsets[ bPartID ];
bPartLen = bColOffsets[ bPartID + 1 ] - bPartOffset;
CAT(scSparseVectMulPart_,OFF_F)(A->AS[j],B->AS+bPartOffset,
B->JA+bPartOffset,bPartLen,startCol,accV);
}
accRowPart = outAccumul->accs + IDX2D(r,t_j,cfg->gridCols);
#if SPARSIFY_PRE_PARTITIONING == T
_sparsifyUB(accV,accRowPart,startCol);
#else
sparsifyUBNoPartsBounds(outAccumul,accV,accRowPart,startCol);
#endif
_resetAccVect(accV);
}
}
if (mergeRowsPartitions(outAccumul->accs,AB,cfg)) goto _err;
#if OFF_F != 0
C_FortranShiftIdxs(AB);
#endif
AUDIT_INTERNAL_TIMES End=omp_get_wtime();
DEBUG
CAT(checkOverallocRowPartsPercent_, OFF_F)(rowsPartsSizes, AB, cfg->gridCols, bColOffsets);
goto _free;
_err:
if (AB) freeSpmat(AB);
AB = NULL;
_free:
free(rowsPartsSizes);
free(bColOffsets);
if (accVectors) freeAccsDense(accVectors,gridSize);
if (outAccumul) freeSpMMAcc(outAccumul);
return AB;
}
spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
DEBUG printf("spmm\trowBlocks of A,\tcolBlocks (allcd) of B\tM=%luxN=%lu\n",A->M,B->N);
DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols);
spmat *AB = NULL, *colPartsB = NULL, *colPart;
idx_t* rowsPartsSizes=NULL;
//aux vectors
SPMM_ACC* outAccumul=NULL;
ACC_DENSE *accVectors=NULL,*accV;
SPACC* accRowPart;
ulong startRow,startCol,rowBlock,colBlock; //data division aux variables
//2D indexing aux vars
idx_t gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols;
idx_t* bColOffsets = NULL;
if (!(AB = allocSpMatrix(A->M,B->N))) goto _err;
ulong _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows;
ulong _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols;
////B cols partition in CSRs
//if (!(colPartsB = CAT(colsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols))) goto _err;
if (!(colPartsB = CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)
(B, cfg->gridCols, &bColOffsets)))
goto _err;
#if SPARSIFY_PRE_PARTITIONING == T
uint rowsPartsSizesN = aSubRowsN;
if (!(rowsPartsSizes = CAT(spMMSizeUpperboundColParts_,OFF_F)
(A, B, cfg->gridCols, bColOffsets)))
#else
uint rowsPartsSizesN = AB->M;
if (!(rowsPartsSizes = CAT(spMMSizeUpperbound_,OFF_F)(A,B)))
#endif
goto _err;
///other AUX struct alloc
if (!(accVectors = _initAccVectors(gridSize,_colBlock+(_colBlockRem?1:0)))){
ERRPRINT("accVectors calloc failed\n");
goto _err;
}
if (!(outAccumul = initSpMMAcc(rowsPartsSizes[rowsPartsSizesN],aSubRowsN)))
goto _err;
#if SPARSIFY_PRE_PARTITIONING == T
//prepare sparse accumulators with U.Bounded rows[parts] starts
SPACC* accSp;
for( idx_t i=0,rLenCumul=0; i<aSubRowsN; rLenCumul += rowsPartsSizes[i++]){
accSp = outAccumul->accs+i;
accSp->JA = outAccumul->JA + rLenCumul;
accSp->AS = outAccumul->AS + rLenCumul;
}
#endif
((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (gridSize,AB,cfg);
AUDIT_INTERNAL_TIMES Start=omp_get_wtime();
ulong tileID,t_i,t_j; //for aux vars
#pragma omp parallel for schedule(runtime) \
private(accV,accRowPart,colPart,rowBlock,colBlock,startRow,startCol,t_i,t_j)
for (tileID = 0; tileID < gridSize; tileID++){
///get iteration's indexing variables
//tile index in the 2D grid of AB computation TODO OMP HOW TO PARALLELIZE 2 FOR
t_i = tileID/cfg->gridCols; //i-th row block
t_j = tileID%cfg->gridCols; //j-th col block
//get tile row-cols group FAIR sizes
rowBlock = UNIF_REMINDER_DISTRI(t_i,_rowBlock,_rowBlockRem);
colBlock = UNIF_REMINDER_DISTRI(t_j,_colBlock,_colBlockRem);
startRow = UNIF_REMINDER_DISTRI_STARTIDX(t_i,_rowBlock,_rowBlockRem);
startCol = UNIF_REMINDER_DISTRI_STARTIDX(t_j,_colBlock,_colBlockRem);
colPart = colPartsB + t_j;
accV = accVectors + tileID;
DEBUGPRINT{
fflush(NULL);
printf("rowBlock [%lu\t%lu:%lu(%lu)]\t",
t_i,startRow,startRow+rowBlock-1,rowBlock);
printf("colBlock [%lu\t%lu:%lu(%lu)]\n",
t_j,startCol,startCol+colBlock-1,colBlock);
fflush(NULL);
}
///AB[t_i][t_j] block compute
for (ulong r=startRow; r<startRow+rowBlock; r++){
//iterate over nz col index j inside current row r
//row-by-row restricted to colsubset of B to get AB[r][:colBlock:]
for (ulong j=A->IRP[r]-OFF_F,c,bRowStart,bRowLen; j<A->IRP[r+1]-OFF_F; j++){
//get start of B[A->JA[j]][:colBlock:]
c = A->JA[j]-OFF_F; // column of nnz entry in A[r][:] <-> target B row
bRowStart = colPart->IRP[c];
#ifdef ROWLENS
bRowLen = colPart->RL[c];
#else
bRowLen = colPart->IRP[c+1] - bRowStart;
#endif
CAT(scSparseVectMulPart_,OFF_F)(A->AS[j],
colPart->AS+bRowStart,colPart->JA+bRowStart,
bRowLen,startCol,accV);
}
accRowPart = outAccumul->accs + IDX2D(r,t_j,cfg->gridCols);
#if SPARSIFY_PRE_PARTITIONING == T
_sparsifyUB(accV,accRowPart,startCol);
#else
sparsifyUBNoPartsBounds(outAccumul,accV,accRowPart,startCol);
#endif
_resetAccVect(accV);
}
}
if (mergeRowsPartitions(outAccumul->accs,AB,cfg)) goto _err;
#if OFF_F != 0
C_FortranShiftIdxs(AB);
#endif
AUDIT_INTERNAL_TIMES End=omp_get_wtime();
DEBUG
CAT(checkOverallocRowPartsPercent_, OFF_F)(rowsPartsSizes,AB,cfg->gridCols,bColOffsets);
goto _free;
_err:
ERRPRINT("spmmRowByRow2DBlocksAllocated failed\n");
if (AB) freeSpmat(AB);
AB = NULL;
_free:
if (colPartsB){
for (ulong i=0; i<cfg->gridCols; i++)
freeSpmatInternal(colPartsB+i);
free(colPartsB);
}
free(rowsPartsSizes);
free(bColOffsets);
if (accVectors) freeAccsDense(accVectors,gridSize);
if (outAccumul) freeSpMMAcc(outAccumul);
return AB;
}
///SP3MM
spmat* CAT(sp3mmRowByRowPair_,OFF_F)(spmat* R,spmat* AC,spmat* P,
CONFIG* cfg,SPMM_INTERF spmm){
double end,start,elapsed,partial,flops;
spmat *RAC = NULL, *out = NULL;
if (!spmm){
//TODO decide at runtime, based on the sizes, which spmm implementation to use if not given
spmm = &CAT(spmmRowByRow2DBlocks_,OFF_F);
}
/* TODO
alloc dense aux vector, reusable over the triple product
TODO arrays over-allocated so they can be reused across the 2 SpMMs
ulong auxVectSize = MAX(R->N,AC->N);
auxVectSize = MAX(auxVectSize,P->N);
*/
start = omp_get_wtime();
/// triple product as a pair of spmm
if (!(RAC = spmm(R,AC,cfg))) goto _free;
AUDIT_INTERNAL_TIMES partial = End - Start;
if (!(out = spmm(RAC,P,cfg))) goto _free;
//
end = omp_get_wtime();
ElapsedInternal = End - Start + partial;
VERBOSE {
elapsed = end - start;
flops = (2 * R->NZ * P->NZ * AC->NZ) / (elapsed);
printf("elapsed %le - flops %le",elapsed,flops);
AUDIT_INTERNAL_TIMES
printf("\tinternalTime: %le",ElapsedInternal);
printf("\n");
}
_free:
zeroSpmat(RAC);
freeSpmat(RAC);
return out;
}
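/* Hypothetical usage sketch (illustrative only, assuming CAT pastes the OFF_F suffix):
 *   spmat *out  = sp3mmRowByRowPair_0(R, AC, P, &cfg, &spmmRowByRow1DBlocks_0);
 *   //or, with 1-based (Fortran) matrices and the default kernel:
 *   spmat *outF = sp3mmRowByRowPair_1(R, AC, P, &cfg, NULL); //NULL -> spmmRowByRow2DBlocks_1
 * where cfg carries the parallelization grid, thread count and chunk-distribution callback. */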
////////Sp3MM direct
///1D
spmat* CAT(sp3mmRowByRowMerged_,OFF_F)(spmat* R,spmat* AC,spmat* P,CONFIG* cfg,
SPMM_INTERF spmm){
ulong* rowSizes = NULL;
SPMM_ACC* outAccumul=NULL;
ACC_DENSE *accVectorsR_AC=NULL,*accVectorsRAC_P=NULL,*accRAC,*accRACP;
///init AB matrix with SPMM heuristic preallocation
spmat* out = allocSpMatrix(R->M,P->N);
if (!out) goto _err;
/*TODO 3MM VERSION, COMPUTE-OUT ALLOC:
-> \forall RAC.row -> hashmap{col=True}->(AVL||RBTREE); upperBound via RAC.rows.cols kept in the hashmap || SYM_bis
* NB: UB for RACP => precise row sizes NOT known => same space-reservation approach as spmm ( fetch_and_add )
* SYM_BIS ==> row sizes known =>
* 1) pre-reserve space per row -> cache alignment per thread
-(sched. static & each row block aligned to the cache block following the previous rows' block)
-(sched. dynamic & all rows aligned to cache blocks (NO OVERLAPS!) -> huge overhead ?)
* 2) pre-reserve row space directly in the out CSR
-> likely cache block overlaps; but saves the cost of the P.M memcpy
*/
if (!(rowSizes = CAT(spMMSizeUpperbound_,OFF_F)(R,AC))) goto _err; ///TODO TOO LOOSE UB...INTEGRATE RBTREE FOR SYM->PRECISE
///aux structures alloc
if (!(outAccumul = initSpMMAcc(rowSizes[R->M],P->M))) goto _err; //TODO size estimated with RAC mat
if (!(accVectorsR_AC = _initAccVectors(cfg->threadNum,AC->N))){ //TODO LESS || REUSE
ERRPRINT("accVectorsR_AC init failed\n");
goto _err;
}
if (!(accVectorsRAC_P = _initAccVectors(cfg->threadNum,R->N))){ //TODO LESS || REUSE
ERRPRINT("accVectorsRAC_P init failed\n");
goto _err;
}
ulong c;
((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (R->M,R,cfg);
AUDIT_INTERNAL_TIMES Start=omp_get_wtime();
#pragma omp parallel for schedule(runtime) private(accRAC,accRACP,c)
for (ulong r=0; r<R->M; r++){ //row-by-row formulation
//iterate over nz entry index c inside current row r
accRAC = accVectorsR_AC + omp_get_thread_num();
accRACP = accVectorsRAC_P + omp_get_thread_num();
//computing (tmp) R*AC r-th row
for (idx_t j=R->IRP[r]-OFF_F; j<R->IRP[r+1]-OFF_F; j++)
CAT(scSparseRowMul_,OFF_F)(R->AS[j], AC, R->JA[j]-OFF_F, accRAC);
//forward the computed row
for (idx_t j=0; j<accRAC->nnzIdxMap.len; j++){
c = accRAC->nnzIdx[j];
CAT(scSparseRowMul_,OFF_F)(accRAC->v[c],P,c,accRACP);
}
//transform the accumulated dense vector into a CSR row TODO in UB buff
sparsifyUBNoPartsBounds(outAccumul,accRACP,outAccumul->accs+r,0);
_resetAccVect(accRAC);
_resetAccVect(accRACP);
}
///merge sparse row computed before
if (mergeRows(outAccumul->accs,out)) goto _err;
#if OFF_F != 0
C_FortranShiftIdxs(out);
#endif
AUDIT_INTERNAL_TIMES{
End=omp_get_wtime();
ElapsedInternal = End-Start;
}
DEBUG checkOverallocPercent(rowSizes,out);
goto _free;
_err:
if(out) freeSpmat(out);
out = NULL;
_free:
if(rowSizes) free(rowSizes);
if(accVectorsR_AC) freeAccsDense(accVectorsR_AC,cfg->threadNum);
if(accVectorsRAC_P) freeAccsDense(accVectorsRAC_P,cfg->threadNum);
if(outAccumul) freeSpMMAcc(outAccumul);
return out;
}

@ -0,0 +1,86 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdlib.h>
#include <stdio.h>
#include <omp.h>
#include <config.h>
char* SCHEDULES[]={"OMP_SCHED_STATIC","OMP_SCHED_DYNAMIC","OMP_SCHED_GUIDED","OMP_SCHED_AUTO"};
void ompGetRuntimeSchedule(int* kind_chunk_monotonic){
/*
* export OMP_SCHEDULE="[modifier:]kind[, chunk]"
* typedef enum omp_sched_t {
* //schedule kinds
* omp_sched_static = 0x1,
* omp_sched_dynamic = 0x2,
* omp_sched_guided = 0x3,
* omp_sched_auto = 0x4,
* //schedule modifier //TODO API>=5.0
* omp_sched_monotonic = 0x80000000u //TODO OMP >=5
* } omp_sched_t;
*/
omp_sched_t k,kind; int chunk_size,monotonic=0;
omp_get_schedule(&kind,&chunk_size);
k=kind; //[monotonic OFF]
#if _OPENMP >= 201811 //OMP_SCHEDULE monotonic modifier from OpenMP 5.0
monotonic = omp_sched_monotonic & kind;
if(monotonic) k = kind - omp_sched_monotonic;
#endif
printf("omp sched gather:\tkind:%s\tomp chunkSize:%d\tmonotonic:%s\tfairChunkFolding:%d\n",
SCHEDULES[k-1],chunk_size,monotonic?"Y":"N",FAIR_CHUNKS_FOLDING);
if (kind_chunk_monotonic){
kind_chunk_monotonic[0] = k;
kind_chunk_monotonic[1] = chunk_size;
kind_chunk_monotonic[2] = monotonic;
}
}
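/* usage sketch: how the kind/chunk/monotonic triple gathered above can be consumed,
 * assuming e.g. `export OMP_SCHEDULE="dynamic,64"` before launch (guarded out of the build) */
#ifdef OMP_SCHED_GATHER_SKETCH
static void ompScheduleGatherSketch(void){
int kcm[3]; //schedule kind, chunk size, monotonic flag
ompGetRuntimeSchedule(kcm); //with the export above kcm should hold {2,64,0} (omp_sched_dynamic)
printf("kind=%d\tchunk=%d\tmonotonic=%d\n",kcm[0],kcm[1],kcm[2]);
}
#endif //OMP_SCHED_GATHER_SKETCH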
float ompVersionMacroMap(){
switch ( _OPENMP ){
case 200505: return 2.5;
case 200805: return 3.0;
case 201107: return 3.1;
case 201307: return 4.0;
case 201511: return 4.5;
case 201811: return 5.0;
case 202011: return 5.1;
default: return -1; //unknown _OPENMP version date macro
}
}
//WRAPPER to print all ICV vars
#ifdef OMP_GET_ICV_MAIN
int main(){
#else
void ompGetAllICV(){
#endif
printf("export OMP_DISPLAY_ENV=VERBOSE for full ICV details\n");
printf("omp MAX THREADS USABLE\t%d\n",omp_get_max_threads());
ompGetRuntimeSchedule(NULL);
printf("omp API version:\t %1.1f\n",ompVersionMacroMap());
}

File diff suppressed because it is too large

@ -0,0 +1,193 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2021 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2021 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009-2012 Oak Rigde National Laboratory. All rights reserved.
* Copyright (c) 2011-2020 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011-2013 INRIA. All rights reserved.
* Copyright (c) 2015 University of Houston. All rights reserved.
* Copyright (c) 2015-2021 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017-2019 IBM Corporation. All rights reserved.
* Copyright (c) 2018 FUJITSU LIMITED. All rights reserved.
* Copyright (c) 2021-2022 Google, LLC. All rights reserved.
* Copyright (c) 2021-2022 Amazon.com, Inc. or its affiliates. All Rights
* reserved.
* Copyright (c) 2021 Bull S.A.S. All rights reserved.
* Copyright (c) 2018 Triad National Security, LLC. All rights
* Copyright (c) 2018-2021 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/*
* Error classes and codes
* Do not change the values of these without also modifying mpif.h.in.
*/
#define MPI_SUCCESS 0
#define MPI_ERR_BUFFER 1
#define MPI_ERR_COUNT 2
#define MPI_ERR_TYPE 3
#define MPI_ERR_TAG 4
#define MPI_ERR_COMM 5
#define MPI_ERR_RANK 6
#define MPI_ERR_REQUEST 7
#define MPI_ERR_ROOT 8
#define MPI_ERR_GROUP 9
#define MPI_ERR_OP 10
#define MPI_ERR_TOPOLOGY 11
#define MPI_ERR_DIMS 12
#define MPI_ERR_ARG 13
#define MPI_ERR_UNKNOWN 14
#define MPI_ERR_TRUNCATE 15
#define MPI_ERR_OTHER 16
#define MPI_ERR_INTERN 17
#define MPI_ERR_IN_STATUS 18
#define MPI_ERR_PENDING 19
#define MPI_ERR_ACCESS 20
#define MPI_ERR_AMODE 21
#define MPI_ERR_ASSERT 22
#define MPI_ERR_BAD_FILE 23
#define MPI_ERR_BASE 24
#define MPI_ERR_CONVERSION 25
#define MPI_ERR_DISP 26
#define MPI_ERR_DUP_DATAREP 27
#define MPI_ERR_FILE_EXISTS 28
#define MPI_ERR_FILE_IN_USE 29
#define MPI_ERR_FILE 30
#define MPI_ERR_INFO_KEY 31
#define MPI_ERR_INFO_NOKEY 32
#define MPI_ERR_INFO_VALUE 33
#define MPI_ERR_INFO 34
#define MPI_ERR_IO 35
#define MPI_ERR_KEYVAL 36
#define MPI_ERR_LOCKTYPE 37
#define MPI_ERR_NAME 38
#define MPI_ERR_NO_MEM 39
#define MPI_ERR_NOT_SAME 40
#define MPI_ERR_NO_SPACE 41
#define MPI_ERR_NO_SUCH_FILE 42
#define MPI_ERR_PORT 43
#define MPI_ERR_QUOTA 44
#define MPI_ERR_READ_ONLY 45
#define MPI_ERR_RMA_CONFLICT 46
#define MPI_ERR_RMA_SYNC 47
#define MPI_ERR_SERVICE 48
#define MPI_ERR_SIZE 49
#define MPI_ERR_SPAWN 50
#define MPI_ERR_UNSUPPORTED_DATAREP 51
#define MPI_ERR_UNSUPPORTED_OPERATION 52
#define MPI_ERR_WIN 53
#define MPI_T_ERR_MEMORY 54
#define MPI_T_ERR_NOT_INITIALIZED 55
#define MPI_T_ERR_CANNOT_INIT 56
#define MPI_T_ERR_INVALID_INDEX 57
#define MPI_T_ERR_INVALID_ITEM 58
#define MPI_T_ERR_INVALID_HANDLE 59
#define MPI_T_ERR_OUT_OF_HANDLES 60
#define MPI_T_ERR_OUT_OF_SESSIONS 61
#define MPI_T_ERR_INVALID_SESSION 62
#define MPI_T_ERR_CVAR_SET_NOT_NOW 63
#define MPI_T_ERR_CVAR_SET_NEVER 64
#define MPI_T_ERR_PVAR_NO_STARTSTOP 65
#define MPI_T_ERR_PVAR_NO_WRITE 66
#define MPI_T_ERR_PVAR_NO_ATOMIC 67
#define MPI_ERR_RMA_RANGE 68
#define MPI_ERR_RMA_ATTACH 69
#define MPI_ERR_RMA_FLAVOR 70
#define MPI_ERR_RMA_SHARED 71
#define MPI_T_ERR_INVALID 72
#define MPI_T_ERR_INVALID_NAME 73
#define MPI_ERR_PROC_ABORTED 74
/* not #if conditional on OPAL_ENABLE_FT_MPI for ABI */
#define MPI_ERR_PROC_FAILED 75
#define MPI_ERR_PROC_FAILED_PENDING 76
#define MPI_ERR_REVOKED 77
/* Per MPI-3 p349 47, MPI_ERR_LASTCODE must be >= the last predefined
MPI_ERR_<foo> code. Set the last code to allow some room for adding
error codes without breaking ABI. */
#define MPI_ERR_LASTCODE 92
/*
* Comparison results. Don't change the order of these, the group
* comparison functions rely on it.
* Do not change the order of these without also modifying mpif.h.in.
*/
enum {
MPI_IDENT,
MPI_CONGRUENT,
MPI_SIMILAR,
MPI_UNEQUAL
};
/*
* MPI_Init_thread constants
* Do not change the order of these without also modifying mpif.h.in.
*/
enum {
MPI_THREAD_SINGLE,
MPI_THREAD_FUNNELED,
MPI_THREAD_SERIALIZED,
MPI_THREAD_MULTIPLE
};
/*
* Datatype combiners.
* Do not change the order of these without also modifying mpif.h.in.
* (see also mpif-common.h.fin).
*/
enum {
MPI_COMBINER_NAMED,
MPI_COMBINER_DUP,
MPI_COMBINER_CONTIGUOUS,
MPI_COMBINER_VECTOR,
#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
MPI_COMBINER_HVECTOR_INTEGER,
#else
OMPI_WAS_MPI_COMBINER_HVECTOR_INTEGER, /* preserve ABI compatibility */
#endif
MPI_COMBINER_HVECTOR,
MPI_COMBINER_INDEXED,
#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
MPI_COMBINER_HINDEXED_INTEGER,
#else
OMPI_WAS_MPI_COMBINER_HINDEXED_INTEGER, /* preserve ABI compatibility */
#endif
MPI_COMBINER_HINDEXED,
MPI_COMBINER_INDEXED_BLOCK,
#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
MPI_COMBINER_STRUCT_INTEGER,
#else
OMPI_WAS_MPI_COMBINER_STRUCT_INTEGER, /* preserve ABI compatibility */
#endif
MPI_COMBINER_STRUCT,
MPI_COMBINER_SUBARRAY,
MPI_COMBINER_DARRAY,
MPI_COMBINER_F90_REAL,
MPI_COMBINER_F90_COMPLEX,
MPI_COMBINER_F90_INTEGER,
MPI_COMBINER_RESIZED,
MPI_COMBINER_HINDEXED_BLOCK
};
////MOCK ERRHANDLERS
#define OMPI_ERRHANDLER_INVOKE(comm,err,fname) err
#define OMPI_ERRHANDLER_NOHANDLE_INVOKE(err,fname) err

@ -0,0 +1,288 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2020 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2014 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* extraction from openMPI, mocking not needed dependencies for MPI_Dims_create function export:
* Copyright (c) 2022 Andrea Di Iorio
*/
//#include "ompi_config.h"
#include "ompi_config_minimal.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <math.h>
//#include "ompi/mpi/c/bindings.h"
//#include "ompi/runtime/params.h"
//#include "ompi/communicator/communicator.h"
//#include "ompi/errhandler/errhandler.h"
static const char FUNC_NAME[] = "MPI_Dims_create";
/* static functions */
static int assignnodes(int ndim, int nfactor, int *pfacts,int **pdims);
static int getfactors(int num, int *nfactors, int **factors);
/*
* This is a utility function, no need to have anything in the lower
* layer for this at all
* ****
* original manpage ... https://www.open-mpi.org/doc/v3.1/man3/MPI_Dims_create.3.php
* For Cartesian topologies, the function MPI_Dims_create helps the user select a balanced distribution of processes per coordinate direction,
* depending on the number of processes in the group to be balanced and optional constraints that can be specified by the user.
* One use is to partition all the processes (the size of MPI_COMM_WORLD's group) into an n-dimensional topology.
* The entries in the array dims are set to describe a Cartesian grid with ndims dimensions and a total of nnodes nodes.
* The dimensions are set to be as close to each other as possible, using an appropriate divisibility algorithm.
*
* The caller may further constrain the operation of this routine by specifying elements of array dims.
* If dims[i] is set to a positive number, the routine will not modify the number of nodes in dimension i; only those entries where dims[i] = 0 are modified by the call.
* Negative input values of dims[i] are erroneous. An error will occur if nnodes is not a multiple of ((pi) over (i, dims[i] != 0)) dims[i].
* For dims[i] set by the call, dims[i] will be ordered in nonincreasing order. Array dims is suitable for use as input to routine MPI_Cart_create. MPI_Dims_create is local.
* Example:
*
* dims
* before dims
* call function call on return
* -----------------------------------------------------
* (0,0) MPI_Dims_create(6, 2, dims) (3,2)
* (0,0) MPI_Dims_create(7, 2, dims) (7,1)
* (0,3,0) MPI_Dims_create(6, 3, dims) (2,3,1)
* (0,3,0) MPI_Dims_create(7, 3, dims) erroneous call
* ------------------------------------------------------
*/
int MPI_Dims_create(int nnodes, int ndims, int dims[])
{
int i;
int freeprocs;
int freedims;
int nfactors;
int *factors;
int *procs;
int *p;
int err;
/*if (MPI_PARAM_CHECK) {
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
if (0 > ndims) {
return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD,
MPI_ERR_DIMS, FUNC_NAME);
}
if ((0 != ndims) && (NULL == dims)) {
return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD,
MPI_ERR_ARG, FUNC_NAME);
}
if (1 > nnodes) {
return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD,
MPI_ERR_DIMS, FUNC_NAME);
}
}*/
assert( ndims > 0 && nnodes > 1 && NULL != dims );
/* Get # of free-to-be-assigned processes and # of free dimensions */
freeprocs = nnodes;
freedims = 0;
for (i = 0, p = dims; i < ndims; ++i,++p) {
if (*p == 0) {
++freedims;
} else if ((*p < 0) || ((nnodes % *p) != 0)) {
return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_DIMS,
FUNC_NAME);
} else {
freeprocs /= *p;
}
}
if (freedims == 0) {
if (freeprocs == 1) {
return MPI_SUCCESS;
}
return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_DIMS,
FUNC_NAME);
}
if (freeprocs == 1) {
for (i = 0; i < ndims; ++i, ++dims) {
if (*dims == 0) {
*dims = 1;
}
}
return MPI_SUCCESS;
}
/* Factor the number of free processes */
if (MPI_SUCCESS != (err = getfactors(freeprocs, &nfactors, &factors))) {
return OMPI_ERRHANDLER_NOHANDLE_INVOKE(err,
FUNC_NAME);
}
/* Assign free processes to free dimensions */
if (MPI_SUCCESS != (err = assignnodes(freedims, nfactors, factors, &procs))) {
free(factors);
return OMPI_ERRHANDLER_NOHANDLE_INVOKE(err,
FUNC_NAME);
}
/* Return assignment results */
p = procs;
for (i = 0; i < ndims; ++i, ++dims) {
if (*dims == 0) {
*dims = *p++;
}
}
free((char *) factors);
free((char *) procs);
/* all done */
return MPI_SUCCESS;
}
/*
* assignnodes
*
* Function: - assign processes to dimensions
* - get "best-balanced" grid
* - greedy bin-packing algorithm used
* - sort dimensions in decreasing order
* - dimensions array dynamically allocated
* Accepts: - # of dimensions
* - # of prime factors
* - array of prime factors
* - ptr to array of dimensions (returned value)
* Returns: - 0 or ERROR
*/
static int
assignnodes(int ndim, int nfactor, int *pfacts, int **pdims)
{
int *bins;
int i, j;
int n;
int f;
int *p;
int *pmin;
if (0 >= ndim) {
return MPI_ERR_DIMS;
}
/* Allocate and initialize the bins */
bins = (int *) malloc((unsigned) ndim * sizeof(int));
if (NULL == bins) {
return MPI_ERR_NO_MEM;
}
*pdims = bins;
for (i = 0, p = bins; i < ndim; ++i, ++p) {
*p = 1;
}
/* Loop assigning factors from the highest to the lowest */
for (j = nfactor - 1; j >= 0; --j) {
f = pfacts[j];
/* Assign a factor to the smallest bin */
pmin = bins;
for (i = 1, p = pmin + 1; i < ndim; ++i, ++p) {
if (*p < *pmin) {
pmin = p;
}
}
*pmin *= f;
}
/* Sort dimensions in decreasing order (O(n^2) for now) */
for (i = 0, pmin = bins; i < ndim - 1; ++i, ++pmin) {
for (j = i + 1, p = pmin + 1; j < ndim; ++j, ++p) {
if (*p > *pmin) {
n = *p;
*p = *pmin;
*pmin = n;
}
}
}
return MPI_SUCCESS;
}
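/* illustrative trace (sketch): nnodes=12, ndim=2 -> getfactors gives {2,2,3};
 * bins start as (1,1); assigning the largest factor first: 3->(3,1), 2->(3,2), 2->(3,4);
 * sorted in decreasing order -> dims (4,3), i.e. MPI_Dims_create(12,2,{0,0}) returns (4,3) */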
/*
* getfactors
*
* Function: - factorize a number
* Accepts: - number
* - # prime factors
* - array of prime factors
* Returns: - MPI_SUCCESS or ERROR
*/
static int
getfactors(int num, int *nfactors, int **factors) {
int size;
int d;
int i;
int sqrtnum;
if(num < 2) {
(*nfactors) = 0;
(*factors) = NULL;
return MPI_SUCCESS;
}
/* Allocate the array of prime factors which cannot exceed log_2(num) entries */
sqrtnum = ceil(sqrt(num));
size = ceil(log(num) / log(2));
*factors = (int *) malloc((unsigned) size * sizeof(int));
i = 0;
/* determine all occurences of factor 2 */
while((num % 2) == 0) {
num /= 2;
(*factors)[i++] = 2;
}
/* determine all occurences of uneven prime numbers up to sqrt(num) */
d = 3;
for(d = 3; (num > 1) && (d <= sqrtnum); d += 2) {
while((num % d) == 0) {
num /= d;
(*factors)[i++] = d;
}
}
/* as we looped only up to sqrt(num) one factor > sqrt(num) may be left over */
if(num != 1) {
(*factors)[i++] = num;
}
(*nfactors) = i;
return MPI_SUCCESS;
}
#ifdef TEST_MAIN
int main(int argc,char** argv){
if (argc != 2) {fprintf(stderr,"usage: <N> -> to fit in a 2D grid");return EXIT_FAILURE;}
int dims2[2] = {0,0};
int n = atoi(argv[1]);
if (MPI_Dims_create(n,2,dims2)){return EXIT_FAILURE;} printf("%d 2D division:\t%d-%d\n",n,dims2[0],dims2[1]);
}
#endif //TEST_MAIN

@ -0,0 +1,499 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <errno.h>
#include "macros.h"
#include "sparseMatrix.h"
#include "utils.h"
/////////////// no offset --> SINGLE implementation functions
#ifndef SPARSE_UTILS_C
#define SPARSE_UTILS_C
///DENSE accumulator utils
/*
* alloc threads' aux arrays once and split them in threads' structures
* so free them once from the first thread struct, with the original pointers returned from the alloc
*/
ACC_DENSE* _initAccVectors_monoalloc(ulong num,ulong size){ //TODO PERF WITH NEXT
ACC_DENSE* out = NULL;
double* vAll = NULL;
ulong* vAllNzIdx = NULL;
if (!(out = calloc(num,sizeof(*out)))){
ERRPRINT("_initAccVectors aux struct alloc failed\n");
return NULL;
}
if (!(vAll = calloc(num*size,sizeof(*vAll)))) {
ERRPRINT("_initAccVectors aux dense vectors alloc failed\n");
goto err;
}
if (!(vAllNzIdx = calloc(num*size,sizeof(*vAllNzIdx)))) {
ERRPRINT("_initAccVectors aux dense vectors' idx alloc failed\n");
goto err;
}
for (ulong i=0; i<num; i++){
out[i].vLen = size; //TODO USELESS INFO?
out[i].v = vAll + i*size;
out[i].nnzIdx = vAllNzIdx + i*size;
out[i].nnzIdxMap.len = 0;
}
return out;
err:
free(out);
free(vAll);
free(vAllNzIdx);
return NULL;
}
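/* note (sketch): accumulators from _initAccVectors_monoalloc share the two backing arrays,
 * so they must be released once through the first struct, e.g.
 *	free(accs[0].v); free(accs[0].nnzIdx); free(accs);
 * freeAccsDense() below instead matches the per-accumulator allocation of _initAccVectors() */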
int allocAccDense(ACC_DENSE* v,ulong size){
v->vLen = size;
if (!(v->v = calloc(size,sizeof(*(v->v))))) {
ERRPRINT("_initAccVectors aux dense vector alloc failed\n");
return EXIT_FAILURE;
}
if (!(v->nnzIdx = calloc(size,sizeof(*(v->nnzIdx))))) {
ERRPRINT("_initAccVectors aux dense vector' idx alloc failed\n");
return EXIT_FAILURE;
}
if (initSpVectIdxDenseAcc(size, &v->nnzIdxMap)) return EXIT_FAILURE;
return EXIT_SUCCESS;
}
ACC_DENSE* _initAccVectors(ulong num,ulong size){
ACC_DENSE* out = NULL;
if (!(out = calloc(num,sizeof(*out)))){
ERRPRINT("_initAccVectors aux struct alloc failed\n");
return NULL;
}
for (ulong i=0; i<num; i++){
if (allocAccDense(out+i,size)) goto _err;
}
return out;
_err:
for (ulong i=0; i<num; i++){
if (out[i].v) free(out[i].v);
if (out[i].nnzIdx) free(out[i].nnzIdx);
}
free(out);
return NULL;
}
void freeAccsDense(ACC_DENSE* vectors,ulong num){
for (ulong i=0; i<num; i++){
free(vectors[i].v);
free(vectors[i].nnzIdx);
free(vectors[i].nnzIdxMap.idxsMap);
}
free(vectors);
}
void _freeAccsDenseChecks(ACC_DENSE* vectors,ulong num){
if (!vectors) return;
for (ulong i=0; i<num; i++){
if(vectors[i].v) free(vectors[i].v);
if(vectors[i].nnzIdx) free(vectors[i].nnzIdx);
}
free(vectors);
}
int initSpVectIdxDenseAcc(idx_t idxMax,SPVECT_IDX_DENSE_MAP* vectIdxsMap){
vectIdxsMap->len = 0;
nnz_idxs_flags_t* idxMaps = &vectIdxsMap->idxsMap;
#if SPVECT_IDX_BITWISE == TRUE //nnz presence flags as bitflags in limbs
vectIdxsMap->idxsMapN = INT_DIV_CEIL(idxMax, LIMB_SIZE_BIT);
#else //nnz presence flags in a single array
vectIdxsMap->idxsMapN = idxMax;
#endif //SPVECT_IDX_BITWISE == TRUE
if (!(*idxMaps = calloc(vectIdxsMap->idxsMapN, sizeof(**idxMaps)))) {
ERRPRINT("initSpVectIdxDenseAcc\tidxMaps SPVECT_IDX_BITWISE callc err\n");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
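/* sizing example (assuming 64-bit limbs, i.e. LIMB_SIZE_BIT == 64): with idxMax = 1000
 * the bitwise variant allocates INT_DIV_CEIL(1000,64) = 16 limbs of presence bitflags,
 * while the plain variant allocates 1000 per-index flags */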
void checkOverallocPercent(ulong* forecastedSizes,spmat* AB){
for (ulong r=0,rSize,forecastedSize; r < AB->M; r++){
forecastedSize = forecastedSizes[r];
#ifdef ROWLENS
rSize = AB->RL[r];
#else
rSize = AB->IRP[r+1] - AB->IRP[r];
#endif
DEBUGCHECKS{
if ( forecastedSize < rSize ){
ERRPRINT("BAD FORECASTING\n");
assert(forecastedSize >= rSize );
}
}
DEBUGPRINT
printf("extra forecastedSize of row: %lu\t=\t%lf %% \n",
r,100*(forecastedSize-rSize) / (double) forecastedSize);
}
idx_t extraMatrix = forecastedSizes[AB->M] - AB->NZ;
printf("extra forecastedSize of the matrix: \t%lu\t = %lf %% \n",
extraMatrix, 100*extraMatrix /(double) forecastedSizes[AB->M]);
}
int spmatDiff(spmat* A, spmat* B){
if (A->NZ != B->NZ){
ERRPRINT("NZ differ\n");
return EXIT_FAILURE;
}
if (memcmp(A->IRP,B->IRP,(A->M+1)*sizeof(*(A->IRP)))){ //IRP has M+1 entries; compare bytes, not elements
ERRPRINT("IRP differ\n");
return EXIT_FAILURE;
}
if (doubleVectorsDiff(A->AS,B->AS,A->NZ,NULL)){
ERRPRINT("AS DIFFER\n");
return EXIT_FAILURE;
}
if (memcmp(A->JA,B->JA,A->NZ*sizeof(*(A->JA)))){ //compare bytes, not elements
ERRPRINT("JA differ\n");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
double* CSRToDense(spmat* sparseMat){
double* denseMat;
ulong i,j,idxNZ,denseSize;
if (__builtin_umull_overflow(sparseMat->M,sparseMat->N,&denseSize)){
ERRPRINT("overflow in dense allocation\n");
return NULL;
}
if (!(denseMat = calloc(denseSize, sizeof(*denseMat)))){
ERRPRINT("dense matrix alloc failed\n");
return NULL;
}
for (i=0;i<sparseMat->M;i++){
for (idxNZ=sparseMat->IRP[i]; idxNZ<sparseMat->IRP[i+1]; ++idxNZ){
j = sparseMat->JA[idxNZ];
//converting sparse item into dense entry
denseMat[(ulong) IDX2D(i,j,sparseMat->N)] = sparseMat->AS[idxNZ];
}
}
return denseMat;
}
void printSparseMatrix(spmat* spMatrix,char justNZMarkers){
double* denseMat = CSRToDense(spMatrix);
if (!denseMat) return;
printMatrix(denseMat,spMatrix->M,spMatrix->N,justNZMarkers);
free(denseMat);
}
static inline int _colsPartitioningUnifRanges_init(spmat* A,uint gridCols,
spmat** colParts,idx_t** colPartsLens){
spmat* colPart;
ulong _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols;
///alloc/init partitions structures
if (!(*colParts = calloc(gridCols, sizeof(**colParts)))){
ERRPRINT("colsPartitioningUnifRanges\tcolumns partitions of A calloc fail\n");
return EXIT_FAILURE;
}
for (ulong i=0,colBlock; i<gridCols; i++){
colBlock = UNIF_REMINDER_DISTRI(i,_colBlock,_colBlockRem);
colPart = *colParts + i;
if (allocSpMatrixInternal(A->M,colBlock,colPart)){
ERRPRINT("colsPartitioningUnifRanges\tallocSpMatrixInternal partition err\n");
return EXIT_FAILURE;
}
//TODO TODO overalloc A cols partitions NZ arrays, then realloc
if (!(colPart->AS = malloc(A->NZ * sizeof(*A->AS)))){
ERRPRINT("colPart of A overalloc of AS errd\n");
return EXIT_FAILURE;
}
if (!(colPart->JA = malloc(A->NZ * sizeof(*A->JA)))){
ERRPRINT("colPart of A overalloc of JA errd\n");
return EXIT_FAILURE;
}
}
//for each A col partition -> last copied nz index = nnz copied amount
if (! (*colPartsLens = calloc(gridCols, sizeof(**colPartsLens))) ) {
ERRPRINT("colsPartitioningUnifRanges: colPartsLens calloc errd\n");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
static inline int _colsPartitioningUnifRanges_finalRealloc(spmat* A,uint gridCols,
spmat* colParts,idx_t* colPartsLens){
spmat* colPart;
double* tmpAS; idx_t* tmpJA;
//realloc overallocated A parts NZ arrays (TODO -> downsizing -> nofails?)
for (ulong i=0,partLen; i<gridCols; i++){
colPart = colParts + i;
partLen = colPartsLens[i];
if (!(tmpAS = realloc(colPart->AS,partLen*sizeof(*(colPart->AS))))){
ERRPRINT("realloc overallocated cols partition AS array\n");
return EXIT_FAILURE;
}
colPart->AS = tmpAS;
if (!(tmpJA = realloc(colPart->JA,partLen*sizeof(*(colPart->JA))))){
ERRPRINT("realloc overallocated cols partition JA array\n");
return EXIT_FAILURE;
}
colPart->JA = tmpJA;
colPart->NZ = partLen;
colPart->IRP[A->M] = partLen;
}
return EXIT_SUCCESS;
}
#endif
#ifndef OFF_F
#error generic implementation requires OFF_F defined
#endif
//////////////////////// CSR SPECIFIC -- TODO RENAME //////////////////
///SPARSE MATRIX PARTITIONING
idx_t* CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols){
ulong subRowsN = A->M * gridCols;
ulong _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols;
ulong* offsets = malloc( (subRowsN+1) * sizeof(*offsets) );
if (!offsets) {
ERRPRINT("colsOffsetsPartitioningUnifRanges:\toffsets malloc errd\n");
return NULL;
}
///OFFSETS COMPUTE FOR COL GROUPS -> O( A.NZ )
for (ulong r=0, j=0; r<A->M; j=A->IRP[++r]-OFF_F){
offsets[ IDX2D(r,0,gridCols) ] = j; //row's first gc start is constrained
//navigate column groups inside current row
for (ulong gc=1,gcStartCol; gc<gridCols; gc++){
gcStartCol = UNIF_REMINDER_DISTRI_STARTIDX(gc,_colBlock,_colBlockRem);
//goto GroupCols start entry,keeping A's nnz entries navigation (idx j)
//for (ulong c=A->JA[j]-OFF_F; c<gcStartCol && j < A->IRP[r+1]-OFF_F; c=A->JA[++j]-OFF_F);
while (j < A->IRP[r+1]-OFF_F && A->JA[j]-OFF_F < gcStartCol)
j++;
offsets[ IDX2D(r,gc,gridCols) ] = j; //row's gc group startIdx
}
}
offsets[subRowsN] = A->NZ; //last row's partition end constrained
return offsets;
}
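/* worked example (sketch, OFF_F=0 / C indexing): A 2x4 with IRP={0,2,4}, JA={0,3,1,2}, gridCols=2
 * -> column groups {0,1} and {2,3}; the returned offsets are {0,1,2,3,4}:
 * row 0 owns nnz range [0,1) in group 0 and [1,2) in group 1, row 1 owns [2,3) and [3,4) */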
spmat* CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)(spmat* A,uint gridCols,
idx_t** colPartsOffsets)
{
spmat *colParts = NULL, *colPart;
ulong _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols;
ulong *colPartsLens=NULL;
///alloc/init partitions structures
idx_t* colOffsets = NULL;
if (!(colOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(A,gridCols)))
goto _err;
if (_colsPartitioningUnifRanges_init(A,gridCols,&colParts,&colPartsLens))
goto _err;
//OFFSET BASED COPY OF A.COL_GROUPS -> O( A.NZ )
for (idx_t r=0,gcId=0; r<A->M; r++){
for (idx_t gc=0,gcStartIdx=0,gLen=0; gc<gridCols; gc++,gcId++){
//gcId = IDX2D(r,gc,gridCols); //seqent. scan of parts
colPart = colParts + gc;
gcStartIdx = colOffsets[ gcId ];
gLen = colOffsets[ gcId+1 ] - gcStartIdx;
colPart->IRP[r] = colPartsLens[gc]; //new line for the col partition
//actual copy of nnz entries to colPartitions
memcpy(colPart->AS+colPart->IRP[r], A->AS+gcStartIdx, gLen*sizeof(*A->AS));
memcpy(colPart->JA+colPart->IRP[r], A->JA+gcStartIdx, gLen*sizeof(*A->JA));
colPartsLens[gc] += gLen;
#ifdef ROWLENS
colPart->RL[r] = gLen;
#endif
}
}
//realloc overallocated A parts NZ arrays
if(_colsPartitioningUnifRanges_finalRealloc(A,gridCols,colParts,colPartsLens))
goto _err;
free(colPartsLens);
if (colPartsOffsets) *colPartsOffsets = colOffsets; //save for the caller
else free(colOffsets);
return colParts;
_err:
//*colPartsOffsets is only set on the success path; free local resources only
if (colParts){
for (ulong i=0; i<gridCols; i++) freeSpmatInternal(colParts+i);
free(colParts);
}
free(colOffsets);
free(colPartsLens);
return NULL;
}
spmat* CAT(colsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols){
spmat *colParts, *colPart;
ulong _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols, *colPartsLens=NULL;
///alloc/init partitions structures
if (_colsPartitioningUnifRanges_init(A,gridCols,&colParts,&colPartsLens))
goto _err;
/* TODO
* Parallelize: collapse the 2 for loops with OMP, turn gcEndCol into a start index, ...
* or wrap this in a static inline
*/
for (ulong r=0, j=0; r<A->M; j=A->IRP[++r]-OFF_F){
//navigate column groups inside current row
for (ulong gc=0,gcEndCol=0,i; gc<gridCols ; gc++,j+=i){
i = 0; //@i=len current subpartition of row @r to copy
colPart = colParts + gc;
//NB: IRP is not shifted because it is handled as an internal implementation component
//but the JA idxs are memcopied as they are in A and shifted inside the functions that use them
colPart->IRP[r] = colPartsLens[gc];
gcEndCol += UNIF_REMINDER_DISTRI(gc,_colBlock,_colBlockRem);
//goto next GroupCols,keeping A's nnz entries navigation ( index j+i )
//for (ulong c=A->JA[j+i]-OFF_F; c<gcEndCol && j+i < A->IRP[r+1]-OFF_F; c=A->JA[j+ ++i]-OFF_F);
while ( j+i < A->IRP[r+1]-OFF_F && A->JA[j+i]-OFF_F < gcEndCol ) i++;
memcpy(colPart->AS+colPart->IRP[r], A->AS+j, i*sizeof(*A->AS));
memcpy(colPart->JA+colPart->IRP[r], A->JA+j, i*sizeof(*A->JA));
colPartsLens[gc] += i;
#ifdef ROWLENS
colPart->RL[r] = i;
#endif
}
}
//realloc overallocated A parts NZ arrays
if(_colsPartitioningUnifRanges_finalRealloc(A,gridCols,colParts,colPartsLens))
goto _err;
free(colPartsLens);
return colParts;
_err:
for (ulong i=0; i<gridCols; i++) freeSpmatInternal(colParts+i);
if(colParts) free(colParts);
if(colPartsLens) free(colPartsLens);
return NULL;
}
void CAT(checkOverallocRowPartsPercent_,OFF_F)(ulong* forecastedSizes,spmat* AB,
idx_t gridCols,idx_t* bColOffsets){
idx_t* abColOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(AB, gridCols);
assert(abColOffsets); //partitioning error
for (idx_t rLen,forecast,partId=0; partId<AB->M*gridCols; partId++){
forecast = forecastedSizes[partId];
rLen = abColOffsets[partId+1] - abColOffsets[partId];
DEBUGCHECKS assert(forecast >= rLen);
}
idx_t extraMatrix = forecastedSizes[AB->M] - AB->NZ;
printf("extra forecastedSize of the matrix: \t%lu\t = %lf %% \n",
extraMatrix, 100*extraMatrix /(double) forecastedSizes[AB->M]);
free(abColOffsets);
}
#ifdef SPARSEUTILS_MAIN_TEST ///unit test embbeded
///inline export here
//SPMV_CHUNKS_DISTR spmvChunksFair;
spmat* allocSpMatrix(ulong rows, ulong cols);
int allocSpMatrixInternal(ulong rows, ulong cols, spmat* mat);
void freeSpmatInternal(spmat* mat);
void freeSpmat(spmat* mat);
////INTERNAL TEST FUNCTIONS
//test that each row's partition from colsOffsetsPartitioningUnifRanges is in the correct index range
#include <alloca.h>
int testColsOffsetsPartitioningUnifRanges(spmat* mat,ulong gridCols,ulong* partsOffs){
ulong _colBlock = mat->N/gridCols, _colBlockRem = mat->N%gridCols;
ulong j=0; //CSR scanning nnz idx
ulong* colPartsPopulations = alloca(gridCols * sizeof(*colPartsPopulations));
memset(colPartsPopulations,0,gridCols * sizeof(*colPartsPopulations));
for (ulong r=0,pId=0; r<mat->M; r++){
for (ulong gc=0,pStartIdx,pEndIdx; gc<gridCols; gc++,pId++){
pStartIdx = UNIF_REMINDER_DISTRI_STARTIDX(gc,_colBlock,_colBlockRem);
pEndIdx = UNIF_REMINDER_DISTRI_STARTIDX(gc+1,_colBlock,_colBlockRem)-1;
//pId=IDX2D(r,gc,gridCols);
for (ulong idx=partsOffs[pId],c; idx<partsOffs[pId+1]; idx++,j++){
c = mat->JA[idx];
assert(j == idx); //consecutive index in partitioning
assert(pStartIdx <= c && c <= pEndIdx); //colRange
assert(mat->IRP[r] <= idx && idx <= mat->IRP[r+1] ); //rowRange
}
colPartsPopulations[gc] += partsOffs[pId+1] - partsOffs[pId];
}
}
assert(j == mat->NZ);
ulong s=0;
for (ulong gc=0,partSize; gc < gridCols; gc++,s+=partSize){
partSize = colPartsPopulations[gc];
double partShare=partSize/(double)mat->NZ,partsAvg=1/(double)gridCols;
double partShareAvgDiff = partShare - partsAvg;
printf("colPartition %lu has:\t%lu = %lf of NNZ\t\t .. %lf\tAVG diff\n",
gc,partSize,partShare,partShareAvgDiff);
}
assert(s == mat->NZ); //TODO DUPLICATED
return EXIT_SUCCESS;
}
CONFIG Conf = {
.gridRows = 8,
.gridCols = 8,
};
#include "parser.h"
int main(int argc, char** argv){
int out=EXIT_FAILURE;
if (init_urndfd()) return out;
if (argc < 2 ) {ERRPRINT("usage: <MatrixMarket/COO matrix file for the test>\n"); return out;}
////parse sparse matrix and dense vector
spmat* mat;
char* trgtMatrix = TMP_EXTRACTED_MARTIX;
if (extractInTmpFS(argv[1],TMP_EXTRACTED_MARTIX) < 0)
trgtMatrix = argv[1];
if (!(mat = MMtoCSR(trgtMatrix))){
ERRPRINT("err during conversion MM -> CSR\n");
return out;
}
////partitioning test
ulong* colsPartitions = colsOffsetsPartitioningUnifRanges_0(mat,Conf.gridCols);
if (!colsPartitions) goto _free;
if (testColsOffsetsPartitioningUnifRanges(mat,Conf.gridCols,colsPartitions))
goto _free;
out=EXIT_SUCCESS;
printf("testColsOffsetsPartitioningUnifRanges passed with "
"mat: %lux%lu-%luNNZ\tgrid: %dx%d\n",
mat->M,mat->N,mat->NZ,Conf.gridRows,Conf.gridCols);
_free:
if (colsPartitions) free(colsPartitions);
return out;
}
#endif //SPARSEUTILS_MAIN_TEST

@ -0,0 +1,38 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "sparseUtilsMulti.h"
#define OFF_F 0
#include "sparseUtilsGeneric.c"
#undef OFF_F
#define OFF_F 1
#include "sparseUtilsGeneric.c"
#undef OFF_F

@ -0,0 +1,502 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
//various aux functions
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
//#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <errno.h>
#include <limits.h>
#include "macros.h"
#include "sparseMatrix.h"
#include "utils.h"
int urndFd; //will point to urandom device file
///IO
//UNBUFFERED IO
int write_wrap(int fd,void* src,size_t count){
ssize_t wr;
size_t written=0;
while (written < count){
wr=write(fd,src+written,count-written);
if (wr<0){
perror("write");
return wr;
}
written+=wr;
}
return 0;
}
int read_wrap(int fd,void* dst,size_t count){
ssize_t rd;
size_t readed=0;
while (readed < count){
rd=read(fd,dst+readed,count-readed);
if (rd<0){
perror("read");
return rd;
}
readed+=rd;
}
return 0;
}
int readALL_wrap(int fd,char** dst,size_t* count){
ssize_t rd=!0; //to allow count > fsize
size_t readed=0;
char allocated=0; //flag if required *dst allocation
if (!(*dst)){ //allocate dst buffer of same size of file
off_t seekCurr=lseek(fd,0,SEEK_CUR);
off_t fsize=lseek(fd,0,SEEK_END);
if( seekCurr==-1 || fsize==-1 || lseek(fd,seekCurr,SEEK_SET)==-1){
perror("lseek");
return EXIT_FAILURE;
}
*count=fsize;
if (!(*dst=malloc(fsize))){
fprintf(stderr,"malloc read_wrap file size buf error\n");
return EXIT_FAILURE;
}
allocated=!0;
}
//read loop
while (readed < *count && rd > 0){
rd=read(fd,(*dst)+readed,*count-readed);
if (rd<0){
perror("read");
if (allocated) free(*dst);
return rd;
}
readed+=rd;
}
if (readed < *count) (*dst)[readed]='\0'; //TODO NEEDED?
return EXIT_SUCCESS;
}
int init_urndfd(){ // wrap init urndFd
if((urndFd=open(DRNG_DEVFILE,O_RDONLY))<0){
perror("open DRNG_DEVFILE");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
int createNewFile(char* const outFpath){
int mode=S_IRWXU;
errno = 0;
int outFd=open(outFpath, O_WRONLY | O_CREAT | O_TRUNC, mode);
if (errno==EEXIST) outFd=open(outFpath, O_WRONLY | O_TRUNC, mode);
if (outFd<0) perror("open outFd failed ");
return outFd;
}
///BUFFERED IO
//TODO series of 0 returns.... fix
int fread_wrap(FILE* fp,void* dst,size_t count){
int rd=0,toRead;
size_t readed=0;
while (readed < count){
toRead = count - readed;
rd=fread(dst+readed,1,toRead,fp);
if (rd != toRead){ //TODO SHORT ITEM COUNT
if (feof(fp)) return EOF;
else if (ferror(fp)){
ERRPRINT("fread_wrap errd\n");
return -2;
}
//else ERRPRINT("W** SHORT ITEM COUNT RETURN MEANS!?"); //TODO OO
}
readed+=rd;
}
return rd;
}
///STRUCTURED DATA IO
int writeDoubleVector(char* fpath,double* v,ulong size){
int fd,out=EXIT_FAILURE;
if ( (fd = createNewFile(fpath) ) < 0 ) goto _end;
write_wrap(fd,v,size * sizeof(*v)); //raw write of vector
out = EXIT_SUCCESS;
DEBUG printf("written double vector into %s RAW of %lu elements\n",fpath,size);
_end:
if (close(fd) == EOF) perror("close errd\n");
return out;
}
///STRUCTURED DATA IO -- BUFFERED: FSCANF - FPRINTF
int writeDoubleVectorAsStr(char* fpath,double* v,ulong size){
int out=EXIT_FAILURE;
FILE* fp = fopen(fpath,"w");
if (!fp){
perror("fopen vector file write");
return EXIT_FAILURE;
}
for (ulong i=0; i<size; i++){
if (fprintf(fp,DOUBLE_STR_FORMAT,v[i]) < 0){
ERRPRINT("fprintf to out vector file errd\n");
goto _end;
}
}
out = EXIT_SUCCESS;
DEBUG printf("written vector into %s of %lu elements\n",fpath,size);
_end:
if (fclose(fp) == EOF) perror("fclose errd\n");
return out;
}
double* readDoubleVector(char* fpath,ulong* size){
int rd,hleft;
double *out,*tmp;
ulong i=0,vectorSize=RNDVECTORSIZE;
if (*size) vectorSize = *size;
FILE* fp = fopen(fpath,"r");
if (!fp){
perror("fopen vector file");
return NULL;
}
if (!(out = malloc(vectorSize * sizeof(*out)))){
ERRPRINT("vector read malloc fail for file\n");
goto _err;
}
while (1){
if (i >= vectorSize ){ //realloc the array
vectorSize *= VECTOR_STEP_REALLOC;
if (!(tmp=realloc(out,vectorSize*sizeof(*out)))){
ERRPRINTS("realloc errd to ~~ %lu MB\n",vectorSize >> 20);
goto _err;
}
out = tmp;
DEBUG printf("reallocd to ~~ %lu MB\n",vectorSize >> 20);
}
ulong toRead = MIN(VECTOR_READ_BLOCK,(vectorSize-i));
if((rd = fread_wrap(fp,out + i,sizeof(*out)*toRead)) == -2){
ERRPRINT("fread_wrap errd\n");
goto _err;
}
if(feof(fp)) //TODO rd == EOF not work always != W*F???
break;
i += rd/sizeof(*out);
if( (hleft = rd % sizeof(*out)) ){ //partial double read
DEBUG hprintf("half left double read... rounding down fptr\n");
if(fseek(fp,-hleft,SEEK_CUR)){
perror("fseek in readDoubleVector");
goto _err;
}
}
}
//REALLOC THE ARRAY TO THE FINAL SIZE
assert( i > 0 );
*size = i; //actual number of doubles read
if (!(tmp = realloc(out,*size*sizeof(*out)))){
ERRPRINT("realloc errd\n");
goto _err;
}
out = tmp;
DEBUG printf("readed vector from %s of %lu elements\n",fpath,*size);
goto _free;
_err:
free(out);
out = NULL;
_free:
if (fclose(fp) == EOF) perror("fclose errd\n");
return out;
}
double* readDoubleVectorStr(char* fpath,ulong* size){
int fscanfOut;
double *out,*tmp;
ulong i=0,vectorSize=RNDVECTORSIZE;
if (*size) vectorSize = *size;
FILE* fp = fopen(fpath,"r");
if (!fp){
perror("fopen vector file");
return NULL;
}
if (!(out = malloc(vectorSize * sizeof(*out)))){
ERRPRINT("vector read malloc fail for file\n");
goto _err;
}
while (1){
if (i >= vectorSize ){ //realloc the array
vectorSize *= VECTOR_STEP_REALLOC;
if (!(tmp=realloc(out,vectorSize*sizeof(*out)))){
ERRPRINTS("realloc errd to ~~ %lu MB\n",vectorSize >> 20);
goto _err;
}
out = tmp;
DEBUG printf("reallocd to ~~ %lu MB\n",vectorSize >> 20);
}
fscanfOut = fscanf(fp,DOUBLE_STR_FORMAT,out + i++ );
if ( fscanfOut == EOF && ferror(fp)){
perror("invalid fscanf");
goto _err;
}
if ( fscanfOut != 1 || fscanfOut == EOF ) break; //end of vector
}
//REALLOC THE ARRAY TO THE FINAL SIZE
assert( i > 0 );
*size = --i;
if (!(tmp = realloc(out,*size*sizeof(*out)))){
ERRPRINT("realloc errd\n");
goto _err;
}
out = tmp;
DEBUG printf("readed vector from %s of %lu elements\n",fpath,*size);
goto _free;
_err:
free(out);
out = NULL;
_free:
if (fclose(fp) == EOF) perror("fclose errd\n");
return out;
}
///CFG-AUX
int getConfig(CONFIG* conf){
int changes=EXIT_FAILURE;
char *varVal,*ptr;
ulong val;
if ((varVal = getenv(GRID_ROWS))){
val=strtoul(varVal,&ptr,10);
if (ptr==varVal || val>= UINT_MAX){
perror("strtol errd");
} else {
conf->gridRows = val;
}
changes = EXIT_SUCCESS;
}
if ((varVal = getenv(GRID_COLS))){
val=strtoul(varVal,&ptr,10);
if (ptr==varVal || val>= UINT_MAX){
perror("strtol errd");
} else {
conf->gridCols = val;
}
changes = EXIT_SUCCESS;
}
return changes;
}
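/* usage sketch: GRID_ROWS / GRID_COLS are macros (from macros.h) holding the env var names;
 * assuming they expand to "GRID_ROWS" and "GRID_COLS", e.g.
 *	export GRID_ROWS=8 GRID_COLS=4
 * makes getConfig(&conf) overwrite conf.gridRows=8 and conf.gridCols=4, returning EXIT_SUCCESS */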
/////LIB-SORTING -- WRAPPERS
//comparing functions
static inline int cmp_idx_t(const void* a, const void*b){
idx_t aa=*((ulong*) a), bb = *((ulong*) b);
return aa==bb?0:aa>bb?1:-1;
}
static inline int cmpulong(const void* a, const void*b){
ulong aa=*((ulong*) a), bb = *((ulong*) b);
return aa==bb?0:aa>bb?1:-1;
}
static inline int cmpuint(const void* a, const void*b){
uint aa=*((uint*) a), bb = *((uint*) b);
return aa==bb?0:aa>bb?1:-1;
}
static inline int cmpRbNode(const void* a, const void* b){
rbNode *aa=(rbNode*) a, *bb = (rbNode*) b;
return cmp_idx_t(&aa->key,&bb->key);
}
//sorting functions
void sort_idx_t(idx_t* arr, idx_t len){
qsort(arr,len,sizeof(*arr),cmp_idx_t);
}
void sortulong(ulong* arr, ulong len){
qsort(arr,len,sizeof(*arr),cmpulong);
}
void sortuint(uint* arr, uint len){
qsort(arr,len,sizeof(*arr),cmpuint);
}
void sortRbNode(rbNode* arr, idx_t len){
qsort(arr,len,sizeof(*arr),cmpRbNode);
}
////Tests AUX
inline void assertArrNoRepetitions(idx_t* arrSorted, idx_t arrLen){
if (arrLen < 2) return; //nothing to compare
for (idx_t i=1,last=arrSorted[0]; i<arrLen; last = arrSorted[i++])
assert( arrSorted[i] != last );
}
///MATH UTILS
static inline int rndDouble_sinAll(double* d){
if(read_wrap(urndFd,(void*) d,sizeof(*d))){
ERRPRINT("read_wrap failed to read rnd double\n");
return EXIT_FAILURE;
}
*d = sin(*d) * MAXRND;
return EXIT_SUCCESS;
}
long _rndHold; //permanent storage of rnd longs
static inline int rndDouble_sinDecimal(double* d){
if(read_wrap(urndFd,(void*) &_rndHold,sizeof(_rndHold))){
ERRPRINT("read_wrap failed to read holder for rnd double\n");
return EXIT_FAILURE;
}
*d = (_rndHold % (ulong) ceil(MAXRND)) + sin(_rndHold);
return EXIT_SUCCESS;
}
void statsAvgVar(double* values,uint numVals, double* out){
double sum=0,sumSquare=0;
for (uint i=0; i<numVals; i++){
sum += values[i];
sumSquare += values[i]*values[i];
}
out[0] = sum/numVals; //AVG
out[1] = sumSquare/numVals - (out[0] * out[0]); //VAR
}
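//e.g. values={1,2,3}: out[0]=2 (mean), out[1]=14/3-4≈0.667 (population variance E[x^2]-E[x]^2)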
/// MATRIX - VECTOR COMPUTE UTILS
int fillRndVector(ulong size, double* v){
for( ulong x=0; x<size; ++x ){
if(rndDouble_sinAll( v+x )) return EXIT_FAILURE;
#ifdef RNDVECTMIN
v[x] += RNDVECTMIN;
#endif
}
return EXIT_SUCCESS;
}
//convention true result in @a, toCheck in @b
int doubleVectorsDiff(double* a, double* b, ulong n,double* diffMax){
int out = EXIT_SUCCESS;
double diff,diffAbs,_diffMax=0;
if (diffMax) *diffMax = 0;
else diffMax = &_diffMax;
for (ulong i=0; i<n; i++){
diff = a[i] - b[i];
diffAbs = ABS( diff );
if( diffAbs > DOUBLE_DIFF_THREASH ){
out = EXIT_FAILURE;
ERRPRINTS("DOUBLE VECTORS DIFF: DOUBLE_DIFF_THREASH=%lf\t<\t"
"|%13lg| = %lf %% of @a[%lu]\n",
DOUBLE_DIFF_THREASH,diff,100*diffAbs/ABS(a[i]),i);
#ifdef DOUBLE_VECT_DIFF_EARLY_EXIT
/*#pragma message("DOUBLE_VECT_DIFF_EARLY_EXIT: only first diff double reported")*/
return EXIT_FAILURE;
#endif
}
if ( ABS(*diffMax) < diffAbs ) *diffMax = diff;
}
DEBUG{
printf("\nchecked diff %s"CEND" between 2 double vector of %lu elements"
"\tmax diff: %le %s threash: %le\n", !out?CCC"OK":CCCERR"ERR!",
n,*diffMax,*diffMax<DOUBLE_DIFF_THREASH?"<":">=",
DOUBLE_DIFF_THREASH);
if (!*diffMax){ //self diff check useless TODO REMOVE
if (!memcmp(a,b,n*sizeof(*a)))
printf("EXACT MATCHING AMONG THE 2 DOUBLE VECTORS!\n");
}
}
return out;
}
void printMatrix(double* mat,ulong m,ulong n,char justNZMarkers){
printf("printing matrix: %lu x %lu\n",m,n);
ulong i,j;
for (i=0;i<m;i++){
for (j=0;j<n;j++){
if (justNZMarkers) printf("%s",mat[IDX2D(i,j,n)]?".":" ");
else printf("%1.1lf ",mat[IDX2D(i,j,n)]);
}
printf("\n");
}
printf("\n");
}
void printVector(double* v,ulong size){
for( ulong i=0;i<size;i++) printf("%1.1lf ",v[i]);
printf("\n");
}
////VAR -- MISC
/* search for pattern in @str from NULL terminated @patterns array
* return pointer of pattern in @str if any, otherwise NULL
*/
static inline char* searchPatternsInStr(char** patterns,char* str){
char* out = NULL;
for (char** p=patterns; !out && *p; p++) out = strstr(str,*p);
return out;
}
/* search for pattern in @str from NULL terminated @patterns array
* return pointer of pattern in @str if any, otherwise NULL
*/
static inline char* searchPatternInStrs(char* pattern,char** strings){
char* out = NULL;
for (char** str=strings; !out && *str; str++) out = strstr(*str,pattern);
return out;
}
inline int appendArr(ulong val,APPENDARRAY* list){
return 0; //TODO
}
///DECOMPRESSION IN TMPFS ; TODO vector not puttable here... figure out nicer way..
char* COMPRESS_EXTENSIONS[] = { ".gz", ".xz", ".bz2", ".zip", NULL};
#define STD_DECOMPR_FLAGS " -d -c "
char* DECOMPRESS_CMDS[] = {"gzip" STD_DECOMPR_FLAGS, "xz" STD_DECOMPR_FLAGS, "bzip2" STD_DECOMPR_FLAGS,
"unzip -c", NULL }; //zip is too old ;)
int extractInTmpFS(char* path, char* tmpFsDecompressPath){
char* ext = searchPatternsInStr(COMPRESS_EXTENSIONS,path);
if (!ext) return -1; //NOT SUPPORTED COMPRESS EXTENSION -> TRY REGULAR PATH
//search first 2 char after dot of ext in DECOMPRESS_CMDS to get the decompress cmd
if (strlen(ext) < 3 ){
ERRPRINTS("NOT SUPPORTED DECOMPRESSION:\textension %s too short to be matched",ext);
return -1;
}
char extStart[3];
extStart[0] = ext[1];
extStart[1] = ext[2];
extStart[2] = '\0';
char* decompressCmdBase = searchPatternInStrs(extStart,DECOMPRESS_CMDS);
if (!decompressCmdBase){
ERRPRINTS("NOT SUPPORTED DECOMPRESS FOR %s\n",ext);
return -1;
}
uint cmdLen = strlen(decompressCmdBase) + strlen(path) + 4 + strlen(tmpFsDecompressPath);
char* decompressCmd = alloca(cmdLen+1);
if (snprintf(decompressCmd,cmdLen+1,"%s %s > %s",
decompressCmdBase,path,tmpFsDecompressPath) < 0){
ERRPRINT("extractInTmpFS, snprintf errd\n");
return -1;
}
VERBOSE printf("decompressing %s --> %s\ncmd:\t%s\n",path,tmpFsDecompressPath,decompressCmd);
return system(decompressCmd);
}
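/* usage sketch: extractInTmpFS("mat.mtx.gz","/tmp/mat.mtx") matches the ".gz" extension,
 * builds and runs `gzip -d -c mat.mtx.gz > /tmp/mat.mtx` via system(), returning its status;
 * unsupported/uncompressed paths return -1 so callers fall back to the original file path */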

@ -0,0 +1,107 @@
#include "../include/Sp3MM_CSR_OMP_Multi.h"
#include "../include/utils.h"
/**
* @brief performs multiplication of two sparse matrices
* A and B stored in the CRS format. The resulting matrix is C
*
 * @param[in] a_m number of rows of A
 * @param[in] a_n number of columns of A
* @param[in] a_nz number of non zero elements of A
* @param[in] a_as array with the non zero coefficients of A
* @param[in] a_ja array with the column indices of the non
* zero coefficients of A
* @param[in] a_irp array with the indices of the beginning of
* each row in a_as and a_ja
 * @param[in] a_rl array with the length of each row of A
* @param[in] a_max_row_nz maximum number of coefficients in a row in A
 * @param[in] b_m number of rows of B
 * @param[in] b_n number of columns of B
* @param[in] b_nz number of non zero elements of B
* @param[in] b_as array with the non zero coefficients of B
* @param[in] b_ja array with the column indices of the non
* zero coefficients of B
* @param[in] b_irp array with the indices of the beginning of
* each row in b_as and b_ja
 * @param[in] b_rl array with the length of each row of B
* @param[in] b_max_row_nz maximum number of coefficients in a row in B
 * @param[out] c_m number of rows of C
 * @param[out] c_n number of columns of C
* @param[out] c_nz number of non zero elements in C
* @param[out] c_as array with the non zero coefficients of C
* @param[out] c_ja array with the column indices of the non
* zero coefficients of C
* @param[out] c_irp array with the indices of the beginning of
* each row in c_as and c_ja
 * @param[out] c_rl array with the length of each row of C
* @param[out] c_max_row_nz maximum number of coefficients in a row in C
* @param[out] info return value to check if the operation was successful
*/
#ifdef ROWLENS
void psb_f_spmm_row_by_row_ub_0(idx_t a_m, idx_t a_n, idx_t a_nz,
double *a_as, idx_t *a_ja,
idx_t *a_irp, idx_t *a_rl, idx_t a_max_row_nz,
idx_t b_m, idx_t b_n, idx_t b_nz,
double *b_as, idx_t *b_ja,
idx_t *b_irp, idx_t *b_rl, idx_t b_max_row_nz,
idx_t *c_m, idx_t *c_n, idx_t *c_nz,
double **c_as, idx_t **c_ja,
idx_t **c_irp, idx_t **c_rl, idx_t *c_max_row_nz,
int info)
#else
void psb_f_spmm_row_by_row_ub_0(idx_t a_m, idx_t a_n, idx_t a_nz,
double *a_as, idx_t *a_ja,
idx_t *a_irp, idx_t a_max_row_nz,
idx_t b_m, idx_t b_n, idx_t b_nz,
double *b_as, idx_t *b_ja,
idx_t *b_irp, idx_t b_max_row_nz,
idx_t *c_m, idx_t *c_n, idx_t *c_nz,
double **c_as, idx_t **c_ja,
idx_t **c_irp, idx_t *c_max_row_nz,
int info)
#endif
{
int rc;
//stack storage for the descriptors handed to the spmm implementation
spmat a_mat = {0}, b_mat = {0}, *c = NULL;
spmat *a = &a_mat, *b = &b_mat;
CONFIG cfg_mem = {0}, *cfg = &cfg_mem;
//TODO cfg->threadNum and cfg->chunkDistrbFunc are not set by getConfig() below
#ifdef ROWLENS
a->RL = a_rl;
b->RL = b_rl;
#endif //ROWLENS
// setting up cfg
// TODO : CHECK THAT THIS IS COMPATIBLE WITH PSB
rc = getConfig(cfg);
// setting up spmat type matrices
a->M = a_m;
a->N = a_n;
a->NZ = a_nz;
a->AS = a_as;
a->JA = a_ja;
a->IRP = a_irp;
a->MAX_ROW_NZ = a_max_row_nz;
b->M = b_m;
b->N = b_n;
b->NZ = b_nz;
b->AS = b_as;
b->JA = b_ja;
b->IRP = b_irp;
b->MAX_ROW_NZ = b_max_row_nz;
// performing spmm
c = spmmRowByRow_0(a, b, cfg);
// output result
*(c_m) = c->M;
*(c_n) = c->N;
*(c_nz)= c->NZ;
*(c_as)= c->AS;
*(c_ja)= c->JA;
*(c_irp)=c->IRP;
#ifdef ROWLENS
*(c_rl)= c->RL;
#endif
}

@ -0,0 +1,94 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
//CORE IMPLEMENTATIONS HEADER
#ifndef SP3MM_CSR_OMP_MULTI_H
#define SP3MM_CSR_OMP_MULTI_H
///commons single implementation stuff
#include "macros.h"
#include "sparseMatrix.h"
///aux structures
//holds the SPMM result over an unpartitioned space shared among threads (per row or rows' block)
typedef struct{
//space to hold SPMM output
ulong* JA;
double* AS;
ulong size; //num of entries allocated -> only dbg checks
ulong lastAssigned; //last JA&AS assigned index to an accumulator(atom)
SPACC* accs; //SPARSIFIED ACC POINTERS
uint accsNum;
} SPMM_ACC; //accumulator for SPMM
///compute function interface and its pointer definitions
typedef spmat* ( SPMM) (spmat*,spmat*,CONFIG*);
typedef spmat* (*SPMM_INTERF) (spmat*,spmat*,CONFIG*);
typedef spmat* ( SP3MM) (spmat*,spmat*,spmat*,CONFIG*,SPMM_INTERF);
typedef spmat* (*SP3MM_INTERF) (spmat*,spmat*,spmat*,CONFIG*,SPMM_INTERF);
typedef enum {
_1D_DIRECT,
_1D_BLOCKS,
_2D_OFFSET,
_2D_ALLOCD
} SPMM_IMPL_TYPE;
///-- commons single implementation stuff
///includes
#include "linuxK_rbtree_minimalized.h"
#include "Sp3MM_CSR_OMP_SymbStep_Multi.h"
//extern char TRGT_IMPL_START_IDX; //multi implementation switch
#include "sparseUtilsMulti.h"
#ifdef OFF_F //save "includer" OFF_F value before overwriting it
#pragma push_macro("OFF_F")
#define _OFF_F_OLD
#undef OFF_F
#endif
#define OFF_F 0
#include "Sp3MM_CSR_OMP_UB_Generic.h"
#include "Sp3MM_CSR_OMP_Num_Generic.h"
#undef OFF_F
#define OFF_F 1
#include "Sp3MM_CSR_OMP_UB_Generic.h"
#include "Sp3MM_CSR_OMP_Num_Generic.h"
#undef OFF_F
#ifdef _OFF_F_OLD
#pragma pop_macro("OFF_F")
#undef _OFF_F_OLD
#endif
#endif //SP3MM_CSR_OMP_MULTI_H

@ -0,0 +1,95 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef OFF_F
/*#pragma message("generic implementation requires OFF_F defined")*/
#error generic implementation requires OFF_F defined
#endif
/////SYMBOLIC - NUMERIC IMPLEMENTATIONS
///SP3MM FUNCTIONS
/*
 * triple matrix multiplication among @R * @AC * @P using Gustavson's parallel formulation,
 * implemented as a pair of subsequent spmm operations
 * if @conf->spmm != NULL, it will be used as the spmm function, otherwise heuristics will be
 * used to decide which implementation to use
*/
SP3MM CAT(sp3mmRowByRowPair_SymbNum_,OFF_F);
/*
 * row-by-row-by-row implementation: forwarding the r-th row of @R*@AC to @P for row-by-row
 * accumulation in preallocated space, TODO exactly determined
 * basic parallelization: 1 thread per row of @R, which also forwards the result to @P
*/
SP3MM CAT(sp3mmRowByRowMerged_SymbNum_,OFF_F);
///SUB FUNCTIONS
///SPMM FUNCTIONS
/*
* sparse parallel implementation of @A * @B parallelizing Gustavson row-by-row
* formulation using an aux dense vector @_auxDense
* return resulting product matrix
*/
SPMM CAT(spmmRowByRow_SymbNum_,OFF_F);
/*
* sparse parallel implementation of @A * @B parallelizing Gustavson
* with partitioning of @A in @conf->gridRows blocks of rows
* return resulting product matrix
*/
SPMM CAT(spmmRowByRow1DBlocks_SymbNum_,OFF_F);
/*
 * sparse parallel implementation of @A * @B as Gustavson parallelized in 2D
* with partitioning of
* @A into rows groups, uniform rows division
* @B into cols groups, uniform cols division, accessed by aux offsets
*/
SPMM CAT(spmmRowByRow2DBlocks_SymbNum_,OFF_F);
/*
 * sparse parallel implementation of @A * @B as Gustavson parallelized in 2D
 * with partitioning of
 * @A into rows groups, uniform rows division
 * @B into cols groups, uniform cols division, ALLOCATED as CSR submatrices
*/
SPMM CAT(spmmRowByRow2DBlocksAllocated_SymbNum_,OFF_F);
///implementation wrappers as static array of function pointers
//sp3mm as pair of spmm
static SPMM_INTERF CAT(Spmm_SymbNum_Funcs_,OFF_F)[] = {
& CAT(spmmRowByRow_SymbNum_,OFF_F),
& CAT(spmmRowByRow1DBlocks_SymbNum_,OFF_F),
//& CAT(spmmRowByRow2DBlocks_SymbNum_,OFF_F),
//& CAT(spmmRowByRow2DBlocksAllocated_SymbNum_,OFF_F)
};
//sp3mm as pair of spmm
static SP3MM_INTERF CAT(Sp3mm_SymbNum_Funcs_,OFF_F)[] = {
//& CAT(sp3mmRowByRowMerged_SymbNum_,OFF_F)
};
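/* usage sketch (C 0-based side, after the OFF_F=0 expansion of this header):
 *	SPMM_INTERF spmm = Spmm_SymbNum_Funcs_0[0];	//spmmRowByRow_SymbNum_0
 *	spmat *RAC = spmm(R,AC,&cfg);			//R*AC
 *	spmat *out = spmm(RAC,P,&cfg);			//(R*AC)*P
 * R, AC, P and the CONFIG cfg are assumed to be set up by the caller */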

@ -0,0 +1,195 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * CSR Sp[3]MM Symbolic step implementations
 * target: compute the output matrix size and the row lens for preallocation
 *         direct write out of partial results
 * See interfaces in respective header
 *
 * MultiImplementations functions with all parameters
 * several (lower level) functions have a config based on
 * OUT_IDXS  for returning or also using the symbMul output idxs somehow
 * COL_PARTS for returning or also using the distribution of symb out idxs in col partitions
 */
///MultiImplementations
///setup aux macros for different signatures implementation via #if arith expr
#ifdef OUT_IDXS
#define _OUT_IDXS TRUE
#else
#define _OUT_IDXS FALSE
#define OUT_IDXS _UNDEF
#endif
#ifdef COL_PARTS
#define _COL_PARTS TRUE
#else
#define _COL_PARTS FALSE
#define COL_PARTS _UNDEF
#endif
///
#if !defined OFF_F
	#error generic implementations require OFF_F
#endif
/*
 * Compute symbolic product of (nnz indexes of) row @aRowJA and matrix @b
 * insert nnz indexes of the mul. result row as nodes in a rbtree rooted at @root
 * with nodes in @nodes, which have to be enough for the mul result row (use an UB)
 * Returns: multiplication result row NNZ number, see CONFIG_MACROS below for more
 *
 * CONFIG_MACROS:
 * if _OUT_IDXS == TRUE	return mul. result row nnz idxs in @outIdxs
 *	ifdef: OUT_IDXS_RBTREE_NODES:	nnz indexes returned in-place, sorting the rbtree
 *				 	as nnz indexes (JA) of the mul result row
 *	 	else:	stop at returning the mul. result row length
 * if _COL_PARTS == TRUE	return the number of nonzero elements
 *	in each of the @gridCols column partitions inside @rowColPartsLens
 * OFF_F:	offset back indexes from fortran
 * TODO also output indexes are shifted (see c_b )
idx_t CAT4(SpMM_Row_Symb_Rbtree,OUT_IDXS,COL_PARTS,OFF_F) (idx_t* aRowJA,idx_t aRowLen,
spmat* b,rbRoot* root,rbNode* nodes
#if _OUT_IDXS == TRUE
#ifndef OUT_IDXS_RBTREE_NODES
,idx_t* outIdxs
#endif
#endif
#if _COL_PARTS == TRUE
,ushort gridCols,idx_t* rowColPartsOffsets
#endif
);
*/
/*
* SPVECT_IDX_DENSE_MAP based, as SpMM_Row_Symb_Rbtree but with idxMap aux idx keeping
* CONFIG_MACROS (new)
* IDX_RMUL_SYMB_RBTREE && ( _OUT_IDXS == T || _COL_PARTS == T ):
* (tmp) symb mult out indexes will be kept via a rbtree
* otherwise directly in the out array appending them and then sorting them
* (potentially same n log n)
idx_t CAT4(SpMM_Row_Symb_IdxMap,OUT_IDXS,COL_PARTS,OFF_F)
(
idx_t* aRowJA, idx_t aRowLen, spmat* b, SPVECT_IDX_DENSE_MAP* idxsMapAcc
#if _OUT_IDXS == TRUE
,idx_t* outIdxs
#if IDX_RMUL_SYMB_RBTREE == TRUE
,rbRoot* root, rbNode* nodes
#endif
#endif // _OUT_IDXS == TRUE
#if _COL_PARTS == TRUE
,ushort gridCols,idx_t* rowColPartsLens
#endif
);
*/
/*
 * Compute symbolic product of sparse matrices @a * @b
 * Alloc aux structures based on an upper bounded allocation
 * Returns array of exact row lens of the result mul matrix c=@a*@b
 * plus an extra entry for the cumulative num of matrix nnz
 * (interoperability with upper bound implementations)
 *
 * CONFIG_MACROS:
 * if _OUT_IDXS == TRUE:
 * 	return in *@outIdxs pointers to the over-allocated JAs nnz indexes
 *	*outIdxs[0] --> start of first row (its size in 0th of returned array)
 *	it is also the malloc returned address, for the later free
 * 	[NB: rows are not contiguous in the allocation]
 * if _COL_PARTS == TRUE: return in *@rowColPartsLens
 *	a matrix @a->M * @gridCols of offsets
 *	for each column partition in each row
 */
idx_t* CAT4(SpMM_Symb_,OUT_IDXS,COL_PARTS,OFF_F)
(
ROW_MMSYM_IMPL_MODE symbRowImplID, spmat* a, spmat* b
#if _OUT_IDXS == TRUE
,idx_t*** outIdxs
#endif
#if _COL_PARTS == TRUE
,ushort gridCols, idx_t** rowColPartsLens
#endif
);
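#if 0	//illustrative usage sketch (assumption, not part of the original sources)
//base configuration only (no OUT_IDXS / COL_PARTS): size the output CSR of c = a*b
//from the exact row lens returned by the symbolic step, then free the aux array
idx_t* rowLens = CAT4(SpMM_Symb_,OUT_IDXS,COL_PARTS,OFF_F) (RBTREE, a, b);
if (rowLens){
	idx_t cNNZ = rowLens[a->M];	//extra last entry: cumulative nnz of c
	//... malloc c->JA and c->AS with cNNZ entries, build c->IRP from rowLens ...
	free(rowLens);
}
#endif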
//////Sp3MM - rowByrowByrow
#define SP3MM_UB_HEURISTIC 2
#if !defined COL_PARTS && defined OUT_IDXS
///MultiImplementations functions without COL_PARTS
//NB required OUT_IDXS for initial row-by-row step...
/*
 * as earlier, but meant to work with the
 * triple product as row-by-row-by-row forwarding:
 * @abRowJATmp	is a tmp storage for the first row-by-row sym product
 * has to be big enough to store all the possible nonzero elements
 * @nodes	has to be big enough to store all possible nnz of the ab and abc rows
 * 		(not only the ab row as in the earlier version)
 *
 */
idx_t CAT3(Sp3MM_Row_Symb_,OUT_IDXS,OFF_F)
(
ROW_MMSYM_IMPL_MODE symbMMRowImplID, idx_t* aRowJA,idx_t aRowLen,
spmat* b,spmat* c, rbRoot* root,rbNode* nodes, idx_t* abRowJATmp
#if _OUT_IDXS == TRUE
#ifndef OUT_IDXS_RBTREE_NODES
,idx_t* outIdxs
#endif
#endif
);
/*
 * triple product @a*@b*@c as row-by-row-by-row forwarding:
 * Returns: resulting matrix rows' sizes exactly in an array
 *	plus an extra element for the res.matrix's total size
 * CONFIG_MACROS:
 * if _OUT_IDXS == TRUE:
 * 	return in *@outIdxs pointers to the over-allocated JAs nnz indexes
 *	*outIdxs[0] --> start of first row (its size in 0th of returned array)
 *	it is also the malloc returned address, for the later free
 * 	[NB: rows are not contiguous in the allocation]
 */
//TODO HEURISTICS IN _OUT_IDXS to avoid serializing after first sym.product
// see HEURISTICS_UB
idx_t* CAT3(Sp3MM_Symb_,OUT_IDXS,OFF_F)
(
ROW_MMSYM_IMPL_MODE symbMMRowImplID, spmat* a, spmat* b, spmat* c
#if _OUT_IDXS == TRUE
,idx_t*** outIdxs
#endif
);
#endif
#undef OUT_IDXS
#undef _OUT_IDXS
#undef COL_PARTS
#undef _COL_PARTS

@ -0,0 +1,80 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SP3MM_CSR_OMP_SYMB_MULTI
#define SP3MM_CSR_OMP_SYMB_MULTI
/* Multi implementation of symbolic product of sparse matrices, config macros
* OFF_F: C-Fortran spmat indexing
* OUT_IDXS: indexes output
* COL_PARTS: partitioning columns output...
*/
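/* Illustrative note (assumption, not part of the original sources):
 * each include below is expected to emit one family of CAT4-composed symbols,
 * e.g. for OFF_F=0:
 *   SpMM_Symb___0, SpMM_Symb_OutIdxs__0, SpMM_Symb__ColParts_0, SpMM_Symb_OutIdxs_ColParts_0
 * and the analogous _1-suffixed set for the Fortran (OFF_F=1) indexing */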
#define OUT_IDXS_ON OutIdxs_
#define COL_PARTS_ON ColParts_
#undef OUT_IDXS
#undef COL_PARTS
#define OFF_F 0
///generate basic versions
#include "Sp3MM_CSR_OMP_SymbStep_Generic.h"
///generate outIdxs versions
#define OUT_IDXS OUT_IDXS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.h"
#undef OUT_IDXS
///generate colParts versions
#define COL_PARTS COL_PARTS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.h"
//generate outIdxs AND colParts versions
#define OUT_IDXS OUT_IDXS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.h"
#undef OUT_IDXS
#undef COL_PARTS
#undef OFF_F
#define OFF_F 1
///generate basic versions
#include "Sp3MM_CSR_OMP_SymbStep_Generic.h"
///generate outIdxs versions
#define OUT_IDXS OUT_IDXS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.h"
#undef OUT_IDXS
///generate colParts versions
#define COL_PARTS COL_PARTS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.h"
//generate outIdxs AND colParts versions
#define OUT_IDXS OUT_IDXS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.h"
#undef OUT_IDXS
#undef COL_PARTS
#undef OFF_F
#endif

@ -0,0 +1,95 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef OFF_F
/*#pragma message("generic implementation requires OFF_F defined")*/
#error generic implementation requires OFF_F defined
#endif
///SP3MM FUNCTIONS
/*
 * triple matrix multiplication among @R * @AC * @P using Gustavson's parallel implementation
 * implemented as a pair of subsequent spmm operations
 * if @conf->spmm != NULL, it will be used as the spmm function, otherwise heuristics will be
 * used to decide which implementation to use
*/
SP3MM CAT(sp3mmRowByRowPair_,OFF_F);
/*
* row-by-row-by-row implementation: forwarding @R*@AC rth row to P for row-by-row
* accumulation in preallocated space, TODO exactly determined
 * basic parallelization: 1 thread per @R row, which will also forward the result to P
*/
SP3MM CAT(sp3mmRowByRowMerged_,OFF_F);
///SUB FUNCTIONS
///SPMM FUNCTIONS
SPMM CAT(spmmSerial_,OFF_F);	//single-thread version for oracle-less debugging
/*
* sparse parallel implementation of @A * @B parallelizing Gustavson row-by-row
* formulation using an aux dense vector @_auxDense
* return resulting product matrix
*/
SPMM CAT(spmmRowByRow_,OFF_F);
/*
* sparse parallel implementation of @A * @B parallelizing Gustavson
* with partitioning of @A in @conf->gridRows blocks of rows
* return resulting product matrix
*/
SPMM CAT(spmmRowByRow1DBlocks_,OFF_F);
/*
 * sparse parallel implementation of @A * @B as Gustavson parallelized in 2D
* with partitioning of
* @A into rows groups, uniform rows division
* @B into cols groups, uniform cols division, accessed by aux offsets
*/
SPMM CAT(spmmRowByRow2DBlocks_,OFF_F);
/*
 * sparse parallel implementation of @A * @B as Gustavson parallelized in 2D
* with partitioning of
* @A into rows groups, uniform rows division
* @B into cols groups, uniform cols division, ALLOCATED as CSR submatrixes
*/
SPMM CAT(spmmRowByRow2DBlocksAllocated_,OFF_F);
///implementation wrappers as static array of function pointers
//sp3mm as pair of spmm
static SPMM_INTERF CAT(Spmm_UB_Funcs_,OFF_F)[] = {
& CAT(spmmRowByRow_,OFF_F),
& CAT(spmmRowByRow1DBlocks_,OFF_F),
& CAT(spmmRowByRow2DBlocks_,OFF_F),
& CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)
};
//sp3mm as direct product
static SP3MM_INTERF CAT(Sp3mm_UB_Funcs_,OFF_F)[] = {
& CAT(sp3mmRowByRowMerged_,OFF_F)
};
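#if 0	//illustrative selection sketch (assumption, not part of the original sources)
//pick one of the upper-bound SpMM implementations for the current indexing (OFF_F),
//e.g. from a CONFIG field or a size-based heuristic (hypothetical criterion)
uint implIdx = 0;
SPMM_INTERF  spmmImpl  = CAT(Spmm_UB_Funcs_,OFF_F) [ implIdx ];
SP3MM_INTERF sp3mmImpl = CAT(Sp3mm_UB_Funcs_,OFF_F)[ 0 ];
#endif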

@ -0,0 +1,406 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
///scalar-vector multiply
//TODO evaluate the gain of using only the generic version of the following 2 functions
/*
* Sparse vector part <->scalar multiplication in a dense output
* sparse vector part will hold nnz values in @vectVals (AS subvector)
* with corresponding indexes in @vectIdxs in range [0,@vectLen] (JA subvector)
* resulting vector accumulated in a dense array in @aux->v, along with nnzIdx
 * both the accumulator's dense array and the nnzIdx in @aux have to be at least @vectLen big
*/
inline void CAT(scSparseVectMul_,OFF_F)
(double scalar,double* vectVals,ulong* vectIdxs,ulong vectLen, ACC_DENSE* aux){
for (ulong i=0,j; i<vectLen; i++){
j = vectIdxs[i]-OFF_F;
DEBUGCHECKS{
if (j>=aux->vLen){
fprintf(stderr,"index %lu outside vLen %lu\n",j,aux->vLen);
assert(j < aux->vLen);
}
}
aux->v[j] += vectVals[i] * scalar; //accumulate
//append new nonzero index to auxVNNZeroIdxs for quick sparsify
//if (!(aux->v[j])) aux->nnzIdx[ aux->nnzIdxMap.len++ ] = j; //TODO numerical zero may then cause readd
if(!spVect_idx_in(j,&aux->nnzIdxMap))
aux->nnzIdx[ aux->nnzIdxMap.len-1 ] = j;
}
}
/* Same as scSparseVectMul_ above, but considering an initial offset to remove from each idx
* Sparse vector part <->scalar multiplication in a dense output
* @vectVals: sparse vector part values ( from spmat.AS )
* with [at least] @vectLen corresponding
* target idxs in @vectIdxs (from spmat.JA ) starting from @startIdx
*
* Resulting vector accumulated in a dense array in @aux->v, along with nnzIdx
 * all nnz values' indexes will be shifted back by @startIdx in @aux
 * both the accumulator's dense array and the nnzIdx in @aux have to be at least @vectLen big
*/
inline void CAT(scSparseVectMulPart_,OFF_F)(double scalar,double* vectVals,
ulong* vectIdxs,ulong vectLen,ulong startIdx,ACC_DENSE* aux){
for (ulong i=0,j; i<vectLen; i++){
j = vectIdxs[i]-OFF_F - startIdx;
DEBUGCHECKS{
if (j>=aux->vLen){
fprintf(stderr,"index %lu outside vLen %lu\n",j,aux->vLen);
assert(j < aux->vLen);
}
}
aux->v[j] += vectVals[i] * scalar; //accumulate
//append new nonzero index to auxVNNZeroIdxs for quick sparsify
//if (!(aux->v[j])) aux->nnzIdx[ aux->nnzIdxMap.len++ ] = j;
////TODO numerical zero may then cause readd
if(!spVect_idx_in(j,&aux->nnzIdxMap))
aux->nnzIdx[ aux->nnzIdxMap.len-1 ] = j;
}
}
///MultiImplementations functions
#ifndef OFF_F
	#error generic implementations require OFF_F defined
#endif
///TODO IMPLEMENT SCALAR<->ROW MUL AS GENERIC SPARSE VECTOR<->SCALAR MUL
//inline void scSparseRowMul(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux){
inline void CAT(scSparseRowMul_,OFF_F)(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux){
ulong rowStartIdx = mat->IRP[trgtR]-OFF_F,rowLen;
#ifdef ROWLENS
rowLen = mat->RL[trgtR];
#else
rowLen = mat->IRP[trgtR+1] - mat->IRP[trgtR];
#endif
CAT(scSparseVectMul_,OFF_F)(scalar,mat->AS+rowStartIdx,mat->JA+rowStartIdx,rowLen,aux);
//TODO check impact of generic version use
}
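#if 0	//illustrative Gustavson row-by-row sketch (assumption, not part of the original sources)
//accumulate row r of C = A*B into the dense accumulator @acc, then sparsify it
for (ulong jj = A->IRP[r]-OFF_F; jj < A->IRP[r+1]-OFF_F; jj++){
	ulong  k    = A->JA[jj] - OFF_F;	//col idx in A == row idx in B
	double a_rk = A->AS[jj];
	CAT(scSparseRowMul_,OFF_F)(a_rk, B, k, acc);	//acc->v += a_rk * B.row(k)
}
//... sparsify @acc into C's row r (e.g. sparsifyDirect / sparsifyUBNoPartsBounds) ...
#endif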
///OUTPUT SIZE PREDICTION
/* O(A.NZ [+B.M] )
* return array of upper bounded row sizes of the product @A * @B
 * the cumulative total size of the matrix AB = A*B is also appended at the end
*/
inline idx_t* CAT(spMMSizeUpperbound_,OFF_F)(spmat* A,spmat* B){
AUDIT_INTERNAL_TIMES Start = omp_get_wtime();
idx_t* rowSizes = calloc((A->M+1), sizeof(*rowSizes));
if (!rowSizes){
ERRPRINT("spMMSizeUpperbound: rowSizes calloc errd\n");
return NULL;
}
idx_t fullMatBound = 0;
#pragma omp parallel for schedule(static) reduction(+:fullMatBound)
for (idx_t r=0; r<A->M; r++){
for (idx_t jj=A->IRP[r] - OFF_F,j,rlen; jj<A->IRP[r+1] - OFF_F; jj++){
j = A->JA[jj] - OFF_F;
#ifdef ROWLENS
rlen = B->RL[j];
#else
rlen = B->IRP[j+1] - B->IRP[j]; //OFF_F: delta is offset independent
#endif
rowSizes[r] += rlen;
fullMatBound += rlen;
//rowSizes[A->M] += rlen; //just below omp reduction sum
}
}
rowSizes[A->M] = fullMatBound;
AUDIT_INTERNAL_TIMES End= omp_get_wtime();
VERBOSE
printf("spMMSizeUpperbound:%lu\t%le s\n",rowSizes[A->M],End-Start);
return rowSizes;
}
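#if 0	//illustrative usage sketch (assumption, not part of the original sources)
//size the shared sparsify space with the upper bound before the numeric phase
idx_t* ubRowLens = CAT(spMMSizeUpperbound_,OFF_F)(A,B);
if (ubRowLens){
	SPMM_ACC* acc = initSpMMAcc(ubRowLens[A->M], A->M);	//UB entries, A->M accumulators (hypothetical sizing)
	//... numeric SpMM: each row reserves its exact slice via sparsifyUBNoPartsBounds ...
	freeSpMMAcc(acc);
	free(ubRowLens);
}
#endif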
/* O(A.NZ + B.NZ)
* return matrix @A.M x gridCols of upper bound
* for each of the @gridCols col partitions of the output matrix AB = @A * @B
 * the cumulative total size of the matrix AB is also appended at the end
*/
inline idx_t* CAT(spMMSizeUpperboundColParts_,OFF_F)
(spmat* A,spmat* B,ushort gridCols,idx_t* bColPartOffsets){
AUDIT_INTERNAL_TIMES Start = omp_get_wtime();
idx_t* rowPartsSizes = calloc((A->M*gridCols +1), sizeof(*rowPartsSizes));
if (!rowPartsSizes){
ERRPRINT("spMMSizeUpperbound: rowPartsSizes calloc errd\n");
return NULL;
}
idx_t fullMatBound = 0;
#pragma omp parallel for schedule(static) reduction(+:fullMatBound)
for (idx_t r=0; r<A->M; r++){
//for each A.row -> sum B.colParts lens
for (idx_t jj=A->IRP[r]-OFF_F,j,rlen; jj<A->IRP[r+1]-OFF_F; jj++){
j = A->JA[jj] - OFF_F;
for (idx_t gc=0,bPartID=IDX2D(j,gc,gridCols); gc < gridCols; gc++,bPartID++){
rlen = bColPartOffsets[bPartID+1] - bColPartOffsets[bPartID];
rowPartsSizes[ IDX2D(r,gc,gridCols) ] += rlen;
fullMatBound += rlen;
}
}
}
rowPartsSizes[ A->M*gridCols ] = fullMatBound;
AUDIT_INTERNAL_TIMES End= omp_get_wtime();
VERBOSE
printf("spMMSizeUpperboundColParts_:%lu\t%le s\n",rowPartsSizes[A->M],End-Start);
return rowPartsSizes;
}
////Single implementations funcs
#ifndef SPMMUTILS_H_SINGLE_IMPLEMENTATION
#define SPMMUTILS_H_SINGLE_IMPLEMENTATION
#include "utils.h"
///Allocs - Free
//SpMM holder of accumulats
inline SPMM_ACC* initSpMMAcc(ulong entriesNum, ulong accumulatorsNum){
SPMM_ACC* out = calloc(1,sizeof(*out));
if (!out){
ERRPRINT("initSpMMAcc: out calloc errd\n");
return NULL;
}
out->size = entriesNum;
if (!(out->JA = malloc(entriesNum * sizeof(*(out->JA))))){
ERRPRINT("initSpMMAcc: JA malloc errd\n");
goto _err;
}
if (!(out->AS = malloc(entriesNum * sizeof(*(out->AS))))){
ERRPRINT("initSpMMAcc: AS malloc errd\n");
goto _err;
}
if (!(out->accs = malloc(accumulatorsNum * sizeof(*(out->accs))))){
ERRPRINT("initSpMMAcc: accs malloc errd\n");
goto _err;
}
return out;
_err:
if (out->JA) free(out->JA);
if (out->AS) free(out->AS);
if (out->accs) free(out->accs);
if (out) free(out);
return NULL;
}
inline void freeSpMMAcc(SPMM_ACC* acc){
free(acc->JA);
free(acc->AS);
free(acc->accs);
free(acc);
}
///// dense acc sparsify functions
/*
 * sparsify dense accumulated vector @accV (shifted by @startColAcc)
* into sparse accumulator @accSparse that'll use space for nnz entries from @acc
*/
///DIRECT OUT MATRIX SPARSIFY
/*
* sparsify @accV directly inside row @row of matrix @m
 * considering, if not NULL, a 2D partitioning with
 * @gridCols column groups and the per-row column-group offsets matrix @colPartsOffsets
 * Returns: in-place modification of @m
*/
static inline void sparsifyDirect(ACC_DENSE* accV,spmat* m,idx_t row){
idx_t nnz = accV->nnzIdxMap.len, sparsifyStart = m->IRP[row], sparsifyEnd = m->IRP[row+1];
sort_idx_t(accV->nnzIdx,nnz); //sort nnz idx for ordered write
DEBUGCHECKS assertArrNoRepetitions(accV->nnzIdx,nnz);
DEBUGCHECKS assert(nnz <= (sparsifyEnd - sparsifyStart));
for (idx_t i=0,j; i < nnz; i++){
j = accV->nnzIdx[i];
m -> JA[sparsifyStart + i] = j; //+ startColAcc;
m -> AS[sparsifyStart + i] = accV->v[j];
}
}
//TODO 2D partitioning - colParts version of above...best to use multi impl trick of symbStep
static inline void sparsifyDirectColParts(ACC_DENSE* accV,spmat* m,idx_t row,
ushort colGroupId,ushort gridCols, idx_t* colPartsOffsets,idx_t startCol){
	idx_t nnz = accV->nnzIdxMap.len;
	idx_t partID = IDX2D(row,colGroupId,gridCols);	//idx_t: row*gridCols may exceed ushort
	idx_t sparsifyStart = colPartsOffsets[partID], sparsifyEnd = colPartsOffsets[partID+1];
	sort_idx_t(accV->nnzIdx,nnz);	//sort nnz idx for ordered write
	DEBUGCHECKS		assertArrNoRepetitions(accV->nnzIdx,nnz);
	DEBUGCHECKS		assert( nnz == sparsifyEnd - sparsifyStart );
for (idx_t i=0,j; i < nnz; i++){
j = accV->nnzIdx[i];
m -> JA[sparsifyStart + i] = j + startCol;
m -> AS[sparsifyStart + i] = accV->v[j];
}
}
///UB SPACE SPARSIFY
//internal sparsify of a dense acc inside a (prepared) sparse acc struct
static inline void _sparsifyUB(ACC_DENSE* accV,SPACC* accSparse,idx_t startColAcc){
idx_t nnz = accV->nnzIdxMap.len;
sort_idx_t(accV->nnzIdx,nnz); //sort nnz idx for ordered write
DEBUGCHECKS assertArrNoRepetitions(accV->nnzIdx,nnz);
for (idx_t i=0,j; i < nnz; i++){
j = accV->nnzIdx[i];
accSparse -> JA[i] = j + startColAcc;
accSparse -> AS[i] = accV->v[j];
}
accSparse -> len = nnz;
}
//row[Part] sparsified in a thread-safe, exactly-sized reserved area using atomics
static inline void sparsifyUBNoPartsBounds
(SPMM_ACC* acc,ACC_DENSE* accV,SPACC* accSparse, ulong startColAcc){
//sort nnz indexes of dense accumulator
idx_t nnz = accV->nnzIdxMap.len;
idx_t sparsifyStartV; //start index(inside @accSparse) of @accV to sparsify
//sparsifyStartV = __atomic_fetch_add(&(acc->lastAssigned),nnz,__ATOMIC_ACQ_REL);
#pragma omp atomic capture
{ //fetch and add like ....
sparsifyStartV = acc->lastAssigned;
acc->lastAssigned += nnz;
}
DEBUGCHECKS{
if (acc->lastAssigned >= acc->size){
ERRPRINT("OMP ATOMIC OR SG ERRD IN SPACE ASSIGNMENTS...\n");
assert(acc->lastAssigned < acc->size);
}
}
//
accSparse -> AS = acc->AS + sparsifyStartV;
accSparse -> JA = acc->JA + sparsifyStartV;
_sparsifyUB(accV,accSparse,startColAcc);
}
////output-gather functions
/*
 * merge the @conf->gridCols * @mat->M sparse row partitions into @mat
 * EXPECTED: @rowsParts sorted in accord with the
 * 2D rowMajor computing grid given in @conf
 * arrays to hold the nonzero values and indexes are alloc'd into @mat
*/
inline int mergeRowsPartitions(SPACC* rowsParts,spmat* mat,
CONFIG* conf){
ulong nzNum=0,j,rLen,idx,partsNum = mat->M * conf->gridCols;
//TODO PARALLEL MERGE - SAVE STARTING offset OF EACH PARTITION IN THE OUT MATRIX
ulong* rowsPartsOffsets=alloca(partsNum*sizeof(*rowsPartsOffsets));
///count nnz entries and alloc arrays for them
for (ulong r=0; r<mat->M; r++){
//for each partition ->get len -> outMat.IRP and aux offsets
for (j=0,rLen=0; j<conf->gridCols; j++){
idx = IDX2D(r,j,conf->gridCols);
rowsPartsOffsets[idx]=nzNum+rLen;//part start=prev accumulated end
rLen += rowsParts[idx].len;
}
nzNum += rLen;
mat->IRP[r+1] = nzNum;
#ifdef ROWLENS
mat->RL[r] = rLen;
#endif
}
mat->NZ = nzNum;
if (!(mat->AS = malloc(nzNum * sizeof(*(mat->AS))))){
ERRPRINT("merged sparse matrix AS alloc errd\n");
return EXIT_FAILURE;
}
if (!(mat->JA = malloc(nzNum * sizeof(*(mat->JA))))){
ERRPRINT("merged sparse matrix JA alloc errd\n");
return EXIT_FAILURE;
}
	///populate with rows' nnz values and indexes
ulong pLen; //omp for aux vars
#pragma omp parallel for schedule(static) private(pLen)
for (ulong i=0; i<partsNum; i++){
pLen = rowsParts[i].len;
memcpy(mat->AS + rowsPartsOffsets[i],rowsParts[i].AS,pLen*sizeof(*(mat->AS)));
memcpy(mat->JA + rowsPartsOffsets[i],rowsParts[i].JA,pLen*sizeof(*(mat->JA)));
}
CONSISTENCY_CHECKS{ //TODO REMOVE written nnz check manually
for (ulong i=0,w=0; i<mat->M; i++){
if (mat->IRP[i] != w)
{ERRPRINT("MERGE ROW ERR IRP\n");return -1;}
for (j=0; j<conf->gridCols; j++){
SPACC r = rowsParts[IDX2D(i,j,conf->gridCols)];
for (ulong jj=0; jj<r.len; jj++,w++){
if (mat->AS[w]!= r.AS[jj]){
ERRPRINT("MERGE ROW ERR AS\n"); return -1;}
if (mat->JA[w]!= r.JA[jj]){
ERRPRINT("MERGE ROW ERR JA\n"); return -1;}
}
}
}
}
return EXIT_SUCCESS;
}
/*
 * merge the @mat->M sparse rows @rows into sparse matrix @mat
 * EXPECTED: @rows sorted in accord with the target matrix row index @r
 * arrays to hold the nonzero values and indexes are alloc'd into @mat
*/
inline int mergeRows(SPACC* rows,spmat* mat){
ulong nzNum=0;
//count nnz entries and alloc arrays for them
for (ulong r=0; r<mat->M; ++r){
nzNum += rows[r].len;
mat->IRP[r+1] = nzNum;
#ifdef ROWLENS
		mat->RL[r] = rows[r].len;
#endif
}
mat->NZ = nzNum;
if (!(mat->AS = malloc(nzNum * sizeof(*(mat->AS))))){
ERRPRINT("merged sparse matrix AS alloc errd\n");
return EXIT_FAILURE;
}
if (!(mat->JA = malloc(nzNum * sizeof(*(mat->JA))))){
ERRPRINT("merged sparse matrix JA alloc errd\n");
return EXIT_FAILURE;
}
	///POPULATE WITH ROWS' NNZ VALUES AND INDEXES
//TODO PARALLEL COPY
#pragma omp parallel for schedule(static)
for (ulong r=0; r<mat->M; r++){
memcpy(mat->AS+mat->IRP[r], rows[r].AS, rows[r].len*sizeof(*(mat->AS)));
memcpy(mat->JA+mat->IRP[r], rows[r].JA, rows[r].len*sizeof(*(mat->JA)));
}
CONSISTENCY_CHECKS{ //TODO REMOVE written nnz check manually
for (ulong r=0,i=0; r<mat->M; r++){
if (i != mat->IRP[r])
{ERRPRINT("MERGE ROW ERR IRP\n");return -1;}
for (ulong j=0; j<rows[r].len; j++,i++){
if (mat->AS[i]!= rows[r].AS[j]){
ERRPRINT("MERGE ROW ERR AS\n"); return -1;}
if (mat->JA[i] != rows[r].JA[j]){
ERRPRINT("MERGE ROW ERR JA\n"); return -1;}
}
}
}
return EXIT_SUCCESS;
}
#endif //SPMMUTILS_H_SINGLE_IMPLEMENTATION

@ -0,0 +1,56 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SPMMUTILSMULTI_H
#define SPMMUTILSMULTI_H
extern char TRGT_IMPL_START_IDX; //multi implementation switch
#ifdef OFF_F //save "includer" OFF_F value before overwriting it
#pragma push_macro("OFF_F")
#define _OFF_F_OLD
#undef OFF_F
#endif
#define OFF_F 0
#include "SpMMUtilsGeneric.h"
#undef OFF_F
#define OFF_F 1
#include "SpMMUtilsGeneric.h"
#undef OFF_F
#ifdef _OFF_F_OLD
#pragma pop_macro("OFF_F")
#undef _OFF_F_OLD
#endif
#endif

@ -0,0 +1,155 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CONFIG_H
#define CONFIG_H
//CONFIG STRUCT DEPENDENCIES
//switch among different symb row -> rows implementation variants
typedef enum{
RBTREE,
IDXMAP
} ROW_MMSYM_IMPL_MODE;
//
typedef struct{
ushort gridRows;
ushort gridCols;
//TODO FULL CONFIG DOCCED HERE
ROW_MMSYM_IMPL_MODE symbMMRowImplID; //how to compute symb mul (if required)
uint threadNum; //thread num to use in an OMP parallel region ...
void* chunkDistrbFunc; //CHUNKS_DISTR_INTERF func pntr
} CONFIG;
///Smart controls
typedef size_t idx_t; //spmat indexes
typedef unsigned __int128 uint128;
#include "macros.h"
#ifndef SPARSIFY_PRE_PARTITIONING
	#define SPARSIFY_PRE_PARTITIONING TRUE //u.b. implementation will sparsify the dense acc in a pre-split mem area
#endif
///AUDIT&CHECKS
//debug checks and tmp stuff
#ifndef DEBUG
#define DEBUG if( TRUE )
#endif
//long prints
#ifndef DEBUGPRINT
#define DEBUGPRINT if( FALSE )
#endif
//heavy impact debug checks
#ifndef DEBUGCHECKS
#define DEBUGCHECKS if( FALSE )
#endif
//extra print in the normal output
#ifndef AUDIT_INTERNAL_TIMES
#define AUDIT_INTERNAL_TIMES if( TRUE )
#endif
#ifndef VERBOSE
#define VERBOSE if( FALSE )
#endif
//extra checks over the input and the correctness of partials
#ifndef CONSISTENCY_CHECKS
#define CONSISTENCY_CHECKS if( TRUE )
#endif
#ifndef SPVECT_IDX_BITWISE //SPVECT_IDX_DENSE_ACC.nnzIdxsFlags will be an array of bitflags
#define SPVECT_IDX_BITWISE TRUE
#endif
#if SPVECT_IDX_BITWISE == TRUE
#ifndef LIMB_T
#define LIMB_T uint128
#endif
typedef LIMB_T limb_t;
typedef limb_t* nnz_idxs_flags_t;
#define LIMB_SIZE_BIT ( sizeof(limb_t) * 8 )
#else	//nnz idxs are flags in a byte array
typedef uchar* nnz_idxs_flags_t;
#define LIMB_SIZE_BIT ( sizeof(uchar) * 8 )
#endif
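//e.g. with SPVECT_IDX_BITWISE == TRUE the presence flag of column j is assumed to live in
//limb j / LIMB_SIZE_BIT, bit j % LIMB_SIZE_BIT, of an nnz_idxs_flags_t array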
///AUDIT extra configuration
//#define ROWLENS
#ifdef ROWLENS
/*#pragma message("ROW_LENS ARRAY ENABLED")*/
#endif
/*
* idxsMapAcc based symb row*rows, outIdxs and colParts carried with an aux rbtree
 * otherwise carried through an append array
* (hyp same computational cost... n + nlog n)
*/
#ifndef IDX_RMUL_SYMB_RBTREE
#define IDX_RMUL_SYMB_RBTREE FALSE
#endif
#ifndef RB_CACHED_INSERT
#define RB_CACHED_INSERT TRUE //use cached insert
#endif
//#define USE_RB_ROOT_CACHE_LMOST //use leftmost leaf cached in rbtree in sym mul
///CONSTS
#define ELL_MAX_ENTRIES ( 6l << 27 ) //2*6GB of ell (padded) entries maxSupport in a matrix
#define LIMIT_ELL_SIZE //enable above threshold
#define ELL_AS_FILLER (0 ) //handled with calloc
//TODO NOW FILLED WITH LAST NNPADDED COL #define ELL_JA_FILLER (-1)
//#define DOUBLE_VECT_DIFF_EARLY_EXIT 1
//#define RNDVECTMIN 222222
#define VECTOR_STEP_REALLOC 25
#define VECTOR_READ_BLOCK 50 //file (raw) vector read block
#define RNDVECTORSIZE 100000
#define RNDVECTORDUMP TMPDIR "rndVectorDump"
#define RNDVECTORDUMPRAW TMPDIR "rndVectorDumpRaw"
#define OUTVECTORDUMP TMPDIR "outVectorDump"
#define OUTVECTORDUMPRAW TMPDIR "outVectorDumpRaw"
//#define FLOAT_DIFF_ABS
#ifndef AVG_TIMES_ITERATION
#define AVG_TIMES_ITERATION 5
#endif
//ompChunksDivide.h -> chunksFairFolded()
#ifndef FAIR_CHUNKS_FOLDING
#define FAIR_CHUNKS_FOLDING 4
#endif
//SPMV specific
//rows partitions for dotProduct SIMD reduction enable
#ifndef SIMD_ROWS_REDUCTION
#define SIMD_ROWS_REDUCTION TRUE
#endif
/*#if SIMD_ROWS_REDUCTION == TRUE
#pragma message("SIMD_ROWS_REDUCTION enabled")
//TODO SOME TRICK TO HAVE 1! PRINT
#endif*/
extern double Start,End,Elapsed,ElapsedInternal;
#define DOUBLE_DIFF_THREASH 7e-4
#define DRNG_DEVFILE "/dev/urandom"
#define MAXRND 3e-5
#ifndef TMPDIR
#define TMPDIR "/tmp/"
#endif
#define TMP_EXTRACTED_MARTIX TMPDIR "extractedMatrix"
#endif //CONFIG_H

@ -0,0 +1,49 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
//inline export - single implementation functions
void cleanRbNodes(rbRoot* root,rbNode* nodes,idx_t nodesNum);
int rbInsertNewKey(rbRoot *root,rbNode *node, idx_t key);
//void C_FortranShiftIdxs(spmat* outMat);
//void Fortran_C_ShiftIdxs(spmat* m);
ACC_DENSE* _initAccVectors(ulong num,ulong size);
ACC_DENSE* _initAccVectors_monoalloc(ulong num,ulong size); //TODO PERF WITH NEXT
SPMM_ACC* initSpMMAcc(ulong entriesNum, ulong accumulatorsNum);
idx_t reductionMaxSeq(idx_t* arr,idx_t arrLen);
int _allocAccDense(ACC_DENSE* v,ulong size);
int mergeRows(SPACC* rows,spmat* mat);
int mergeRowsPartitions(SPACC* rowsParts,spmat* mat,CONFIG* conf);
void _freeAccsDenseChecks(ACC_DENSE* vectors,ulong num);
void _resetAccVect(ACC_DENSE* acc);
void _resetIdxMap(SPVECT_IDX_DENSE_MAP* acc);
void assertArrNoRepetitions(idx_t* arrSorted, idx_t arrLen);
void freeAccsDense(ACC_DENSE* vectors,ulong num);
void freeSpMMAcc(SPMM_ACC* acc);
void sparsifyDenseVect(SPMM_ACC* acc,ACC_DENSE* accV,SPACC* accSparse, ulong startColAcc);

@ -0,0 +1,43 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "macros.h"
#ifndef OFF_F
#error generic implementation requires OFF_F defined
#endif
////inline exports
//multi implementation functions
void CAT(scSparseVectMul_,OFF_F)(double scalar,double* vectVals,ulong* vectIdxs,ulong vectLen, ACC_DENSE* aux);
void CAT(scSparseVectMulPart_,OFF_F)(double scalar,double* vectVals,ulong* vectIdxs,ulong vectLen,ulong startIdx,ACC_DENSE* aux);
void CAT(_scRowMul_,OFF_F)(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux);
void CAT(scSparseRowMul_,OFF_F)(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux);
idx_t* CAT(spMMSizeUpperbound_,OFF_F)(spmat* A,spmat* B);
idx_t* CAT(spMMSizeUpperboundColParts_,OFF_F)(spmat* A,spmat* B,ushort gridCols,idx_t* bColPartOffsets);

@ -0,0 +1,335 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
Red Black Trees
(C) 1999 Andrea Arcangeli <andrea@suse.de>
  Userspace GNUC porting, removal of augmented deps and minimization to a few OPs only:	Andrea Di Iorio
linux/include/linux/rbtree.h
To use rbtrees you'll have to implement your own insert and search cores.
This will avoid us to use callbacks and to drop drammatically performances.
I know it's not the cleaner way, but in C (not in C++) to get
performances and genericity...
See Documentation/core-api/rbtree.rst for documentation and samples.
*/
/*
* RedBlackTree_linux_userspace
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the RedBlackTree_linux_userspace or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE RedBlackTree_linux_userspace GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* https://bitbucket.org/andysnake96/redblacktree_linux_userspace
*/
#ifndef _LINUX_RBTREE_H
#define _LINUX_RBTREE_H
//#include <linux/kernel.h> //TODO LESS_DEPENDENCIES
//#include <linux/stddef.h> //TODO LESS_DEPENDENCIES
//#include <linux/rcupdate.h> //TODO LESS_DEPENDENCIES
#include <stdlib.h>
#include <stdio.h>
#include <macros.h>			///my useful macros :)
#include <macrosLinuxMock.h>	///my useful macros :)
struct rb_node {
unsigned long __rb_parent_color;
struct rb_node *rb_right;
struct rb_node *rb_left;
} __attribute__((aligned(sizeof(long))));
/* The alignment might seem pointless, but allegedly CRIS needs it */
struct rb_root {
struct rb_node *rb_node;
};
/////MINIMALIZE fulfilling rbtree_augmented deps TODO
///rb_node colors labels
#define RB_RED 0
#define RB_BLACK 1
#define __rb_parent(pc) ((struct rb_node *)(pc & ~3))
#define __rb_color(pc) ((pc) & 1)
#define __rb_is_black(pc) __rb_color(pc)
#define __rb_is_red(pc) (!__rb_color(pc))
#define rb_color(rb) __rb_color((rb)->__rb_parent_color)
#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color)
#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color)
static inline void rb_set_parent_color(struct rb_node *rb,
struct rb_node *p, int color)
{
rb->__rb_parent_color = (unsigned long)p | color;
}
///rb_set_parent //TODO GENERIC ADDONLY
static inline void
__rb_change_child(struct rb_node *old, struct rb_node *new,
struct rb_node *parent, struct rb_root *root)
{
if (parent) {
if (parent->rb_left == old)
WRITE_ONCE(parent->rb_left, new);
else
WRITE_ONCE(parent->rb_right, new);
} else
WRITE_ONCE(root->rb_node, new);
}
/////////////////////////////
#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3))
#define RB_ROOT (struct rb_root) { NULL, }
#define rb_entry(ptr, type, member) container_of(ptr, type, member)
#define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL)
/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */
#define RB_EMPTY_NODE(node) \
((node)->__rb_parent_color == (unsigned long)(node))
#define RB_CLEAR_NODE(node) \
((node)->__rb_parent_color = (unsigned long)(node))
extern void rb_insert_color(struct rb_node *, struct rb_root *);
extern void rb_erase(struct rb_node *, struct rb_root *);
/* Find logical next and previous nodes in a tree */
extern struct rb_node *rb_next(const struct rb_node *);
extern struct rb_node *rb_prev(const struct rb_node *);
extern struct rb_node *rb_first(const struct rb_root *);
extern struct rb_node *rb_last(const struct rb_root *);
/* Postorder iteration - always visit the parent after its children */
extern struct rb_node *rb_first_postorder(const struct rb_root *);
extern struct rb_node *rb_next_postorder(const struct rb_node *);
/* Fast replacement of a single node without remove/rebalance/add/rebalance */
extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
struct rb_root *root);
extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
struct rb_root *root);
static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
struct rb_node **rb_link)
{
node->__rb_parent_color = (unsigned long)parent;
node->rb_left = node->rb_right = NULL;
*rb_link = node;
}
/*static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
struct rb_node **rb_link)
{
node->__rb_parent_color = (unsigned long)parent;
node->rb_left = node->rb_right = NULL;
rcu_assign_pointer(*rb_link, node);
}**/ //TODO LESS_DEPENDENCIES
#define rb_entry_safe(ptr, type, member) \
({ typeof(ptr) ____ptr = (ptr); \
____ptr ? rb_entry(____ptr, type, member) : NULL; \
})
/**
* rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of
* given type allowing the backing memory of @pos to be invalidated
*
* @pos: the 'type *' to use as a loop cursor.
* @n: another 'type *' to use as temporary storage
* @root: 'rb_root *' of the rbtree.
* @field: the name of the rb_node field within 'type'.
*
* rbtree_postorder_for_each_entry_safe() provides a similar guarantee as
* list_for_each_entry_safe() and allows the iteration to continue independent
* of changes to @pos by the body of the loop.
*
* Note, however, that it cannot handle other modifications that re-order the
* rbtree it is iterating over. This includes calling rb_erase() on @pos, as
* rb_erase() may rebalance the tree, causing us to miss some nodes.
*/
#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \
for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \
pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \
typeof(*pos), field); 1; }); \
pos = n)
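#if 0	/* illustrative usage sketch (assumption, not part of the original sources) */
/* postorder visit of a tree whose elements embed a struct rb_node named 'rb';
 * safe even if the visit releases the backing memory of each visited element */
struct item { int payload; struct rb_node rb; };	/* hypothetical element type */
struct item *pos, *n;
rbtree_postorder_for_each_entry_safe(pos, n, &someRoot, rb)	/* someRoot: struct rb_root */
	free(pos);
#endif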
/*
* Leftmost-cached rbtrees.
*
* We do not cache the rightmost node based on footprint
* size vs number of potential users that could benefit
* from O(1) rb_last(). Just not worth it, users that want
* this feature can always implement the logic explicitly.
* Furthermore, users that want to cache both pointers may
* find it a bit asymmetric, but that's ok.
*/
struct rb_root_cached {
struct rb_root rb_root;
struct rb_node *rb_leftmost;
};
#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
/* Same as rb_first(), but O(1) */
#define rb_first_cached(root) (root)->rb_leftmost
static inline void rb_insert_color_cached(struct rb_node *node,
struct rb_root_cached *root,
bool leftmost)
{
if (leftmost)
root->rb_leftmost = node;
rb_insert_color(node, &root->rb_root);
}
static inline void rb_erase_cached(struct rb_node *node,
struct rb_root_cached *root)
{
if (root->rb_leftmost == node)
root->rb_leftmost = rb_next(node);
rb_erase(node, &root->rb_root);
}
static inline void rb_replace_node_cached(struct rb_node *victim,
struct rb_node *new,
struct rb_root_cached *root)
{
if (root->rb_leftmost == victim)
root->rb_leftmost = new;
rb_replace_node(victim, new, &root->rb_root);
}
///////////////////////////////////////////////////////////////////////////////
/***END OF ORIGINAL LINUX KERNEL HEADER MINIMALIZED
***FEW AUX FUNCS TO SUPPORT SpMM-Symbolic
***/
#include <string.h>
#include "config.h"
typedef struct{
idx_t key;
struct rb_node rb;
/* following fields used for testing augmented rbtree functionality
u32 val;
u32 augmented; ///only for AUGMENTED_TEST
*/
} rbNode;
//static struct rb_root_cached root = RB_ROOT_CACHED;
typedef struct rb_root_cached rbRoot;
/*
* return 1 if @node with the given key @key
* has been inserted in rbtree rooted at @root; 0 otherwise
*/
static inline int rbInsertStdNewKey(rbRoot *root,rbNode *node, idx_t key)
{
struct rb_node **new = &root->rb_root.rb_node, *parent = NULL;
idx_t parentK;
while (*new) {
parent = *new;
parentK = rb_entry(parent, rbNode, rb)->key;
if (key < parentK) new = &parent->rb_left;
else if (key > parentK) new = &parent->rb_right;
else return 0; //already in
DEBUGCHECKS assert( *new != parent );
}
//insert the node in the correct position, assigning the key
/*DEBUGCHECKS{ //check for double insertion
rbNode testNode;
memset(&testNode,0,sizeof(testNode));
assert( !memcmp(node,&testNode,sizeof(*node)) );
for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n))
assert( n != *new );
}*/
node->key = key;
rb_link_node(&node->rb, parent, new);
rb_insert_color(&node->rb, &root->rb_root);
return 1;
}
static inline int rbInsertCachedNewKey(rbRoot *root,rbNode *node, idx_t key)
{
struct rb_node **new = &root->rb_root.rb_node, *parent = NULL;
idx_t parentK;
bool leftmost = true;
while (*new) {
parent = *new;
parentK = rb_entry(parent, rbNode, rb)->key;
if (key < parentK) new = &parent->rb_left;
else if (key > parentK) {
new = &parent->rb_right;
leftmost = false;
}
else return 0;
}
//insert the node in the correct position, assigning the key
/*DEBUGCHECKS{ //check for double insertion
rbNode testNode;
memset(&testNode,0,sizeof(testNode));
assert( !memcmp(node,&testNode,sizeof(*node)) );
for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n))
assert( n != *new );
}*/
node->key = key;
rb_link_node(&node->rb, parent, new);
rb_insert_color_cached(&node->rb, root, leftmost);
return 1;
}
static inline int rbInsertNewKey(rbRoot *root,rbNode *node, idx_t key){
#if RB_CACHED_INSERT == TRUE
return rbInsertCachedNewKey(root,node,key);
#else
return rbInsertStdNewKey(root,node,key);
#endif
}
#define rbNodeOrderedVisit(n,root) \
for (n = rb_first(&root->rb_root); n; n = rb_next(n));
inline void cleanRbNodes(rbRoot* root,rbNode* nodes,idx_t nodesNum){
memset(nodes,0,nodesNum * sizeof(*nodes));
memset(root,0,sizeof(*root));
}
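#if 0	/* illustrative usage sketch (assumption, not part of the original sources) */
/* count the distinct column indexes of a symbolic row product via the rbtree,
 * then visit them in ascending order; UB and candidates[] are hypothetical */
rbRoot root = RB_ROOT_CACHED;
rbNode nodes[UB];
memset(nodes, 0, sizeof(nodes));
idx_t rowNNZ = 0;
for (idx_t i = 0; i < candidatesNum; i++)	/* duplicate keys are rejected (return 0) */
	rowNNZ += rbInsertNewKey(&root, nodes + rowNNZ, candidates[i]);
for (struct rb_node* n = rb_first(&root.rb_root); n; n = rb_next(n))
	printf("%lu\n", (unsigned long) rb_entry(n, rbNode, rb)->key);
cleanRbNodes(&root, nodes, UB);
#endif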
#endif /* _LINUX_RBTREE_H */

@ -0,0 +1,118 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MACROS
#define MACROS
#include <stdio.h>
#include <stdlib.h>
///aux macro-functions
#define ABS(a) ((a) > 0 ? (a) : -(a))
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define AVG(a,b) ((a)/2 + (b)/2 + ((a)%2+(b)%2)/2)
#define SWAP(a,b) (a)=(a)^(b);(b)=(b)^(a);(a)=(a)^(b)
#define IN_RANGE(i,start,end) ( (start) <= (i) && (i) <= (end) )
//ceil(x/y) with integers
#define INT_DIV_CEIL(x,y) ( ( (x) - 1) / (y) + 1 )
//2D ROW MAJOR indexing wrap compute
#define IDX2D(i,j,nCols) ((j) + (i)*(nCols))
///distribute remainder @rem among groups, giving an extra +1 to the first @rem
#define UNIF_REMINDER_DISTRI(i,div,rem) \
( (div) + ( (i) < (rem) ? 1 : 0 ) )
#define UNIF_REMINDER_DISTRI_STARTIDX(i,div,rem) \
( (i) * (div) + MIN( (i),(rem) ) )
#define UNIF_REMINDER_DISTRI_ENDIDX(i,div,rem) \
( (i+1) * (div) + MIN( (i),(rem) ) )
//shorter name alias
#define unifRemShare(i,div,rem) UNIF_REMINDER_DISTRI(i,div,rem)
#define unifRemShareStart(i,div,rem) UNIF_REMINDER_DISTRI_STARTIDX(i,div,rem)
#define unifRemShareEnd(i,div,rem) UNIF_REMINDER_DISTRI_ENDIDX( (i) , (div) , (rem) )
#define unifRemShareBlock(i,div,rem) unifRemShareStart(i,div,rem), unifRemShare(i,div,rem)
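//e.g. 10 items in 3 groups: div=3, rem=1  ->  unifRemShare(i)      = 4,3,3
//                                             unifRemShareStart(i) = 0,4,7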
#define STATIC_ARR_ELEMENTS_N(arr) (sizeof( (arr) ) / (sizeof(*(arr))))
////STRING UTILS
#define _STR(s) #s
#define STR(s) _STR(s)
///CONCATENATE
//Concatenate preprocessor tokens A and B WITHOUT expanding macro definitions
#define _CAT(a,b) a ## b
#define _CAT3(a,b,c) a ## b ## c
#define _CAT4(a,b,c,d) a ## b ## c ## d
//Concatenate preprocessor tokens A and B EXPANDING macro definitions
#define CAT(a,b) _CAT(a,b)
#define CAT3(a,b,c) _CAT3(a,b,c)
#define CAT4(a,b,c,d) _CAT4(a,b,c,d)
#define _UNDEF _
////PRINTS
#define CHIGHLIGHT "\33[1m\33[92m"
#define CCC CHIGHLIGHT
#define CHIGHLIGHTERR "\33[31m\33[1m\33[44m"
#define CCCERR CHIGHLIGHTERR
#define CEND "\33[0m"
#define hprintsf(str,...) printf( CHIGHLIGHT str CEND,__VA_ARGS__ )
#define hprintf(str) printf( CHIGHLIGHT str CEND)
#define ERRPRINTS(str,...) fprintf( stderr, CHIGHLIGHTERR str CEND,__VA_ARGS__ )
#define ERRPRINT(str) fprintf( stderr, CHIGHLIGHTERR str CEND )
#include <assert.h>
///aux types
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned int uint;
typedef unsigned long ulong;
typedef char bool;
#define TRUE (!0)
#define FALSE 0
#define true TRUE
#define false FALSE
#define T TRUE
#define F FALSE
//smart decimal type custom precision def build macro _DECIMAL_TRGT_PREC
#ifndef _DECIMAL_TRGT_PREC
//dflt floating point precision & formatting chars
#define _DECIMAL_TRGT_PREC double
#define _DECIMAL_TRGT_PREC_PR "%lf"
#else
//TODO SELECT WITH RESPECT TO THE EXPORTED TARGET DECIMAL TYPE
#define _DECIMAL_TRGT_PREC_PR "%f"
#endif
typedef _DECIMAL_TRGT_PREC decimal;
///EXTRA INCLUDE --- cuda
///assertions are disabled at compile time by defining the NDEBUG preprocessor macro before including assert.h
//#ifdef ASSERT #include <assert.h> #endif
#endif //MACROS

@ -0,0 +1,140 @@
/*
* RedBlackTree_linux_userspace
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the RedBlackTree_linux_userspace or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE RedBlackTree_linux_userspace GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MACROSLINUXMOCK
#define MACROSLINUXMOCK
////////////////////////// LINUX KERNEL Userspace RBTree //////////////////////////////
typedef unsigned int u32;
typedef unsigned int cycles_t;
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
/**
* container_of - cast a member of a structure out to the containing structure
* @ptr: the pointer to the member.
* @type: the type of the container struct this is embedded in.
* @member: the name of the member within the struct.
* TODO LESS_DEPENDENCIES
removed:
BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \
!__same_type(*(ptr), void), \
"pointer type mismatch in container_of()"); \
*/
#define container_of(ptr, type, member) ({ \
void *__mptr = (void *)(ptr); \
((type *)(__mptr - offsetof(type, member))); })
#define NOOP(x) x
//#define NOOP do {} while(0)
#undef unlikely
#define unlikely(x) NOOP(x)
/*
* Yes, this permits 64-bit accesses on 32-bit architectures. These will
* actually be atomic in some cases (namely Armv7 + LPAE), but for others we
* rely on the access being split into 2x32-bit accesses for a 32-bit quantity
* (e.g. a virtual address) and a strong prevailing wind.
*/
#define compiletime_assert_rwonce_type(t) \
compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
"Unsupported access size for {READ,WRITE}_ONCE().")
///TODO OVERWRITTEN FOR PORTING
#undef compiletime_assert_rwonce_type
#define compiletime_assert_rwonce_type(t) NOOP(t)
/*
* Use __READ_ONCE() instead of READ_ONCE() if you do not require any
* atomicity. Note that this may result in tears!
*/
#ifndef __READ_ONCE
#define __READ_ONCE(x) (*(const volatile __unqual_scalar_typeof(x) *)&(x))
#endif
#define READ_ONCE(x) \
({ \
compiletime_assert_rwonce_type(x); \
__READ_ONCE(x); \
})
#define __WRITE_ONCE(x, val) \
do { \
*(volatile typeof(x) *)&(x) = (val); \
} while (0)
/*//TODO ORIGINAL VERSION
#define WRITE_ONCE(x, val) \
do { \
compiletime_assert_rwonce_type(x); \
__WRITE_ONCE(x, val); \
} while (0)
*/
#define WRITE_ONCE(x,val) x = val
/*
* Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need to load a
* word from memory atomically but without telling KASAN/KCSAN. This is
* usually used by unwinding code when walking the stack of a running process.
*/
#define READ_ONCE_NOCHECK(x) \
({ \
compiletime_assert(sizeof(x) == sizeof(unsigned long), \
"Unsupported access size for READ_ONCE_NOCHECK()."); \
(typeof(x))__read_once_word_nocheck(&(x)); \
})
#include <x86intrin.h> //rdtsc
static inline cycles_t get_cycles(void)
{
/**#ifndef CONFIG_X86_TSC
if (!boot_cpu_has(X86_FEATURE_TSC))
return 0;
#endif */ ///TODO LESS_DEPENDENCIES
return __rdtsc();
}
/**#define WARN_ON_ONCE(condition) ({ \
static bool __section(".data.once") __warned; \
int __ret_warn_once = !!(condition); \
\
if (unlikely(__ret_warn_once && !__warned)) { \
__warned = true; \
WARN_ON(1); \
} \
unlikely(__ret_warn_once); \
}) */ //TODO LESS_DEPENDENCIES
#include <assert.h>
#define WARN_ON_ONCE(condition) assert( !(condition) )
#define div_u64(a,b) ( a / b )
#define kfree(x) free(x)
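/*
 * Usage sketch for the container_of() mock above (the point3 type and the
 * point3FromZ helper are illustrative only): recover a pointer to the
 * enclosing struct from a pointer to one of its members, as the userspace
 * rbtree code does for its embedded struct rb_node.
 */
typedef struct { double x, y, z; } point3;
static inline point3* point3FromZ(double* zPtr){
	return container_of(zPtr, point3, z);	//walk back from &p->z to p
}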
#endif //MACROSLINUXMOCK

@ -0,0 +1,133 @@
/*
* Matrix Market I/O library for ANSI C
*
* See http://math.nist.gov/MatrixMarket for details.
*
*
*/
#ifndef MM_IO_H
#define MM_IO_H
#define MM_MAX_LINE_LENGTH 1025
#define MatrixMarketBanner "%%MatrixMarket"
#define MM_MAX_TOKEN_LENGTH 64
typedef char MM_typecode[4];
char *mm_typecode_to_str(MM_typecode matcode);
int mm_read_banner(FILE *f, MM_typecode *matcode);
int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz);
int mm_read_mtx_array_size(FILE *f, int *M, int *N);
int mm_write_banner(FILE *f, MM_typecode matcode);
int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz);
int mm_write_mtx_array_size(FILE *f, int M, int N);
/********************* MM_typecode query functions ***************************/
#define mm_is_matrix(typecode) ((typecode)[0]=='M')
#define mm_is_sparse(typecode) ((typecode)[1]=='C')
#define mm_is_coordinate(typecode)((typecode)[1]=='C')
#define mm_is_dense(typecode) ((typecode)[1]=='A')
#define mm_is_array(typecode) ((typecode)[1]=='A')
#define mm_is_complex(typecode) ((typecode)[2]=='C')
#define mm_is_real(typecode) ((typecode)[2]=='R')
#define mm_is_pattern(typecode) ((typecode)[2]=='P')
#define mm_is_integer(typecode) ((typecode)[2]=='I')
#define mm_is_symmetric(typecode)((typecode)[3]=='S')
#define mm_is_general(typecode) ((typecode)[3]=='G')
#define mm_is_skew(typecode) ((typecode)[3]=='K')
#define mm_is_hermitian(typecode)((typecode)[3]=='H')
int mm_is_valid(MM_typecode matcode); /* too complex for a macro */
/********************* MM_typecode modify functions ***************************/
#define mm_set_matrix(typecode) ((*typecode)[0]='M')
#define mm_set_coordinate(typecode) ((*typecode)[1]='C')
#define mm_set_array(typecode) ((*typecode)[1]='A')
#define mm_set_dense(typecode) mm_set_array(typecode)
#define mm_set_sparse(typecode) mm_set_coordinate(typecode)
#define mm_set_complex(typecode)((*typecode)[2]='C')
#define mm_set_real(typecode) ((*typecode)[2]='R')
#define mm_set_pattern(typecode)((*typecode)[2]='P')
#define mm_set_integer(typecode)((*typecode)[2]='I')
#define mm_set_symmetric(typecode)((*typecode)[3]='S')
#define mm_set_general(typecode)((*typecode)[3]='G')
#define mm_set_skew(typecode) ((*typecode)[3]='K')
#define mm_set_hermitian(typecode)((*typecode)[3]='H')
#define mm_clear_typecode(typecode) ((*typecode)[0]=(*typecode)[1]= \
(*typecode)[2]=' ',(*typecode)[3]='G')
#define mm_initialize_typecode(typecode) mm_clear_typecode(typecode)
/********************* Matrix Market error codes ***************************/
#define MM_COULD_NOT_READ_FILE 11
#define MM_PREMATURE_EOF 12
#define MM_NOT_MTX 13
#define MM_NO_HEADER 14
#define MM_UNSUPPORTED_TYPE 15
#define MM_LINE_TOO_LONG 16
#define MM_COULD_NOT_WRITE_FILE 17
/******************** Matrix Market internal definitions ********************
MM_matrix_typecode: 4-character sequence
object sparse/ data storage
dense type scheme
string position: [0] [1] [2] [3]
Matrix typecode: M(atrix) C(oord) R(eal) G(eneral)
A(array) C(omplex) H(ermitian)
P(attern) S(ymmetric)
I(nteger) K(skew)
***********************************************************************/
#define MM_MTX_STR "matrix"
#define MM_ARRAY_STR "array"
#define MM_DENSE_STR "array"
#define MM_COORDINATE_STR "coordinate"
#define MM_SPARSE_STR "coordinate"
#define MM_COMPLEX_STR "complex"
#define MM_REAL_STR "real"
#define MM_INT_STR "integer"
#define MM_GENERAL_STR "general"
#define MM_SYMM_STR "symmetric"
#define MM_HERM_STR "hermitian"
#define MM_SKEW_STR "skew-symmetric"
#define MM_PATTERN_STR "pattern"
/* high level routines */
int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
double val[], MM_typecode matcode);
int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
double val[], MM_typecode matcode);
int mm_read_mtx_crd_entry(FILE *f, int *I, int *J, double *real, double *img,
MM_typecode matcode);
int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
double **val_, int **I_, int **J_);
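/*
 * Minimal read sketch (assumes @f is an already opened MatrixMarket file;
 * the mmReadHeaderSketch name is illustrative only): parse the banner,
 * validate the typecode, then read the COO sizes.
 */
static inline int mmReadHeaderSketch(FILE* f, MM_typecode* tc, int* M, int* N, int* nz){
	if (mm_read_banner(f, tc))			return MM_NO_HEADER;
	if (!mm_is_valid(*tc) || !mm_is_sparse(*tc))	return MM_UNSUPPORTED_TYPE;
	return mm_read_mtx_crd_size(f, M, N, nz);	//0 on success
}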
#endif

@ -0,0 +1,105 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef OMPCHUNKSDIVIDE
#define OMPCHUNKSDIVIDE
/*
 * chunk distribution for a runtime schedule set to dynamic, TODO guided
 * generic interface with elements, srcMatrix, configuration
 * TODO the srcMatrix is left in for advanced chunk distribution...
 * the configuration is expected to have a valid threadNum set
 */
#include "config.h"
//distribution of @rows|blocks of @matrix, exploiting @config
typedef void (CHUNKS_DISTR ) (ulong,spmat*,CONFIG*);
typedef void (*CHUNKS_DISTR_INTERF ) (ulong,spmat*,CONFIG*);
//NOOP chunks division for manual configuration via export OMP_SCHEDULE
inline void chunksNOOP(ulong r,spmat* mat,CONFIG* cfg){ return; }
#include "ompGetICV.h"
//fair division of @r elements from matrix @mat with threads num in @cfg
inline void chunksFair(ulong r,spmat* mat,CONFIG* cfg){
assert(cfg->threadNum > 0); //configured target thread num
omp_sched_t k,kind; int chunk_size,chunk_size_new=0,monotonic;
omp_get_schedule(&kind,&chunk_size);
k = kind;
#if _OPENMP >= 201811 //OMP_SCHED_SCHEDULE modifier from 5.0
monotonic = omp_sched_monotonic & kind;
if(monotonic) k = kind-omp_sched_monotonic;
#endif
(void) monotonic; //else no unused warning
switch(k){
case omp_sched_static :
DEBUG printf("static it's already fair\n");
return;
case omp_sched_dynamic:
chunk_size_new = MAX(1,r/cfg->threadNum);
break;
//case omp_sched_guided :
//case omp_sched_auto :
//default:
}
if(chunk_size == chunk_size_new) return;
omp_set_schedule(kind,chunk_size_new);
VERBOSE printf("chunksFair:\tchunk adapted to %d\n",chunk_size_new);
DEBUG ompGetRuntimeSchedule(NULL);
}
//fair division of @r elements from matrix @mat among the threads in @cfg
//subdividing the fair share by a factor of @FAIR_CHUNKS_FOLDING
inline void chunksFairFolded(ulong r,spmat* mat,CONFIG* cfg){
assert(cfg->threadNum > 0); //configured target thread num
omp_sched_t k,kind; int chunk_size,chunk_size_new=0,monotonic;
omp_get_schedule(&kind,&chunk_size);
k = kind;
#if _OPENMP >= 201811 //OMP_SCHED_SCHEDULE modifier from 5.0
monotonic = omp_sched_monotonic & kind;
if(monotonic) k = kind-omp_sched_monotonic;
#endif
(void) monotonic; //else no unused warning
switch(k){
case omp_sched_static :
DEBUG printf("static it's already fair\n");
return;
case omp_sched_dynamic:
chunk_size_new = MAX(1,r/(cfg->threadNum*FAIR_CHUNKS_FOLDING));
break;
//case omp_sched_guided :
//case omp_sched_auto :
//default:
}
if(chunk_size == chunk_size_new) return;
omp_set_schedule(kind,chunk_size_new);
DEBUG{ //check with ICV get
printf("chunksFairFolded:\tchunk adapted to %d\n",chunk_size_new);
ompGetRuntimeSchedule(NULL);
}
}
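/*
 * Usage sketch (assumes a parsed spmat @m and a CONFIG @cfg with threadNum>0;
 * setFairDynamicChunks is illustrative only): force a dynamic runtime
 * schedule, then let chunksFair shrink the chunk size to an even share of the
 * rows before a schedule(runtime) parallel loop over them.
 */
static inline void setFairDynamicChunks(spmat* m, CONFIG* cfg){
	omp_set_schedule(omp_sched_dynamic, 0);	//0 -> implementation default chunk size
	chunksFair(m->M, m, cfg);		//adapt chunk size to ~ M/threadNum
}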
#endif

@ -0,0 +1,41 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef OMPGETICV_H
#define OMPGETICV_H
//only header definitions
/*
* log sched configuration on stdout
* return kind,monotonic,chunkSize if arg0 not NULL
*/
void ompGetRuntimeSchedule(int* );
void ompGetAllICV(); //only if not OMP_GET_ICV_MAIN
float ompVersionMacroMap(); //version number as float using API dates mappings
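/*
 * Usage sketch (currentScheduleKind is illustrative only, and assumes the
 * three values are written into an int[3] in the documented order):
 * log the runtime schedule and also capture it programmatically.
 */
static inline int currentScheduleKind(void){
	int kmc[3];			//kind, monotonic, chunkSize
	ompGetRuntimeSchedule(kmc);	//logs on stdout and fills the array
	return kmc[0];			//schedule kind
}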
#endif

@ -0,0 +1,193 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2021 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2021 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009-2012 Oak Rigde National Laboratory. All rights reserved.
* Copyright (c) 2011-2020 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011-2013 INRIA. All rights reserved.
* Copyright (c) 2015 University of Houston. All rights reserved.
* Copyright (c) 2015-2021 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017-2019 IBM Corporation. All rights reserved.
* Copyright (c) 2018 FUJITSU LIMITED. All rights reserved.
* Copyright (c) 2021-2022 Google, LLC. All rights reserved.
* Copyright (c) 2021-2022 Amazon.com, Inc. or its affiliates. All Rights
* reserved.
* Copyright (c) 2021 Bull S.A.S. All rights reserved.
* Copyright (c) 2018 Triad National Security, LLC. All rights
* Copyright (c) 2018-2021 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/*
* Error classes and codes
* Do not change the values of these without also modifying mpif.h.in.
*/
#define MPI_SUCCESS 0
#define MPI_ERR_BUFFER 1
#define MPI_ERR_COUNT 2
#define MPI_ERR_TYPE 3
#define MPI_ERR_TAG 4
#define MPI_ERR_COMM 5
#define MPI_ERR_RANK 6
#define MPI_ERR_REQUEST 7
#define MPI_ERR_ROOT 8
#define MPI_ERR_GROUP 9
#define MPI_ERR_OP 10
#define MPI_ERR_TOPOLOGY 11
#define MPI_ERR_DIMS 12
#define MPI_ERR_ARG 13
#define MPI_ERR_UNKNOWN 14
#define MPI_ERR_TRUNCATE 15
#define MPI_ERR_OTHER 16
#define MPI_ERR_INTERN 17
#define MPI_ERR_IN_STATUS 18
#define MPI_ERR_PENDING 19
#define MPI_ERR_ACCESS 20
#define MPI_ERR_AMODE 21
#define MPI_ERR_ASSERT 22
#define MPI_ERR_BAD_FILE 23
#define MPI_ERR_BASE 24
#define MPI_ERR_CONVERSION 25
#define MPI_ERR_DISP 26
#define MPI_ERR_DUP_DATAREP 27
#define MPI_ERR_FILE_EXISTS 28
#define MPI_ERR_FILE_IN_USE 29
#define MPI_ERR_FILE 30
#define MPI_ERR_INFO_KEY 31
#define MPI_ERR_INFO_NOKEY 32
#define MPI_ERR_INFO_VALUE 33
#define MPI_ERR_INFO 34
#define MPI_ERR_IO 35
#define MPI_ERR_KEYVAL 36
#define MPI_ERR_LOCKTYPE 37
#define MPI_ERR_NAME 38
#define MPI_ERR_NO_MEM 39
#define MPI_ERR_NOT_SAME 40
#define MPI_ERR_NO_SPACE 41
#define MPI_ERR_NO_SUCH_FILE 42
#define MPI_ERR_PORT 43
#define MPI_ERR_QUOTA 44
#define MPI_ERR_READ_ONLY 45
#define MPI_ERR_RMA_CONFLICT 46
#define MPI_ERR_RMA_SYNC 47
#define MPI_ERR_SERVICE 48
#define MPI_ERR_SIZE 49
#define MPI_ERR_SPAWN 50
#define MPI_ERR_UNSUPPORTED_DATAREP 51
#define MPI_ERR_UNSUPPORTED_OPERATION 52
#define MPI_ERR_WIN 53
#define MPI_T_ERR_MEMORY 54
#define MPI_T_ERR_NOT_INITIALIZED 55
#define MPI_T_ERR_CANNOT_INIT 56
#define MPI_T_ERR_INVALID_INDEX 57
#define MPI_T_ERR_INVALID_ITEM 58
#define MPI_T_ERR_INVALID_HANDLE 59
#define MPI_T_ERR_OUT_OF_HANDLES 60
#define MPI_T_ERR_OUT_OF_SESSIONS 61
#define MPI_T_ERR_INVALID_SESSION 62
#define MPI_T_ERR_CVAR_SET_NOT_NOW 63
#define MPI_T_ERR_CVAR_SET_NEVER 64
#define MPI_T_ERR_PVAR_NO_STARTSTOP 65
#define MPI_T_ERR_PVAR_NO_WRITE 66
#define MPI_T_ERR_PVAR_NO_ATOMIC 67
#define MPI_ERR_RMA_RANGE 68
#define MPI_ERR_RMA_ATTACH 69
#define MPI_ERR_RMA_FLAVOR 70
#define MPI_ERR_RMA_SHARED 71
#define MPI_T_ERR_INVALID 72
#define MPI_T_ERR_INVALID_NAME 73
#define MPI_ERR_PROC_ABORTED 74
/* not #if conditional on OPAL_ENABLE_FT_MPI for ABI */
#define MPI_ERR_PROC_FAILED 75
#define MPI_ERR_PROC_FAILED_PENDING 76
#define MPI_ERR_REVOKED 77
/* Per MPI-3 p349 47, MPI_ERR_LASTCODE must be >= the last predefined
MPI_ERR_<foo> code. Set the last code to allow some room for adding
error codes without breaking ABI. */
#define MPI_ERR_LASTCODE 92
/*
* Comparison results. Don't change the order of these, the group
* comparison functions rely on it.
* Do not change the order of these without also modifying mpif.h.in.
*/
enum {
MPI_IDENT,
MPI_CONGRUENT,
MPI_SIMILAR,
MPI_UNEQUAL
};
/*
* MPI_Init_thread constants
* Do not change the order of these without also modifying mpif.h.in.
*/
enum {
MPI_THREAD_SINGLE,
MPI_THREAD_FUNNELED,
MPI_THREAD_SERIALIZED,
MPI_THREAD_MULTIPLE
};
/*
* Datatype combiners.
* Do not change the order of these without also modifying mpif.h.in.
* (see also mpif-common.h.fin).
*/
enum {
MPI_COMBINER_NAMED,
MPI_COMBINER_DUP,
MPI_COMBINER_CONTIGUOUS,
MPI_COMBINER_VECTOR,
#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
MPI_COMBINER_HVECTOR_INTEGER,
#else
OMPI_WAS_MPI_COMBINER_HVECTOR_INTEGER, /* preserve ABI compatibility */
#endif
MPI_COMBINER_HVECTOR,
MPI_COMBINER_INDEXED,
#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
MPI_COMBINER_HINDEXED_INTEGER,
#else
OMPI_WAS_MPI_COMBINER_HINDEXED_INTEGER, /* preserve ABI compatibility */
#endif
MPI_COMBINER_HINDEXED,
MPI_COMBINER_INDEXED_BLOCK,
#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
MPI_COMBINER_STRUCT_INTEGER,
#else
OMPI_WAS_MPI_COMBINER_STRUCT_INTEGER, /* preserve ABI compatibility */
#endif
MPI_COMBINER_STRUCT,
MPI_COMBINER_SUBARRAY,
MPI_COMBINER_DARRAY,
MPI_COMBINER_F90_REAL,
MPI_COMBINER_F90_COMPLEX,
MPI_COMBINER_F90_INTEGER,
MPI_COMBINER_RESIZED,
MPI_COMBINER_HINDEXED_BLOCK
};
////MOCK ERRHANDLERS
#define OMPI_ERRHANDLER_INVOKE(comm,err,fname) err
#define OMPI_ERRHANDLER_NOHANDLE_INVOKE(err,fname) err

@ -0,0 +1,90 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef PARSER
#define PARSER
#include "mmio.h"
#include "sparseMatrix.h"
typedef struct{
ulong row;
ulong col;
double val;
} entry; //MatrixMarket COO entry
typedef struct{
MM_typecode mcode;
entry* entries;
ulong* rowLens;
ulong M,N,NZ; //spmat sizes
} MatrixMarket;
////COO PARSE
//parse and check MatrixMarket mat in @matPath file
MatrixMarket* MMRead(char* matPath);
void freeMatrixMarket(MatrixMarket* mm);
//basic check for sparse matrix compliance to the app, return posix bool
int MMCheck(MM_typecode typecode);
/*
* parse MatrixMarket matrix entries in @fp, of type @mcode
* into COOrdinate list of entries
 * -> expand symmetric matrices into a full matrix with both halves,
 *    so NZ will be doubled in place
 * return allocated and filled COO entries, writing the NNZ count into @NZ
* NO SORT CHECKING HERE
*/
entry* MMtoCOO(ulong* NZ, FILE *fp, MM_typecode mcode,ulong* rowLens);
////COO -> ANYTHING ELSE CONVERSION
/*
 * write COO entries in @entries inside sparse matrix @mat in CSR format
 * EXPECTED: CSR arrays allocated, @entries column sorted within (not necessarily consecutive) rows
 * [symmetric halves explicitly represented --> not important here]
*/
int COOtoCSR(entry* entries, spmat* mat,ulong* rowLens);
/*
* write COO entries in @entries inside sparse matrix @mat in ELL format
 * EXPECTED: @entries column sorted within (not necessarily consecutive) rows
* ELL internal array allocated in this function, not freed in case of error
*/
int COOtoELL(entry* entries, spmat* mat, ulong* rowLens);
////wrapper MM -> specialized target
/*
* Parse MatrixMarket matrix stored in file at @matPath
* IMPLEMENTED WRAPPING: MMtoCOO -> COOtoCSR
 * Returns: allocated spmat sparse matrix with all fields allocated
 * symmetric matrices are expanded into a full matrix
*/
spmat* MMtoCSR(char* matPath);
spmat* MMtoELL(char* matPath);
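/*
 * End-to-end usage sketch (assumes a MatrixMarket file at @path;
 * loadCSRSketch is illustrative only): load it as CSR through the
 * MMtoCOO -> COOtoCSR wrapper, use it, free it.
 */
static inline long loadCSRSketch(char* path){
	spmat* m = MMtoCSR(path);	//NULL on parse/alloc error
	if (!m)	return -1;
	long nnz = m->NZ;		//the CSR arrays AS,JA,IRP are ready to use here
	freeSpmat(m);
	return nnz;
}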
#endif

@ -0,0 +1,216 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
//sparse matrix def & aux
#ifndef SPARSEMATRIX
#define SPARSEMATRIX
#include <stdlib.h>
#include <string.h>
#include "macros.h"
#include "config.h"
typedef struct {
idx_t NZ,M,N;
double *AS;
idx_t* JA;
//CSR SPECIFIC
idx_t* IRP;
#ifdef ROWLENS
idx_t* RL; //row lengths
#endif
//CUDA SPECIFIC
idx_t MAX_ROW_NZ;
} spmat; //describe a sparse matrix
//smart index keeping in a dense map
typedef struct{
idx_t len; //num of nnz idx accumulated
/* nnz index presence packing, implicit space enough for all possible indexes */
nnz_idxs_flags_t idxsMap;
uint idxsMapN; //either num of limbs or len of char flag array
} SPVECT_IDX_DENSE_MAP;
int initSpVectIdxDenseAcc(idx_t idxMax, SPVECT_IDX_DENSE_MAP* );
inline void _resetIdxMap(SPVECT_IDX_DENSE_MAP* acc){
acc->len = 0;
memset(acc->idxsMap,0,sizeof(*acc->idxsMap)*acc->idxsMapN);
}
inline void _freeIdxMap(SPVECT_IDX_DENSE_MAP* acc){
free(acc->idxsMap);
free(acc);
}
//aux struct for sparse vector-scalar product accumulation
typedef struct{
double* v; //aux accumulating dense vector (sparse)
idx_t* nnzIdx; //v nnz value's indexes (contiguous)
idx_t vLen; //size of the aux dense vector //TODO USELESS?
SPVECT_IDX_DENSE_MAP nnzIdxMap; //
} ACC_DENSE;
int allocAccDense(ACC_DENSE* v, ulong size);
/*
**** NNZ IDXS PRESENCE FLAGS ACCESS INTERFACE: ***
* sp_vect_idx_set(idx,SPVECT_IDX_DENSE_MAP)
 * -> returns 0 if @idx is not already set, setting it in that case
*/
inline int spVect_idx_in(idx_t idx, SPVECT_IDX_DENSE_MAP* idxsMapAcc){
#if SPVECT_IDX_BITWISE == TRUE
uint limbID = idx / LIMB_SIZE_BIT; //idx's limb id
uint limbIdxID = idx % LIMB_SIZE_BIT; //idx's pos in limb
DEBUGCHECKS assert( limbID < idxsMapAcc->idxsMapN );
limb_t idxPos = ((limb_t) 1) << limbIdxID;
if (!(idxsMapAcc->idxsMap[limbID] & idxPos) ){
idxsMapAcc->idxsMap[limbID] |= idxPos;
idxsMapAcc->len++;
return 0;
}
#else
//assert( idx < idxsMapAcc->idxsMapN ); //TODO
if (!( idxsMapAcc->idxsMap[idx] )){ //TODO an atomic compare-and-swap primitive could be used here, merging ops at the HW level?
idxsMapAcc->idxsMap[idx] = 1;
idxsMapAcc->len++;
return 0;
}
#endif //SPVECT_IDX_BITWISE == TRUE
return 1;
}
inline void _resetAccVect(ACC_DENSE* acc){
memset(acc->v, 0, acc->vLen * sizeof(*(acc->v)));
memset(acc->nnzIdx, 0, acc->vLen * sizeof(*(acc->nnzIdx)));
_resetIdxMap(&acc->nnzIdxMap);
}
////Sparse vector accumulator -- corresponding to a matrix portion
typedef struct{
//idx_t r; //row index in the corresponding matrix
//idx_t c; //col index in the corresponding matrix
idx_t len; //rowLen
double* AS; //row nnz values
idx_t* JA; //row nnz colIndexes
} SPACC;
/*
* ARRAY BISECTION - RECURSIVE VERSION
 * TODO ASSERT LEN>0 omitted
*/
inline int BISECT_ARRAY(idx_t target, idx_t* arr, idx_t len){
//if (len == 0) return FALSE;
if (len <= 1) return *arr == target;
idx_t middleIdx = (len-1) / 2; //len=5-->2, len=4-->1
idx_t middle = arr[ middleIdx ];
if (target == middle) return TRUE;
else if (target < middle) return BISECT_ARRAY(target,arr,middleIdx);
else return BISECT_ARRAY(target,arr+middleIdx+1,middleIdx + (len-1)%2);
}
/*
* return !0 if col @j idx is in row @i of sparse mat @smat
 * bisection used --> O(log_2(ROWLENGTH))
*/
inline int IS_NNZ(spmat* smat,idx_t i,idx_t j){
idx_t rStart = smat->IRP[i];
idx_t rLen = smat->IRP[i+1] - rStart;
if (!rLen) return FALSE;
return BISECT_ARRAY(j,smat->JA + rStart,rLen);
}
inline int IS_NNZ_linear(spmat* smat,idx_t i,idx_t j){ //linear -> O(ROWLENGTH)
int out = 0;
for (idx_t x=smat->IRP[i]; x<smat->IRP[i+1] && !out; x++){
out = (j == smat->JA[x]);
}
return out;
}
////aux functions
//free sparse matrix
inline void freeSpmatInternal(spmat* mat){
if(!mat) return;
free(mat->AS);
free(mat->JA);
free(mat->IRP);
#ifdef ROWLENS
free(mat->RL);
#endif
}
inline void freeSpmat(spmat* mat){
if (!mat) return;
freeSpmatInternal(mat);
free(mat);
}
static inline void zeroSpmat(spmat* m) {
memset(m->AS, 0, sizeof(*(m->AS)) * m->NZ);
memset(m->JA, 0, sizeof(*(m->JA)) * m->NZ);
memset(m->IRP, 0, sizeof(*(m->IRP)) * (m->M + 1));
}
//free the SPACC aux struct's internal arrays (if not NULL)
inline void freeSpAcc(SPACC* r){
free(r->AS);
free(r->JA);
}
////alloc&init functions
//alloc&init internal structures only dependent of dimensions @rows,@cols
inline int allocSpMatrixInternal(idx_t rows, idx_t cols, spmat* mat){
mat -> M = rows;
mat -> N = cols;
if (!(mat->IRP=calloc(mat->M+1,sizeof(*(mat->IRP))))){ //calloc only for 0th
ERRPRINT("IRP calloc err\n");
freeSpmatInternal(mat);
return EXIT_FAILURE;
}
#ifdef ROWLENS
if (!(mat->RL = malloc(mat->M*sizeof(*(mat->RL))))){
ERRPRINT("RL calloc err\n");
freeSpmatInternal(mat);
return EXIT_FAILURE;
}
#endif
return EXIT_SUCCESS;
}
//alloc a sparse matrix of @rows rows and @cols cols
inline spmat* allocSpMatrix(idx_t rows, idx_t cols){
spmat* mat;
if (!(mat = calloc(1,sizeof(*mat)))) {
ERRPRINT("mat calloc failed\n");
return NULL;
}
if (allocSpMatrixInternal(rows,cols,mat)){
free(mat);
return NULL;
}
return mat;
}
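/*
 * Construction sketch (0-based CSR, illustrative values and name): build the
 * 2x3 matrix [[5 0 2],[0 3 0]] by hand and query it with IS_NNZ.
 */
static inline int csrHandBuildSketch(void){
	spmat* m = allocSpMatrix(2, 3);
	if (!m)	return EXIT_FAILURE;
	m->NZ = 3;
	m->AS = malloc(m->NZ * sizeof(*m->AS));
	m->JA = malloc(m->NZ * sizeof(*m->JA));
	if (!m->AS || !m->JA){ freeSpmat(m); return EXIT_FAILURE; }
	double as[]  = {5, 2, 3};
	idx_t  ja[]  = {0, 2, 1};
	idx_t  irp[] = {0, 2, 3};
	memcpy(m->AS,  as,  sizeof(as));
	memcpy(m->JA,  ja,  sizeof(ja));
	memcpy(m->IRP, irp, sizeof(irp));
	int ok = IS_NNZ(m, 0, 2) && !IS_NNZ(m, 1, 0);	//expect 1
	freeSpmat(m);
	return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}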
#endif

@ -0,0 +1,138 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "sparseMatrix.h"
#include "config.h"
#ifndef OFF_F
#error OFF_F required
#endif
//////////////////////////////// CSR SPECIFIC /////////////////////////////////
///SPARSE MATRIX PARTITIONING
/*
* partition CSR sparse matrix @A in @gridCols columns partitions
* returning an offsets matrix out[i][j] = start of jth colPartition of row i
* subdivide @A columns in uniform cols ranges in the output
*/
idx_t* CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols);
/*
* partition CSR sparse matrix @A in @gridCols columns partitions as
 * independent, allocated sparse matrices and return them
* subdivide @A columns in uniform cols ranges in the output
*/
spmat* CAT(colsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols);
//same as above but with (aux) use of offsets partitioning (also returned if @colPartsOffsets != NULL)
spmat* CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)(spmat* A,uint gridCols,idx_t** colPartsOffsets);
//same as checkOverallocPercent but with 2D partitioning - CSR col partitioning
void CAT(checkOverallocRowPartsPercent_,OFF_F)(ulong* forecastedSizes,spmat* AB,
idx_t gridCols,idx_t* bColOffsets);
///////////////////////////////////////////////////////////////////////////////
///Single implementations headers
#ifndef SPARSEUTILS_H_COMMON_IDX_IMPLS
#define SPARSEUTILS_H_COMMON_IDX_IMPLS
//shift every index of the sparse data so the matrix can be used in a Fortran app
inline void C_FortranShiftIdxs(spmat* m){
for(ulong r=0; r<m->M+1; m -> IRP[r]++,r++);
for(ulong i=0; i<m->NZ; m -> JA[i]++, i++);
}
//shift every index of the sparse data so the matrix can be used in a C app
inline void Fortran_C_ShiftIdxs(spmat* m){ //TODO DBG ONLY and completeness
for(ulong r=0; r<m->M+1; m -> IRP[r]--,r++);
for(ulong i=0; i<m->NZ; m -> JA[i]--, i++);
}
/*
* check SpMM resulting matrix @AB = A * B nnz distribution in rows
* with the preallocated, forecasted size in @forecastedSizes
 * @forecastedSizes holds the forecasted size of each row,
 * with the cumulative size of the whole matrix in its last entry
*/
void checkOverallocPercent(ulong* forecastedSizes,spmat* AB);
/*
 check if sparse matrices A <-> B differ by at most
 DOUBLE_DIFF_THREASH per element
*/
int spmatDiff(spmat* A, spmat* B);
////dyn alloc of spMM output matrix
/*
///size prediction of AB = @A * @B
inline ulong SpMMPreAlloc(spmat* A,spmat* B){
//TODO BETTER PREALLOC HEURISTICS HERE
return MAX(A->NZ,B->NZ);
}
//init a sparse matrix AB=@A * @B with a initial allocated space by an euristic
inline spmat* initSpMatrixSpMM(spmat* A, spmat* B){
spmat* out;
if (!(out = allocSpMatrix(A->M,B->N))) return NULL;
out -> NZ = SpMMPreAlloc(A,B);
if (!(out->AS = malloc(out->NZ*sizeof(*(out->AS))))){
ERRPRINT("initSpMatrix: out->AS malloc errd\n");
free(out);
return NULL;
}
if (!(out->JA = malloc(out->NZ*sizeof(*(out->JA))))){
ERRPRINT("initSpMatrix: out->JA malloc errd\n");
freeSpmat(out);
return NULL;
}
return out;
}
#define REALLOC_FACTOR 1.5
//realloc sparse matrix NZ arrays
inline int reallocSpMatrix(spmat* mat,ulong newSize){
mat->NZ *= newSize;
void* tmp;
if (!(tmp = realloc(mat->AS,mat->NZ * sizeof(*(mat->AS))))){
ERRPRINT("reallocSpMatrix: realloc AS errd\n");
return EXIT_FAILURE;
}
mat->AS = tmp;
if (!(tmp = realloc(mat->JA,mat->NZ * sizeof(*(mat->JA))))){
ERRPRINT("reallocSpMatrix: realloc JA errd\n");
return EXIT_FAILURE;
}
mat->JA = tmp;
return EXIT_SUCCESS;
}
*/
////MISC
//print useful information about 3SPMM about to compute
void print3SPMMCore(spmat* R,spmat* AC,spmat* P,CONFIG* conf);
void printSparseMatrix(spmat* sparseMat,char justNZMarkers);
/*convert @sparseMat sparse matrix in dense matrix returned*/
double* CSRToDense(spmat* sparseMat);
void freeAccsDense(ACC_DENSE* vectors,ulong num);
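/*
 * Usage sketch (illustrative @gridCols and sketch name; the offsets buffer is
 * presumed heap allocated and laid out row-major, i.e.
 * offs[i*gridCols + j] = start of the jth column partition of row i):
 * partition the columns of a CSR matrix @A and address a partition start.
 */
static inline int CAT(colPartitionOffsetsSketch_,OFF_F)(spmat* A, uint gridCols){
	idx_t* offs = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(A, gridCols);
	if (!offs)	return EXIT_FAILURE;
	idx_t row0Part0Start = offs[0];	//start of row 0, partition 0 (presumed layout)
	(void) row0Part0Start;
	free(offs);
	return EXIT_SUCCESS;
}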
#endif //SPARSEUTILS_H_COMMON_IDX_IMPLS

@ -0,0 +1,52 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SPARSEUTILSMULTI_H
#define SPARSEUTILSMULTI_H
#ifdef OFF_F //save "includer" OFF_F value before overwriting it
#pragma push_macro("OFF_F")
#define _OFF_F_OLD
#undef OFF_F
#endif
#define OFF_F 0
#include "sparseUtilsGeneric.h"
#undef OFF_F
#define OFF_F 1
#include "sparseUtilsGeneric.h"
#undef OFF_F
#ifdef _OFF_F_OLD
#pragma pop_macro("OFF_F")
#undef _OFF_F_OLD
#endif
#endif

@ -0,0 +1,181 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UTILS
#define UTILS
//#include <stddef.h>
#include "macros.h"
#include "linuxK_rbtree_minimalized.h"
extern int urndFd; //file descriptor of DRNG_DEVFILE, opened O_RDONLY
int init_urndfd(); // wrap init urndFd
///IO
//UNBUFFERED IO
/*
* urndFd usage template to populate random timeout
if(_read_wrap(urndFd,(char*)&timeout,sizeof(timeout))<0){
fprintf(stderr,"rnd read for thread's timeout failed");
ret=EXIT_FAILURE;
goto end;
}
*/
//wrap read cycle over @fd
int read_wrap(int fd,void* dst,size_t count);
int fread_wrap(FILE* fp,void* dst,size_t count);
//dual of read_wrap
int write_wrap(int fd,void* src,size_t count);
//create or open file at @outFpath for write
int createNewFile(char* const outFpath);
///STRUCTURED DATA IO
#define DOUBLE_STR_FORMAT "%25le\n"
//write double vector @v as row sequence of double at @fpath
//e.g. read with od -tf8 -w8 fpath : OCTALOFFSET: DOUBLE FULL DIGITS
int writeDoubleVector(char* fpath,double* v,ulong size);
/*
 * read a vector of double [Str] of arbitrary size from @fpath, true length in *size
 * if @size points to a nonzero value, the initial allocation will be of *size
 * any successive reallocation is done multiplying *size by VECTOR_STEP_REALLOC
*/
double* readDoubleVector(char* fpath,ulong* size);
double* readDoubleVectorStr(char* fpath,ulong* size);
///STRUCTURED DATA IO -- BUFFERED: FSCANF - FPRINTF
//dual of readDoubleVector[Str]
int writeDoubleVectorAsStr(char* fpath,double* v,ulong size);
int MPI_Dims_create(int nnodes, int ndims, int dims[]); //commons/ompi_dims_create/ompi_dims_create.c
#include "config.h"
///config from ENV
#define GRID_ROWS "GRID_ROWS"
#define GRID_COLS "GRID_COLS"
//parse configuration from env
int getConfig(CONFIG* conf);
//append only list implemented with a reallocated array
typedef struct{
ulong* a;
ulong size;
ulong lastIdx;
} APPENDARRAY;
//append @val to @list, reallocating if reached end
//TODO inline int appendArr(ulong val,APPENDARRAY* list);
void sortuint(uint* arr, uint len); //sort uint array @arr of @len elements
void sort_idx_t(idx_t* arr, idx_t len);
void sortulong(ulong* arr, ulong len);
void sortRbNode(rbNode* arr,idx_t len);
///ranges functions
/*
* considering @rangesN uniform ranges from [0,rangesEnd)
* return the rangeId that element @i is in
*/
inline ushort matchingUnifRangeIdxLinear(idx_t i,idx_t rangesEnd,ushort rangesN){
	double rangeW = rangesEnd / (double) rangesN;
	for(ushort j=0; j < rangesN; j++){
		idx_t rEnd = rangeW * (j+1);	//end (excluded) of the jth range
		if( i < rEnd )	return j;
	}
	assert( FALSE ); //i should be in a range!
	return EXIT_FAILURE;
}
/*
 * find which range @idx matches among
 * a uniform division of @size elements in @rangesN ranges with carried remainder
 * return the 0-based idx of the matched range
*/
inline ushort matchingUnifRangeIdx(idx_t idx, idx_t size, ushort rangesN){
idx_t rangeW = size/rangesN, rangeRem = size%rangesN;
idx_t searchStart,searchEnd;
//!IN_RANGE(idx,unifRemShareBlock(idx,rangeW,rangeRem)) && IN_RANGE(r,0,rangesN);){
for(ushort r=rangesN/2-1, hMoveWidth=rangesN/2; hMoveWidth>0;hMoveWidth/=2){
searchStart = unifRemShareStart(idx,rangeW,rangeRem);
searchEnd = unifRemShareEnd(idx,rangeW,rangeRem);
if (IN_RANGE(idx,searchStart,searchEnd)) return r;
else if (idx < searchStart) r = AVG(0,r);
else r = AVG(r,rangesN-1);
}
	assert(FALSE);
	return EXIT_FAILURE;
}
///reductionUtils
inline idx_t reductionSumSeq(idx_t* arr,idx_t arrLen){
idx_t i,out;
for(i=0,out=0; i<arrLen; out += arr[i++] );
return out;
}
inline idx_t reductionMaxSeq(idx_t* arr,idx_t arrLen){
	idx_t out = 0;
	for( idx_t i=0; i<arrLen; i++ )	out = out < arr[i] ? arr[i] : out;
	return out;
}
inline idx_t reductionSumOmp(idx_t* arr,idx_t arrLen){
	idx_t out = 0;	//must be initialized: the reduction adds into it
	#pragma omp parallel for reduction(+:out)
	for(idx_t i=0; i<arrLen; i++){
		out += arr[i];
	}
	return out;
}
inline idx_t reductionMaxOmp(idx_t* arr,idx_t arrLen){
	idx_t out = 0;	//must be initialized: the reduction combines into it
	#pragma omp parallel for reduction(max:out)
	for(idx_t i=0; i<arrLen; i++){ out = out < arr[i] ? arr[i] : out; }
	return out;
}
/*
 * return 0 if vectors a and b have elements that differ by at most DOUBLE_DIFF_THREASH
 * if diffMax!=NULL, save there the max difference value
 * between the 2 entries of the input vectors, signed as a[i] - b[i] (as well as dump prints)
 * CONVENTION: @a = true result, @b = vector to check against the true result
*/
int doubleVectorsDiff(double* a, double* b, ulong n,double* diffMax);
//fill a random vector in @v long @size doubles
int fillRndVector(ulong size, double* v);
//read vector as a sequence of space separated double from file at @fpath
#define VECTOR_STEP_MALLOC 100
/*
* decompress file at @path into @tmpFsDecompressPath,
 * the decompression command is obtained by first looking at the extension,
 * then matching it against a list of available decompression commands
 * that can be run as shell cmds, appending @path > @tmpFsDecompressPath
 * e.g. decompress with: xz -d -c @path > @tmpFsDecompressPath
 * Returns: -1 if decompression wasn't possible, otherwise the decompress command's exit status
*/
int extractInTmpFS(char* path, char* tmpFsDecompressPath);
//compute E[@values] in @out[0] and VAR[@values] in @out[1] of @numVals values
void statsAvgVar(double* values,uint numVals, double* out);
void printMatrix(double* mat,ulong m,ulong n,char justNZMarkers);
void printVector(double* v,ulong size);
void assertArrNoRepetitions(idx_t* arrSorted, idx_t arrLen);
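/*
 * Usage sketch for the reduction helpers above (illustrative array and sketch
 * name): the sequential and the OpenMP variants are expected to agree.
 */
static inline int reductionsAgreeSketch(void){
	idx_t a[] = {3, 1, 4, 1, 5};
	idx_t n = sizeof(a)/sizeof(*a);
	return reductionSumSeq(a, n) == reductionSumOmp(a, n)
	    && reductionMaxSeq(a, n) == reductionMaxOmp(a, n);	//expect 1
}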
#endif

@ -0,0 +1,414 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
Red Black Trees
(C) 1999 Andrea Arcangeli <andrea@suse.de>
(C) 2002 David Woodhouse <dwmw2@infradead.org>
(C) 2012 Michel Lespinasse <walken@google.com>
linux/lib/rbtree.c
*/
/*
* RedBlackTree_linux_userspace
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the RedBlackTree_linux_userspace or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE RedBlackTree_linux_userspace GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
///#include <linux/rbtree_augmented.h> //TODO LESS_DEPENDENCIES
#include "linuxK_rbtree_minimalized.h" ///fulfill embedded deps needed //TODO LESS_DEPENDENCIES
//#include <linux/export.h>
/* MINIMALIZED VERSION WITHOUT THE IMPORT OF rbtree_augmented
* red-black trees properties: https://en.wikipedia.org/wiki/Rbtree
*
* 1) A node is either red or black
* 2) The root is black
* 3) All leaves (NULL) are black
* 4) Both children of every red node are black
* 5) Every simple path from root to leaves contains the same number
* of black nodes.
*
* 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two
* consecutive red nodes in a path and every red node is therefore followed by
* a black. So if B is the number of black nodes on every simple path (as per
* 5), then the longest possible path due to 4 is 2B.
*
* We shall indicate color with case, where black nodes are uppercase and red
* nodes will be lowercase. Unknown color nodes shall be drawn as red within
* parentheses and have some accompanying text comment.
*/
/*
* Notes on lockless lookups:
*
* All stores to the tree structure (rb_left and rb_right) must be done using
* WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the
* tree structure as seen in program order.
*
* These two requirements will allow lockless iteration of the tree -- not
* correct iteration mind you, tree rotations are not atomic so a lookup might
* miss entire subtrees.
*
* But they do guarantee that any such traversal will only see valid elements
* and that it will indeed complete -- does not get stuck in a loop.
*
* It also guarantees that if the lookup returns an element it is the 'correct'
* one. But not returning an element does _NOT_ mean it's not present.
*
* NOTE:
*
* Stores to __rb_parent_color are not important for simple lookups so those
* are left undone as of now. Nor did I check for loops involving parent
* pointers.
*/
///rb_set_black //TODO ADDONLY_GENERIC
static inline struct rb_node *rb_red_parent(struct rb_node *red)
{
return (struct rb_node *)red->__rb_parent_color;
}
/*
* Helper function for rotations:
* - old's parent and color get assigned to new
* - old gets assigned new as a parent and 'color' as a color.
*/
static inline void
__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new,
struct rb_root *root, int color)
{
struct rb_node *parent = rb_parent(old);
new->__rb_parent_color = old->__rb_parent_color;
rb_set_parent_color(old, new, color);
__rb_change_child(old, new, parent, root);
}
///TODO REMOVE AGUMENT
static __always_inline void
__rb_insert(struct rb_node *node, struct rb_root *root)
{
struct rb_node *parent = rb_red_parent(node), *gparent, *tmp;
while (true) {
/*
* Loop invariant: node is red.
*/
if (unlikely(!parent)) {
/*
* The inserted node is root. Either this is the
* first node, or we recursed at Case 1 below and
* are no longer violating 4).
*/
rb_set_parent_color(node, NULL, RB_BLACK);
break;
}
/*
* If there is a black parent, we are done.
* Otherwise, take some corrective action as,
* per 4), we don't want a red root or two
* consecutive red nodes.
*/
if(rb_is_black(parent))
break;
gparent = rb_red_parent(parent);
tmp = gparent->rb_right;
if (parent != tmp) { /* parent == gparent->rb_left */
if (tmp && rb_is_red(tmp)) {
/*
* Case 1 - node's uncle is red (color flips).
*
* G g
* / \ / \
* p u --> P U
* / /
* n n
*
* However, since g's parent might be red, and
* 4) does not allow this, we need to recurse
* at g.
*/
rb_set_parent_color(tmp, gparent, RB_BLACK);
rb_set_parent_color(parent, gparent, RB_BLACK);
node = gparent;
parent = rb_parent(node);
rb_set_parent_color(node, parent, RB_RED);
continue;
}
tmp = parent->rb_right;
if (node == tmp) {
/*
* Case 2 - node's uncle is black and node is
* the parent's right child (left rotate at parent).
*
* G G
* / \ / \
* p U --> n U
* \ /
* n p
*
* This still leaves us in violation of 4), the
* continuation into Case 3 will fix that.
*/
tmp = node->rb_left;
WRITE_ONCE(parent->rb_right, tmp);
WRITE_ONCE(node->rb_left, parent);
if (tmp)
rb_set_parent_color(tmp, parent,
RB_BLACK);
rb_set_parent_color(parent, node, RB_RED);
///augment_rotate(parent, node); //TODO DEL AUGMENTED DEP
parent = node;
tmp = node->rb_right;
}
/*
* Case 3 - node's uncle is black and node is
* the parent's left child (right rotate at gparent).
*
* G P
* / \ / \
* p U --> n g
* / \
* n U
*/
WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */
WRITE_ONCE(parent->rb_right, gparent);
if (tmp)
rb_set_parent_color(tmp, gparent, RB_BLACK);
__rb_rotate_set_parents(gparent, parent, root, RB_RED);
///augment_rotate(gparent, parent); //TODO DEL_AUGMENTED_DEP
break;
} else {
tmp = gparent->rb_left;
if (tmp && rb_is_red(tmp)) {
/* Case 1 - color flips */
rb_set_parent_color(tmp, gparent, RB_BLACK);
rb_set_parent_color(parent, gparent, RB_BLACK);
node = gparent;
parent = rb_parent(node);
rb_set_parent_color(node, parent, RB_RED);
continue;
}
tmp = parent->rb_left;
if (node == tmp) {
/* Case 2 - right rotate at parent */
tmp = node->rb_right;
WRITE_ONCE(parent->rb_left, tmp);
WRITE_ONCE(node->rb_right, parent);
if (tmp)
rb_set_parent_color(tmp, parent,
RB_BLACK);
rb_set_parent_color(parent, node, RB_RED);
///augment_rotate(parent, node); //TODO DEL_AUGMENTED_DEP
parent = node;
tmp = node->rb_left;
}
/* Case 3 - left rotate at gparent */
WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */
WRITE_ONCE(parent->rb_left, gparent);
if (tmp)
rb_set_parent_color(tmp, gparent, RB_BLACK);
__rb_rotate_set_parents(gparent, parent, root, RB_RED);
///augment_rotate(gparent, parent); //TODO DEL_AUGMENTED_DEP
break;
}
}
}
/*
* Inline version for rb_erase() use - we want to be able to inline
* and eliminate the dummy_rotate callback there
*/
///____rb_erase_color //TODO ADDONLY_GENERIC
/* Non-inline version for rb_erase_augmented() use */
///__rb_erase_color //TODO ADDONLY_GENERIC
/* TODO LESS_DEPENDENCIES MINIMALIZED
* Non-augmented rbtree manipulation functions.
*
* We use dummy augmented callbacks here, and have the compiler optimize them
* out of the rb_insert_color() and rb_erase() function definitions.
static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {}
static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {}
static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {}
static const struct rb_augment_callbacks dummy_callbacks = {
.propagate = dummy_propagate,
.copy = dummy_copy,
.rotate = dummy_rotate
};*/
void rb_insert_color(struct rb_node *node, struct rb_root *root)
{
__rb_insert(node, root);
}
///rb_erase //TODO ADDONLY_GENERIC
/** TODO LESS_DEPENDENCIES - AUGMENTED REMOVE
* Augmented rbtree manipulation functions.
*
* This instantiates the same __always_inline functions as in the non-augmented
* case, but this time with user-defined callbacks.
*/
///__rb_insert_augmented todo LESS_DEPENDENCIES - AUGMENTED REMOVE
////// Traversing
/*
* This function returns the first node (in sort order) of the tree.
*/
struct rb_node *rb_first(const struct rb_root *root)
{
struct rb_node *n;
n = root->rb_node;
if (!n)
return NULL;
while (n->rb_left)
n = n->rb_left;
return n;
}
struct rb_node *rb_last(const struct rb_root *root)
{
struct rb_node *n;
n = root->rb_node;
if (!n)
return NULL;
while (n->rb_right)
n = n->rb_right;
return n;
}
struct rb_node *rb_next(const struct rb_node *node)
{
struct rb_node *parent;
if (RB_EMPTY_NODE(node))
return NULL;
/*
* If we have a right-hand child, go down and then left as far
* as we can.
*/
if (node->rb_right) {
node = node->rb_right;
while (node->rb_left)
node = node->rb_left;
return (struct rb_node *)node;
}
/*
* No right-hand children. Everything down and left is smaller than us,
* so any 'next' node must be in the general direction of our parent.
* Go up the tree; any time the ancestor is a right-hand child of its
* parent, keep going up. First time it's a left-hand child of its
* parent, said parent is our 'next' node.
*/
while ((parent = rb_parent(node)) && node == parent->rb_right)
node = parent;
return parent;
}
struct rb_node *rb_prev(const struct rb_node *node)
{
struct rb_node *parent;
if (RB_EMPTY_NODE(node))
return NULL;
/*
* If we have a left-hand child, go down and then right as far
* as we can.
*/
if (node->rb_left) {
node = node->rb_left;
while (node->rb_right)
node = node->rb_right;
return (struct rb_node *)node;
}
/*
* No left-hand children. Go up till we find an ancestor which
* is a right-hand child of its parent.
*/
while ((parent = rb_parent(node)) && node == parent->rb_left)
node = parent;
return parent;
}
///rb_replace_node //TODO ADDONLY_GENERIC
///rb_replace_node_rcu //TODO LESS_DEPENDENCIES
static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
{
for (;;) {
if (node->rb_left)
node = node->rb_left;
else if (node->rb_right)
node = node->rb_right;
else
return (struct rb_node *)node;
}
}
struct rb_node *rb_next_postorder(const struct rb_node *node)
{
const struct rb_node *parent;
if (!node)
return NULL;
parent = rb_parent(node);
/* If we're sitting on node, we've already seen our children */
if (parent && node == parent->rb_left && parent->rb_right) {
/* If we are the parent's left node, go to the parent's right
* node then all the way down to the left */
return rb_left_deepest_node(parent->rb_right);
} else
/* Otherwise we are the parent's right node, and the parent
* should be next */
return (struct rb_node *)parent;
}
struct rb_node *rb_first_postorder(const struct rb_root *root)
{
if (!root->rb_node)
return NULL;
return rb_left_deepest_node(root->rb_node);
}
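/*
 * Traversal sketch: an illustrative payload struct embedding struct rb_node,
 * inserted with a hand-written descent plus rb_insert_color(), then walked in
 * key order with rb_first()/rb_next(). rb_link_node() and container_of() are
 * assumed to be provided by linuxK_rbtree_minimalized.h and its macro mock.
 */
struct intNodeSketch { int key; struct rb_node rb; };
static void intNodeInsertSketch(struct rb_root* root, struct intNodeSketch* nw)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;
	while (*link) {
		parent = *link;
		struct intNodeSketch* cur = container_of(parent, struct intNodeSketch, rb);
		link = nw->key < cur->key ? &parent->rb_left : &parent->rb_right;
	}
	rb_link_node(&nw->rb, parent, link);	/* attach as a red leaf */
	rb_insert_color(&nw->rb, root);		/* rebalance */
}
static int intNodeSumInorderSketch(struct rb_root* root)
{
	int sum = 0;
	for (struct rb_node* n = rb_first(root); n; n = rb_next(n))
		sum += container_of(n, struct intNodeSketch, rb)->key;
	return sum;
}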

@ -0,0 +1,511 @@
/*
* Matrix Market I/O library for ANSI C
*
* See http://math.nist.gov/MatrixMarket for details.
*
*
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include "mmio.h"
int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
double **val_, int **I_, int **J_)
{
FILE *f;
MM_typecode matcode;
int M, N, nz;
int i;
double *val;
int *I, *J;
if ((f = fopen(fname, "r")) == NULL)
return -1;
if (mm_read_banner(f, &matcode) != 0)
{
printf("mm_read_unsymetric: Could not process Matrix Market banner ");
printf(" in file [%s]\n", fname);
return -1;
}
if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) &&
mm_is_sparse(matcode)))
{
fprintf(stderr, "Sorry, this application does not support ");
fprintf(stderr, "Market Market type: [%s]\n",
mm_typecode_to_str(matcode));
return -1;
}
/* find out size of sparse matrix: M, N, nz .... */
if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0)
{
fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n");
return -1;
}
*M_ = M;
*N_ = N;
*nz_ = nz;
/* reserve memory for matrices */
I = (int *) malloc(nz * sizeof(int));
J = (int *) malloc(nz * sizeof(int));
val = (double *) malloc(nz * sizeof(double));
*val_ = val;
*I_ = I;
*J_ = J;
/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */
/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */
for (i=0; i<nz; i++)
{
fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i]);
I[i]--; /* adjust from 1-based to 0-based */
J[i]--;
}
fclose(f);
return 0;
}
int mm_is_valid(MM_typecode matcode)
{
if (!mm_is_matrix(matcode)) return 0;
if (mm_is_dense(matcode) && mm_is_pattern(matcode)) return 0;
if (mm_is_real(matcode) && mm_is_hermitian(matcode)) return 0;
if (mm_is_pattern(matcode) && (mm_is_hermitian(matcode) ||
mm_is_skew(matcode))) return 0;
return 1;
}
int mm_read_banner(FILE *f, MM_typecode *matcode)
{
char line[MM_MAX_LINE_LENGTH];
char banner[MM_MAX_TOKEN_LENGTH];
char mtx[MM_MAX_TOKEN_LENGTH];
char crd[MM_MAX_TOKEN_LENGTH];
char data_type[MM_MAX_TOKEN_LENGTH];
char storage_scheme[MM_MAX_TOKEN_LENGTH];
char *p;
mm_clear_typecode(matcode);
if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL)
return MM_PREMATURE_EOF;
if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type,
storage_scheme) != 5)
return MM_PREMATURE_EOF;
for (p=mtx; *p!='\0'; *p=tolower(*p),p++); /* convert to lower case */
for (p=crd; *p!='\0'; *p=tolower(*p),p++);
for (p=data_type; *p!='\0'; *p=tolower(*p),p++);
for (p=storage_scheme; *p!='\0'; *p=tolower(*p),p++);
/* check for banner */
if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0)
return MM_NO_HEADER;
/* first field should be "mtx" */
if (strcmp(mtx, MM_MTX_STR) != 0)
return MM_UNSUPPORTED_TYPE;
mm_set_matrix(matcode);
/* second field describes whether this is a sparse matrix (in coordinate
storage) or a dense array */
if (strcmp(crd, MM_SPARSE_STR) == 0)
mm_set_sparse(matcode);
else
if (strcmp(crd, MM_DENSE_STR) == 0)
mm_set_dense(matcode);
else
return MM_UNSUPPORTED_TYPE;
/* third field */
if (strcmp(data_type, MM_REAL_STR) == 0)
mm_set_real(matcode);
else
if (strcmp(data_type, MM_COMPLEX_STR) == 0)
mm_set_complex(matcode);
else
if (strcmp(data_type, MM_PATTERN_STR) == 0)
mm_set_pattern(matcode);
else
if (strcmp(data_type, MM_INT_STR) == 0)
mm_set_integer(matcode);
else
return MM_UNSUPPORTED_TYPE;
/* fourth field */
if (strcmp(storage_scheme, MM_GENERAL_STR) == 0)
mm_set_general(matcode);
else
if (strcmp(storage_scheme, MM_SYMM_STR) == 0)
mm_set_symmetric(matcode);
else
if (strcmp(storage_scheme, MM_HERM_STR) == 0)
mm_set_hermitian(matcode);
else
if (strcmp(storage_scheme, MM_SKEW_STR) == 0)
mm_set_skew(matcode);
else
return MM_UNSUPPORTED_TYPE;
return 0;
}
int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz)
{
if (fprintf(f, "%d %d %d\n", M, N, nz) != 3)
return MM_COULD_NOT_WRITE_FILE;
else
return 0;
}
int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz )
{
char line[MM_MAX_LINE_LENGTH];
int num_items_read;
/* set return null parameter values, in case we exit with errors */
*M = *N = *nz = 0;
/* now continue scanning until you reach the end-of-comments */
do
{
if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
return MM_PREMATURE_EOF;
}while (line[0] == '%');
/* line[] is either blank or has M,N, nz */
if (sscanf(line, "%d %d %d", M, N, nz) == 3)
return 0;
else
do
{
num_items_read = fscanf(f, "%d %d %d", M, N, nz);
if (num_items_read == EOF) return MM_PREMATURE_EOF;
}
while (num_items_read != 3);
return 0;
}
int mm_read_mtx_array_size(FILE *f, int *M, int *N)
{
char line[MM_MAX_LINE_LENGTH];
int num_items_read;
/* set return null parameter values, in case we exit with errors */
*M = *N = 0;
/* now continue scanning until you reach the end-of-comments */
do
{
if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
return MM_PREMATURE_EOF;
}while (line[0] == '%');
    /* line[] is either blank or has M,N */
if (sscanf(line, "%d %d", M, N) == 2)
return 0;
else /* we have a blank line */
do
{
num_items_read = fscanf(f, "%d %d", M, N);
if (num_items_read == EOF) return MM_PREMATURE_EOF;
}
while (num_items_read != 2);
return 0;
}
int mm_write_mtx_array_size(FILE *f, int M, int N)
{
if (fprintf(f, "%d %d\n", M, N) != 2)
return MM_COULD_NOT_WRITE_FILE;
else
return 0;
}
/*-------------------------------------------------------------------------*/
/******************************************************************/
/* use when I[], J[], and val[] are already allocated                 */
/******************************************************************/
int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
double val[], MM_typecode matcode)
{
int i;
if (mm_is_complex(matcode))
{
for (i=0; i<nz; i++)
if (fscanf(f, "%d %d %lg %lg", &I[i], &J[i], &val[2*i], &val[2*i+1])
!= 4) return MM_PREMATURE_EOF;
}
else if (mm_is_real(matcode))
{
for (i=0; i<nz; i++)
{
if (fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i])
!= 3) return MM_PREMATURE_EOF;
}
}
else if (mm_is_pattern(matcode))
{
for (i=0; i<nz; i++)
if (fscanf(f, "%d %d", &I[i], &J[i])
!= 2) return MM_PREMATURE_EOF;
}
else
return MM_UNSUPPORTED_TYPE;
return 0;
}
int mm_read_mtx_crd_entry(FILE *f, int *I, int *J,
double *real, double *imag, MM_typecode matcode)
{
if (mm_is_complex(matcode))
{
if (fscanf(f, "%d %d %lg %lg", I, J, real, imag)
!= 4) return MM_PREMATURE_EOF;
}
else if (mm_is_real(matcode))
{
if (fscanf(f, "%d %d %lg\n", I, J, real)
!= 3) return MM_PREMATURE_EOF;
}
else if (mm_is_pattern(matcode))
{
if (fscanf(f, "%d %d", I, J) != 2) return MM_PREMATURE_EOF;
}
else
return MM_UNSUPPORTED_TYPE;
return 0;
}
/************************************************************************
    mm_read_mtx_crd()  fills M, N, nz, the array of values, and returns the
                        type code, e.g. 'MCRS'
if matrix is complex, values[] is of size 2*nz,
(nz pairs of real/imaginary values)
************************************************************************/
int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J,
double **val, MM_typecode *matcode)
{
int ret_code;
FILE *f;
if (strcmp(fname, "stdin") == 0) f=stdin;
else
if ((f = fopen(fname, "r")) == NULL)
return MM_COULD_NOT_READ_FILE;
if ((ret_code = mm_read_banner(f, matcode)) != 0)
return ret_code;
if (!(mm_is_valid(*matcode) && mm_is_sparse(*matcode) &&
mm_is_matrix(*matcode)))
return MM_UNSUPPORTED_TYPE;
if ((ret_code = mm_read_mtx_crd_size(f, M, N, nz)) != 0)
return ret_code;
*I = (int *) malloc(*nz * sizeof(int));
*J = (int *) malloc(*nz * sizeof(int));
*val = NULL;
if (mm_is_complex(*matcode))
{
*val = (double *) malloc(*nz * 2 * sizeof(double));
ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
*matcode);
if (ret_code != 0) return ret_code;
}
else if (mm_is_real(*matcode))
{
*val = (double *) malloc(*nz * sizeof(double));
ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
*matcode);
if (ret_code != 0) return ret_code;
}
else if (mm_is_pattern(*matcode))
{
ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
*matcode);
if (ret_code != 0) return ret_code;
}
if (f != stdin) fclose(f);
return 0;
}
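/*
 * Illustrative sketch (assumption, not original mmio code): typical use of
 * mm_read_mtx_crd(). The path is a placeholder. For complex matrices val
 * holds 2*nz doubles; for pattern matrices val stays NULL.
 */
#if 0
static int example_read_crd(const char *path)
{
    int M, N, nz, *I, *J;
    double *val;
    MM_typecode code;
    char *ts;
    if (mm_read_mtx_crd((char *)path, &M, &N, &nz, &I, &J, &val, &code))
        return -1;
    ts = mm_typecode_to_str(code);       /* strdup'd string: free after use */
    printf("read [%s]: %d x %d, %d entries\n", ts, M, N, nz);
    free(ts);
    free(I); free(J); free(val);         /* free(NULL) is a no-op for pattern matrices */
    return 0;
}
#endif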
int mm_write_banner(FILE *f, MM_typecode matcode)
{
char *str = mm_typecode_to_str(matcode);
int ret_code;
ret_code = fprintf(f, "%s %s\n", MatrixMarketBanner, str);
free(str);
    /* fprintf() returns characters written; a negative value means a write error */
    if (ret_code < 0)
        return MM_COULD_NOT_WRITE_FILE;
else
return 0;
}
int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
double val[], MM_typecode matcode)
{
FILE *f;
int i;
if (strcmp(fname, "stdout") == 0)
f = stdout;
else
if ((f = fopen(fname, "w")) == NULL)
return MM_COULD_NOT_WRITE_FILE;
    /* print banner followed by typecode */
    fprintf(f, "%s ", MatrixMarketBanner);
    {
        char *ts = mm_typecode_to_str(matcode);  /* strdup'd string: free after printing */
        fprintf(f, "%s\n", ts);
        free(ts);
    }
/* print matrix sizes and nonzeros */
fprintf(f, "%d %d %d\n", M, N, nz);
/* print values */
if (mm_is_pattern(matcode))
for (i=0; i<nz; i++)
fprintf(f, "%d %d\n", I[i], J[i]);
else
if (mm_is_real(matcode))
for (i=0; i<nz; i++)
fprintf(f, "%d %d %20.16g\n", I[i], J[i], val[i]);
else
if (mm_is_complex(matcode))
for (i=0; i<nz; i++)
fprintf(f, "%d %d %20.16g %20.16g\n", I[i], J[i], val[2*i],
val[2*i+1]);
else
{
if (f != stdout) fclose(f);
return MM_UNSUPPORTED_TYPE;
}
if (f !=stdout) fclose(f);
return 0;
}
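/*
 * Illustrative sketch (assumption): writing a tiny 2x2 real coordinate matrix
 * with mm_write_mtx_crd(). Only typecode macros already used in this file are
 * relied on; "out.mtx" is a placeholder path and indices are 1-based.
 */
#if 0
static int example_write_crd(void)
{
    MM_typecode code;
    int I[2] = {1, 2}, J[2] = {1, 2};
    double val[2] = {3.0, 4.0};
    mm_clear_typecode(&code);
    mm_set_matrix(&code);
    mm_set_sparse(&code);
    mm_set_real(&code);
    mm_set_general(&code);
    return mm_write_mtx_crd((char *)"out.mtx", 2, 2, 2, I, J, val, code);
}
#endif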
/**
* Create a new copy of a string s. mm_strdup() is a common routine, but
* not part of ANSI C, so it is included here. Used by mm_typecode_to_str().
*
*/
char *mm_strdup(const char *s)
{
int len = strlen(s);
char *s2 = (char *) malloc((len+1)*sizeof(char));
return strcpy(s2, s);
}
char *mm_typecode_to_str(MM_typecode matcode)
{
    char buffer[MM_MAX_LINE_LENGTH];
    char *types[4];
    /* check for MTX type */
    if (mm_is_matrix(matcode))
        types[0] = MM_MTX_STR;
    else
        return NULL;
/* check for CRD or ARR matrix */
if (mm_is_sparse(matcode))
types[1] = MM_SPARSE_STR;
else
if (mm_is_dense(matcode))
types[1] = MM_DENSE_STR;
else
return NULL;
/* check for element data type */
if (mm_is_real(matcode))
types[2] = MM_REAL_STR;
else
if (mm_is_complex(matcode))
types[2] = MM_COMPLEX_STR;
else
if (mm_is_pattern(matcode))
types[2] = MM_PATTERN_STR;
else
if (mm_is_integer(matcode))
types[2] = MM_INT_STR;
else
return NULL;
/* check for symmetry type */
if (mm_is_general(matcode))
types[3] = MM_GENERAL_STR;
else
if (mm_is_symmetric(matcode))
types[3] = MM_SYMM_STR;
else
if (mm_is_hermitian(matcode))
types[3] = MM_HERM_STR;
else
if (mm_is_skew(matcode))
types[3] = MM_SKEW_STR;
else
return NULL;
sprintf(buffer,"%s %s %s %s", types[0], types[1], types[2], types[3]);
return mm_strdup(buffer);
}

@ -0,0 +1,391 @@
/*
* Sp3MM_for_AlgebraicMultiGrid
* (C) Copyright 2021-2022
* Andrea Di Iorio
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
* not be used to endorse or promote products derived from this
* software without specific written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "sparseMatrix.h"
#include "mmio.h"
#include "parser.h"
#include "macros.h"
#include "utils.h"
////COO PARSE
int MMCheck(MM_typecode mcode) {
if (!mm_is_matrix(mcode)){ //consistency checks among flags in @mcode
ERRPRINT("invalid matrix: not a matrix\n");
return EXIT_FAILURE;
}
if (mm_is_dense(mcode) ){ //|| mm_is_array(mcode) ){
ERRPRINT("invalid matrix: not a supported sparse matrix\tDENSE MAT\n");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
entry* MMtoCOO(ulong* NZ, FILE *fp, MM_typecode mcode,ulong* rowLens){
int scanndRet=0;
ulong nzTrgt=*NZ,nzIdx=0; //expanded num of nz (in case of sym matrix)
ulong diagEntries=0, row = 0, col = 0;//current entry's row,col from MM -> 1 based
double val = 0;
entry* entries = NULL; //COO parsed entries
///init
if (mm_is_symmetric(mcode)){
nzTrgt = 2* (*NZ); //upscale max num of nz in the matrix
VERBOSE printf("MMtoCOO:\tparsing a simmetric matrix\n");
}
if (!(entries = malloc(nzTrgt * sizeof(*entries)))){
ERRPRINT("MMtoCOO: entries malloc errd\n");
return NULL;
}
///parse MM fp lines into COOordinate entries
while (1) { // Reading the fp until EOF
if (mm_is_pattern(mcode)){
scanndRet = fscanf(fp, "%lu %lu\n", &row, &col);
val = 1.0;
} else if (mm_is_real(mcode) || (mm_is_integer(mcode))){
scanndRet = fscanf(fp, "%lu %lu %lf\n", &row, &col, &val);
}
if (scanndRet == EOF){ //TODO more strict check with type&ret?
if (ferror(fp)){
perror("fscanf EOF");
goto _err;
} else break;
}
CONSISTENCY_CHECKS{
//TODO USELESS ? ? ?
if ((mm_is_pattern(mcode) && scanndRet != 2) ||
(!mm_is_pattern(mcode) && scanndRet != 3)){
ERRPRINT("invalid matrix: not consistent entry scannable\n");
goto _err;
}
}
////ADD THE CURRENT MATRIX ENTRY
rowLens[row-1]++;
entries[nzIdx++]=(entry) { .row=row-1, .col=col-1, .val=val };
//also mirrored entry if sym.matrix with reflected idx inside matrix limits
if (mm_is_symmetric(mcode) && row != col ){
//TODO COSTRAINED FORMAT ?&& row <= mat->N && col <= mat->M ){
SWAP(row,col);
rowLens[row-1]++;
entries[nzIdx++]=(entry) { .row=row-1, .col=col-1, .val=val };
}
else diagEntries++; //for CONSISTENCY_CHECKS only
}
//CONSISTENCY_CHECKS
nzTrgt = *NZ;
if(mm_is_symmetric(mcode)) nzTrgt = 2*(*NZ) - diagEntries;
assert( nzIdx == nzTrgt );
//update NZ
*NZ = nzIdx;
return entries;
_err:
free(entries); return NULL;
}
void freeMatrixMarket(MatrixMarket* mm){
if (!mm) return;
free(mm->entries);
free(mm->rowLens);
free(mm);
}
MatrixMarket* MMRead(char* matPath){
FILE* fp = fopen(matPath, "r");
if (!fp){
perror("fopen");
return NULL;
}
MatrixMarket* out = calloc(1,sizeof(*out));
if (!out){
ERRPRINT("MMRead out malloc errd\n");
goto err;
}
//banner -> parse matrix specs
if (mm_read_banner(fp, &out->mcode) != 0) {
fprintf(stderr,"mm_read_banner err at:%s\n",matPath);
goto err;
}
//assert matrix is compatible with this app scope
if (MMCheck(out->mcode)) goto err;
//parse sizes
//TODO OVERCOME uint limitation?
if(mm_read_mtx_crd_size(fp,(uint*) &out->M, (uint*) &out->N, (uint*) &out->NZ)){
fprintf(stderr,"mm_read_mtx_crd_size err at %s:\n",matPath);
goto err;
}
if (!(out->rowLens = calloc(out->M,sizeof(*(out->rowLens))))){
ERRPRINT("MMRead:\trowLens calloc errd\n");
goto err;
}
if (!(out->entries = MMtoCOO(&out->NZ, fp, out->mcode,out->rowLens))){
ERRPRINTS("MAT PARSE TO CSR ERR at:%s\n",matPath);
goto err;
}
goto _end;
err:
freeMatrixMarket(out);
out = NULL;
_end:
fclose(fp);
return out;
}
////COO -> ANYTHING ELSE CONVERSION
int COOtoCSR(entry* entries, spmat* mat,ulong* rowLens){
int out = EXIT_FAILURE;
ulong idx;
long* _rowsLastCol = NULL; //for each row -> last added entry's columnIdx
ulong* rowsNextIdx = NULL; //for each row -> next entry progressive idx
if (!(rowsNextIdx = calloc(mat->M,sizeof(*rowsNextIdx)))){
ERRPRINT("MMtoCOO: rowsNextIdx calloc errd\n");
goto _end;
}
CONSISTENCY_CHECKS{ //alloc and init aux arr for entries sort check
if (!(_rowsLastCol = malloc(mat->M*sizeof(*_rowsLastCol)))){
ERRPRINT("MMtoCOO: _rowsLastCol malloc errd\n");
goto _end;
}
memset(_rowsLastCol,-1,mat->M*sizeof(*_rowsLastCol));
}
/*TODO OLD
* //get rowLens->IRP (partial), TODO moved MMtoCOO to avoid FULL rescan entries
* for (ulong i=0; i<mat->NZ; i++) mat->IRP[entries[i].row+1]++;
* memcpy(mat->RL,mat->IRP + 1,sizeof(*mat->IRP) * mat->M); //TODO in next ifdef
* for (ulong i=2; i<mat->M+1; i++) mat->IRP[i] += mat->IRP[i-1];
* OLD2: rowLens memcpy ... no just moved the pointer
* #ifdef ROWLENS
* memcpy(mat->RL,rowLens,sizeof(*rowLens) * mat->M); //TODO in next ifdef
* #endif
*/
	//IRP: transform the row lengths into increments to build the row index "pointer"
	//IRP[0] = 0 mandatory; IRP[1] = len of row 0, ..., IRP[M] = end of last row (== NZ)
memcpy(mat->IRP+1,rowLens,sizeof(*rowLens) * mat->M);//init IRP with rows lens
for (ulong i=2; i<mat->M+1; i++) mat->IRP[i] += mat->IRP[i-1];
CONSISTENCY_CHECKS assert(mat->IRP[mat->M] == mat->NZ);
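	/* Worked example of the IRP construction above (illustration only):
	 * rowLens = {2,0,3}  ->  IRP after memcpy     = {0,2,0,3}
	 *                    ->  IRP after prefix sum = {0,2,2,5} (last entry == NZ)
	 * so row r owns the index range [IRP[r], IRP[r+1]) in AS/JA. */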
///FILL
	//TODO EXPECTED: entries with .col sorted within each row -> CONSISTENCY_CHECKS
	//(rows need not appear contiguously in @entries)
	//write the entries in CSR format
entry* e;
for (ulong i=0; i<mat->NZ; i++) {
e = entries+i;
CONSISTENCY_CHECKS{ //TODO CHECK IF COO ENTRIES ARE SORTED
/*#pragma message("COO sorting check enabled")*/
if (_rowsLastCol[e->row] >= (long) e->col){
ERRPRINTS("not sorted entry:%ld,%ld,%lf",e->row,e->col,e->val);
goto _end;
}
_rowsLastCol[e->row] = e->col;
}
idx = mat -> IRP[e->row] + rowsNextIdx[e->row]++;
mat -> AS[idx] = e->val;
mat -> JA[idx] = e->col;
}
out = EXIT_SUCCESS;
_end:
if(rowsNextIdx) free(rowsNextIdx);
if(_rowsLastCol) free(_rowsLastCol);
return out;
}
int COOtoELL(entry* entries, spmat* mat, ulong* rowLens){
int out=EXIT_FAILURE;
ulong maxRow = 0, col, _ellEntriesTot, *rowsNextCol;
long* _rowsLastCol=NULL;
entry* e;
for (ulong i=0; i<mat->M; i++) maxRow = MAX(maxRow,rowLens[i]);
_ellEntriesTot = 2*mat->M*maxRow;
#ifdef LIMIT_ELL_SIZE
if ( _ellEntriesTot > ELL_MAX_ENTRIES ){
ERRPRINTS("Required entries %lu -> %lu uMB for the matrix exceed the "
"designated threashold of: %lu -> %lu MB for ellpack\n",
_ellEntriesTot,(sizeof(double)*_ellEntriesTot) >> 20,
ELL_MAX_ENTRIES,(sizeof(double)*ELL_MAX_ENTRIES) >> 20);
return EXIT_FAILURE;
}
#endif
//malloc aux vects
if (!(rowsNextCol = calloc(mat->M,sizeof(*rowsNextCol)))){
ERRPRINT("MMtoELL:\trowsNextCol calloc errd\n");
goto _end;
}
CONSISTENCY_CHECKS{ //alloc and init aux arr for entries SORT CHECK
if (!(_rowsLastCol = malloc(mat->M*sizeof(*_rowsLastCol)))){
ERRPRINT("MMtoELL:\trowsLastCol malloc errd\n");
goto _end;
}
memset(_rowsLastCol,-1,mat->M*sizeof(*_rowsLastCol));
}
///malloc dependant to MAX ROW LEN, err free in the caller
if (!(mat->AS = calloc(mat->M * maxRow, sizeof(*(mat->AS))))){
ERRPRINT("MMtoELL:\tELL->AS calloc errd\n");
goto _end;
} //zero init for auto rows residual fill with 0
if (!(mat->JA = calloc(mat->M * maxRow, sizeof(*(mat->JA))))){
ERRPRINT("MMtoELL:\tELL->JA calloc errd\n");
goto _end;
}
mat->MAX_ROW_NZ = maxRow;
/*#ifdef ROWLENS
*memcpy(mat->RL,rowLens,sizeof(*rowLens) * mat->M); //TODO in next ifdef
*#endif
*/
///FILL NZ
	//TODO EXPECTED: entries with .col sorted within each row -> CONSISTENCY_CHECKS
	//(rows need not appear contiguously in @entries)
for (ulong i=0; i<mat->NZ; i++){
e = entries + i;
		CONSISTENCY_CHECKS{ //TODO CHECK IF COO ENTRIES ARE COLUMN-SORTED within each row
/*#pragma message("COO sorting check enabled")*/
if (_rowsLastCol[e->row] >= (long) e->col){
ERRPRINTS("not sorted entry:%ld,%ld,%lf",
e->row,e->col,e->val);
goto _end;
}
_rowsLastCol[e->row] = e->col;
}
col = rowsNextCol[e->row]++; //place entry in its row's sequent spot
mat->AS[ IDX2D(e->row,col,maxRow) ] = e->val;
mat->JA[ IDX2D(e->row,col,maxRow) ] = e->col;
}
///FILL PAD
ulong padded = 0,paddedEntries = mat->M*mat->MAX_ROW_NZ;
for (ulong r=0; r<mat->M; r++){
for (ulong c=rowLens[r],j=IDX2D(r,c,maxRow); c<maxRow; c++,j++,padded++){
//mat->AS[j] = ELL_AS_FILLER; //TODO ALREADY DONE IN CALLOC
//mat->JA[j] = mat->JA[rowLens[r]-1]; //ELL_JA_FILLER; //TODO calloc CUDA benefit?
}
}
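	/* Illustration (not part of the original code): with maxRow = 3, a row of
	 * length 2 holding {(.col=4,.val=7),(.col=9,.val=1)} is stored as
	 *   AS row slice = {7, 1, 0}   JA row slice = {4, 9, 0}
	 * at AS[IDX2D(r,c,maxRow)] / JA[IDX2D(r,c,maxRow)]; the trailing slots keep
	 * the calloc zeros, which the loop above only counts as padding. */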
VERBOSE{
printf("padded %lu entries = %lf%% of NZ\n",padded,100*padded/(double) mat->NZ);
printf("ELL matrix of: %lu paddedEntries -> %lu MB of JA+AS\n",
paddedEntries,(paddedEntries*sizeof(*(mat->JA))+paddedEntries*sizeof(*(mat->AS))) >> 20);
}
out = EXIT_SUCCESS;
_end:
if(rowsNextCol) free(rowsNextCol);
if(_rowsLastCol) free(_rowsLastCol);
return out;
}
////wrapper MM -> specialized target
spmat* MMtoCSR(char* matPath){
spmat* mat = NULL;
MatrixMarket* mm = MMRead(matPath);
if (!mm){
ERRPRINT("MMtoCSR parse err\n");
return NULL;
}
if (!(mat = calloc(1,sizeof(*mat)))){
ERRPRINT("MMtoCSR: mat struct alloc errd");
goto err;
}
mat -> M = mm->M;
mat -> N = mm->N;
mat -> NZ= mm->NZ;
//alloc sparse matrix components
if (!(mat->IRP = calloc(mat->M+1,sizeof(*(mat->IRP))))){
ERRPRINT("MMtoCSR: IRP calloc err\n");
goto err;
}
////alloc core struct of CSR
if(!(mat->JA = malloc(mat->NZ*sizeof(*(mat->JA))))){
ERRPRINT("MMtoCSR: JA malloc err\n");
goto err;
}
if(!(mat->AS = malloc(mat->NZ*sizeof(*(mat->AS))))){
ERRPRINT("MMtoCSR: AS malloc err\n");
goto err;
}
if (COOtoCSR(mm->entries,mat,mm->rowLens)) goto err;
#ifdef ROWLENS
mat->RL = mm->rowLens;
mm->rowLens = NULL; //avoid free in @freeMatrixMarket
#endif
VERBOSE
printf("MMtoCSR: %lu NZ entries-> %lu MB of AS+JA+IRP\n",mat->NZ,
(mat->NZ*sizeof(*(mat->AS))+mat->NZ*sizeof(*(mat->JA))+(1+mat->M*sizeof(*(mat->IRP))))>>20);
goto _free;
err:
if (mat) freeSpmat(mat);
mat = NULL;
_free:
freeMatrixMarket(mm);
return mat;
}
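/*
 * Illustrative sketch (assumption, not original parser code): loading a CSR
 * matrix through the MMtoCSR() wrapper above. The path is a placeholder;
 * freeSpmat() is the destructor already used in this file.
 */
#if 0
static int example_load_csr(const char *path)
{
	spmat *a = MMtoCSR((char *)path);
	if (!a)
		return EXIT_FAILURE;
	printf("CSR: %lu x %lu, %lu nonzeros, IRP[M]=%lu\n", a->M, a->N, a->NZ, a->IRP[a->M]);
	freeSpmat(a);
	return EXIT_SUCCESS;
}
#endif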
spmat* MMtoELL(char* matPath){
spmat* mat = NULL;
MatrixMarket* mm = MMRead(matPath);
if (!mm){
ERRPRINT("MMtoELL: parse err\n");
return NULL;
}
if (!(mat = calloc(1,sizeof(*mat)))){
ERRPRINT("MMtoELL: mat struct alloc errd");
goto err;
}
	////alloc core struct of ELL
mat -> M = mm->M;
mat -> N = mm->N;
mat -> NZ= mm->NZ;
if (COOtoELL(mm->entries,mat,mm->rowLens)) goto err;
#ifdef ROWLENS
mat->RL = mm->rowLens;
mm->rowLens = NULL; //avoid free in @freeMatrixMarket
#endif
goto _free;
err:
if(mat) freeSpmat(mat);
mat = NULL;
_free:
freeMatrixMarket(mm);
return mat;
}

@ -40,35 +40,4 @@ module psb_objhandle_mod
type, bind(c) :: psb_c_zspmat
type(c_ptr) :: item = c_null_ptr
end type psb_c_zspmat
  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  ! sp3mm c code structs
  ! TODO : rename to conventions
  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  ! note: BIND(C) derived types take no NAME= specifier and cannot hold
  ! allocatable components, so the C arrays are mirrored as c_ptr members
  type, bind(c) :: spmat_t
    ! number of non zeros and dimensions
    integer(c_size_t) :: nz, m, n
    ! value array (double* on the C side)
    type(c_ptr) :: as = c_null_ptr
    ! columns array (ulong* on the C side)
    type(c_ptr) :: ja = c_null_ptr
    ! row index pointers array (ulong* on the C side)
    type(c_ptr) :: irp = c_null_ptr
    ! lengths of the rows (ulong* on the C side)
    type(c_ptr) :: rl = c_null_ptr
    ! max value of rl
    integer(c_size_t) :: max_row_nz
  end type spmat_t
  type, bind(c) :: config_t
    ! dimensions of the grid
    integer(c_short) :: grid_rows, grid_cols
    ! how to compute symb mul (if required)
    integer(c_int) :: symb_mm_row_impl_id
    ! thread num to use in OMP parallel region
    integer(c_int) :: thread_num
    ! CHUNKS_DISTR_INTERF func pntr
    type(c_ptr) :: chunk_distrb_func
  end type config_t
end module psb_objhandle_mod