added sp3mm4amg source code to the repository and created a C wrapper routine to be used in the Fortran interface
parent 1a4ae1e973
commit 0c88352530
@@ -0,0 +1,7 @@
{
	"fortran.fortls.path": "/usr/bin/fortls",
	"files.associations": {
		"sp3mm_csr_omp_multi.h": "c",
		"sp3mm_csr_omp_ub_generic.h": "c"
	}
}
@@ -0,0 +1,15 @@
Copyright Andrea Di Iorio 2022
This file is part of Sp3MM_4_AMG_OMP_CUDA_C_FORTRAN
Sp3MM_4_AMG_OMP_CUDA_C_FORTRAN is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Sp3MM_4_AMG_OMP_CUDA_C_FORTRAN is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Sp3MM_4_AMG_OMP_CUDA_C_FORTRAN. If not, see <http://www.gnu.org/licenses/>.
@@ -0,0 +1,66 @@
/*
 * Sp3MM_for_AlgebraicMultiGrid
 * (C) Copyright 2021-2022
 * 	Andrea Di Iorio
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions, and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
 *    not be used to endorse or promote products derived from this
 *    software without specific written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

//Get multiple implementations for C-Fortran indexing by re-define & re-include
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <alloca.h>	//TODO quick hold few CSR cols partition sizes
#include <omp.h>
//UB version deps
#include "Sp3MM_CSR_OMP_Multi.h"
#include "SpMMUtilsMulti.h"
#include "sparseUtilsMulti.h"
#include "ompChunksDivide.h"
#include "parser.h"
#include "utils.h"
#include "macros.h"
#include "sparseMatrix.h"

#include "inlineExports.h"

//Symb version deps
#include "Sp3MM_CSR_OMP_Multi.h"


//global vars -> audit (timing)
double Start,End,Elapsed,ElapsedInternal;

#define OFF_F 0
#include "inlineExports_Generic.h"
#include "Sp3MM_CSR_OMP_UB_Generic.c"
#include "Sp3MM_CSR_OMP_Num_Generic.c"
#undef OFF_F

#define OFF_F 1
#include "inlineExports_Generic.h"
#include "Sp3MM_CSR_OMP_UB_Generic.c"
#include "Sp3MM_CSR_OMP_Num_Generic.c"
#undef OFF_F
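The double re-include above is the whole multi-versioning mechanism: the same generic translation unit is compiled twice, once with OFF_F == 0 (C, 0-based indexes) and once with OFF_F == 1 (Fortran, 1-based indexes), and a token-pasting CAT macro gives each generated function a distinct _0/_1 suffix. A minimal self-contained sketch of the trick (illustrative names only: here the generic body is compressed into a macro instead of a re-included .c file, and rowStart_ is not a repository identifier):

	#include <stdio.h>

	#define CAT_(a,b)	a##b
	#define CAT(a,b)	CAT_(a,b)

	//generic body, normally kept in a separate *_Generic.c re-included per indexing base
	#define DEFINE_ROW_START \
		static long CAT(rowStart_,OFF_F)(const long* IRP, long r){ \
			return IRP[r] - OFF_F;	/*shift away the Fortran 1-based offset*/ \
		}

	#define OFF_F 0
	DEFINE_ROW_START	//defines rowStart_0
	#undef OFF_F
	#define OFF_F 1
	DEFINE_ROW_START	//defines rowStart_1
	#undef OFF_F

	int main(void){
		long irpC[] = {0,2,5};	//C-style CSR row pointers
		long irpF[] = {1,3,6};	//Fortran-style (1-based) row pointers
		//both print 2: the start of row 1 expressed 0-based
		printf("%ld %ld\n", rowStart_0(irpC,1), rowStart_1(irpF,1));
		return 0;
	}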
@@ -0,0 +1,249 @@
/*
 * Sp3MM_for_AlgebraicMultiGrid
 * (C) Copyright 2021-2022
 * 	Andrea Di Iorio
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions, and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
 *    not be used to endorse or promote products derived from this
 *    software without specific written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*#pragma message( "compiling SpMM_CSR_OMP_Generic.c with OFF_F as:" STR(OFF_F) )*/
#ifndef OFF_F
	#error generic implementation requires OFF_F defined
#endif

#ifndef SP3MM_OMP_SYMB
#define SP3MM_OMP_SYMB

//allocCSRSpMatSymbStep aux functions: set IRP from the symb step's output row sizes
static inline idx_t _setCSR_IRP_1DPartitioning(spmat* m, idx_t* rowSizes){
	idx_t r,cumulSize;
	for (r=0,cumulSize=0; r<m->M; cumulSize += rowSizes[r++])
		m->IRP[r] = cumulSize;
	DEBUGCHECKS	assert(cumulSize == rowSizes[m->M]);
	m->IRP[m->M] = cumulSize;
	return cumulSize;
}
static inline idx_t _setCSR_IRP_2DPartitioning(spmat* m,idx_t* rowSizes,ushort gridCols){
	idx_t cumulSize=0;
	for (idx_t r=0,cumulSizeOld; r<m->M; r++){
		m->IRP[r] = cumulSize;
		for (ushort gc=0; gc<gridCols; gc++){
			cumulSizeOld = cumulSize;
			cumulSize += rowSizes[ IDX2D(r,gc,gridCols) ];
			//in-place update of the 2D rowParts lens matrix into IRP-like start offsets
			rowSizes[ IDX2D(r,gc,gridCols) ] = cumulSizeOld;
		}
	}
	DEBUGCHECKS	assert(cumulSize == rowSizes[m->M*gridCols]);
	m->IRP[m->M] = cumulSize;
	return cumulSize;
}
/*
 * allocate CSR mat @m, correctly setting up IRP and the other buffer allocations
 * if @gridCols == 1: (1D partitioning of @m) -> @rowSizes is an array of row lengths
 * if @gridCols > 1:  (2D partitioning of @m) -> @rowSizes is a matrix whose
 * elem i,j = length of the j-th colPartition (out of @gridCols) of the i-th row
 */
static inline int allocCSRSpMatSymbStep(spmat* m,idx_t* rowSizes,ushort gridCols){
	//setup IRP and get the cumulative, whole size of @m
	if (!gridCols)	return EXIT_FAILURE;
	idx_t cumulSize;
	if (gridCols == 1)	cumulSize = _setCSR_IRP_1DPartitioning(m,rowSizes);
	/*if (gridCols > 1)	-- static analyzer misses unsignedness...*/
	else			cumulSize = _setCSR_IRP_2DPartitioning(m,rowSizes,gridCols);
	m->IRP[m->M] = cumulSize;
	m->NZ = cumulSize;

	//cumulSize == total NZ for both partitionings (rowSizes[m->M] only holds it in the 1D case)
	if (!(m->AS = malloc(cumulSize * sizeof(*m->AS))) ){
		ERRPRINT("allocCSRSpMatSymbStep m->AS malloc errd\n");
		return EXIT_FAILURE;
	}
	if (!(m->JA = malloc(cumulSize * sizeof(*m->JA))) ){
		ERRPRINT("allocCSRSpMatSymbStep m->JA malloc errd\n");
		return EXIT_FAILURE;
	}
	return EXIT_SUCCESS;
}
#endif
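For reference, the 1D contract of allocCSRSpMatSymbStep: rowSizes has M+1 entries, rowSizes[r] = nnz of output row r and rowSizes[M] = total nnz, which is exactly the layout the SpMM_Symb_* drivers below return. A minimal usage sketch under those assumptions (toy spmat reduced to the fields actually touched; hypothetical driver code, not from the repository):

	//hypothetical driver: symb step output -> CSR allocation (1D partitioning)
	spmat ab = {0};
	ab.M   = 3;
	ab.IRP = calloc(ab.M+1, sizeof(*ab.IRP));
	idx_t rowSizes[] = {2, 0, 3, 5};	//per-row nnz, last entry = total
	if (allocCSRSpMatSymbStep(&ab, rowSizes, 1))	exit(EXIT_FAILURE);
	//now ab.IRP = {0,2,2,5}, ab.NZ = 5, ab.JA/ab.AS are sized for 5 entries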
//////////////////// COMPUTE CORE Sp[3]MM SYMB-NUMB PHASE //////////////////////////
////////Sp3MM as 2 x SpMM
///1D
spmat* CAT(spmmRowByRow_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
	ACC_DENSE *accVects = NULL,*acc;
	SPMM_ACC* outAccumul=NULL;
	idx_t* rowsSizes = NULL;
	///init AB matrix with SPMM heuristic preallocation
	spmat* AB = allocSpMatrix(A->M,B->N);
	if (!AB)	goto _err;
	///aux structures alloc
	if (!(accVects = _initAccVectors(cfg->threadNum,AB->N))){
		ERRPRINT("accVects init failed\n");
		goto _err;
	}
	///SYMBOLIC STEP
	if (!(rowsSizes = CAT(SpMM_Symb___,OFF_F) (cfg->symbMMRowImplID, A,B)))
		goto _err;
	if (allocCSRSpMatSymbStep(AB,rowsSizes,1))	goto _err;

	///NUMERIC STEP
	((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (AB->M,AB,cfg);
	AUDIT_INTERNAL_TIMES	Start=omp_get_wtime();
	#pragma omp parallel for schedule(runtime) private(acc)
	for (idx_t r=0; r<A->M; r++){	//row-by-row formulation
		acc = accVects + omp_get_thread_num();
		_resetAccVect(acc);	//rezero for the next A row
		//for each nnz in A's row, accumulate the scalar-vector product nnz.val * B[nnz.col]
		/* direct use of sparse scalar vector multiplication
		for (idx_t ja=A->IRP[r]-OFF_F,ca,jb,bRowLen; ja<A->IRP[r+1]-OFF_F; ja++){
			ca = A->JA[ja] - OFF_F;
			jb = B->IRP[ca] - OFF_F;
			bRowLen = B->IRP[ca+1] - B->IRP[ca];
			CAT(scSparseVectMul_,OFF_F)(A->AS[ja], B->AS+jb,B->JA+jb,bRowLen,acc);
		} */
		for (ulong ja=A->IRP[r]-OFF_F; ja<A->IRP[r+1]-OFF_F; ja++)	//row-by-row formulation
			CAT(scSparseRowMul_,OFF_F)(A->AS[ja], B, A->JA[ja]-OFF_F, acc);
		//direct sparsify: transform the accumulated dense vector into a CSR row
		sparsifyDirect(acc,AB,r);	//0,NULL);TODO COL PARTITIONING COMMON API
	}
	#if OFF_F != 0
	C_FortranShiftIdxs(AB);
	#endif
	AUDIT_INTERNAL_TIMES	End=omp_get_wtime();
	DEBUG	checkOverallocPercent(rowsSizes,AB);
	goto _free;

	_err:
	if(AB)	freeSpmat(AB);
	AB=NULL;	//nothing will be returned
	_free:
	free(rowsSizes);
	if(accVects)	freeAccsDense(accVects,cfg->threadNum);
	if(outAccumul)	freeSpMMAcc(outAccumul);

	return AB;
}
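The parallel loop above is Gustavson's row-by-row formulation: row r of AB is the sum over A's nonzeros A[r][c] of the scaled B row c, gathered in a thread-private dense accumulator and then sparsified. A stripped-down sketch of one row's accumulation with plain arrays (hypothetical helper names, 0-based CSR, no OpenMP):

	//accumulate row r of A*B into the dense vector acc[0..N), N = B's column count
	//aIRP/aJA/aAS and bIRP/bJA/bAS are the usual CSR arrays of A and B
	void accRowGustavson(idx_t r, const idx_t* aIRP, const idx_t* aJA, const double* aAS,
	                     const idx_t* bIRP, const idx_t* bJA, const double* bAS, double* acc){
		for (idx_t ja = aIRP[r]; ja < aIRP[r+1]; ja++){	//each nnz A[r][c]
			idx_t  c  = aJA[ja];
			double av = aAS[ja];
			for (idx_t jb = bIRP[c]; jb < bIRP[c+1]; jb++)	//scale-add B row c
				acc[ bJA[jb] ] += av * bAS[jb];
		}
	}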
spmat* CAT(spmmRowByRow1DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
	ACC_DENSE *accVects = NULL,*acc;
	SPMM_ACC* outAccumul=NULL;
	idx_t* rowsSizes = NULL;
	///init AB matrix with SPMM heuristic preallocation
	spmat* AB = allocSpMatrix(A->M,B->N);
	if (!AB)	goto _err;
	///aux structures alloc
	if (!(accVects = _initAccVectors(cfg->threadNum,AB->N))){
		ERRPRINT("accVects init failed\n");
		goto _err;
	}
	///SYMBOLIC STEP
	if (!(rowsSizes = CAT(SpMM_Symb___,OFF_F) (cfg->symbMMRowImplID, A,B)))
		goto _err;
	if (allocCSRSpMatSymbStep(AB,rowsSizes,1))
		goto _err;

	///NUMERIC STEP
	//perform Gustavson over row blocks -> M / @cfg->gridRows
	ulong rowBlock = AB->M/cfg->gridRows, rowBlockRem = AB->M%cfg->gridRows;
	((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (cfg->gridRows,AB,cfg);
	AUDIT_INTERNAL_TIMES	Start=omp_get_wtime();
	ulong b,startRow,block;	//omp for aux vars
	#pragma omp parallel for schedule(runtime) private(acc,startRow,block)
	for (b=0; b < cfg->gridRows; b++){
		block    = UNIF_REMINDER_DISTRI(b,rowBlock,rowBlockRem);
		startRow = UNIF_REMINDER_DISTRI_STARTIDX(b,rowBlock,rowBlockRem);
		for (idx_t r=startRow; r<startRow+block; r++){
			acc = accVects + omp_get_thread_num();
			_resetAccVect(acc);	//rezero for the next A row
			//for each nnz in A's row, accumulate the scalar-vector product nnz.val * B[nnz.col]
			/* direct use of sparse scalar vector multiplication
			for (idx_t ja=A->IRP[r]-OFF_F,ca,jb,bRowLen; ja<A->IRP[r+1]-OFF_F; ja++){
				ca = A->JA[ja] - OFF_F;
				jb = B->IRP[ca] - OFF_F;
				bRowLen = B->IRP[ca+1] - B->IRP[ca];
				CAT(scSparseVectMul_,OFF_F)(A->AS[ja], B->AS+jb,B->JA+jb,bRowLen,acc);
			} */
			for (ulong ja=A->IRP[r]-OFF_F; ja<A->IRP[r+1]-OFF_F; ja++)	//row-by-row formulation
				CAT(scSparseRowMul_,OFF_F)(A->AS[ja], B, A->JA[ja]-OFF_F, acc);
			//direct sparsify: transform the accumulated dense vector into a CSR row
			sparsifyDirect(acc,AB,r);	//0,NULL);TODO COL PARTITIONING COMMON API
		}
	}
	#if OFF_F != 0
	C_FortranShiftIdxs(AB);
	#endif
	AUDIT_INTERNAL_TIMES	End=omp_get_wtime();
	DEBUG	checkOverallocPercent(rowsSizes,AB);
	goto _free;

	_err:
	if(AB)	freeSpmat(AB);
	AB=NULL;	//nothing will be returned
	_free:
	if(rowsSizes)	free(rowsSizes);
	if(accVects)	freeAccsDense(accVects,cfg->threadNum);
	if(outAccumul)	freeSpMMAcc(outAccumul);

	return AB;
}
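UNIF_REMINDER_DISTRI / UNIF_REMINDER_DISTRI_STARTIDX (defined in macros.h, not shown in this commit) implement the fair split of M rows into gridRows blocks: presumably each block gets M/gridRows rows and the first M%gridRows blocks absorb one extra. A sketch of that arithmetic under this assumption (hypothetical function names):

	//fair block sizes for M=10 rows over 4 blocks: 3,3,2,2 starting at 0,3,6,8
	ulong blockLen(ulong b, ulong size, ulong rem)  { return size + (b < rem); }
	ulong blockStart(ulong b, ulong size, ulong rem){ return b*size + (b < rem ? b : rem); }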
spmat* CAT(spmmRowByRow2DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
	DEBUG	printf("spmm\trowBlocks of A ,\tcolBlocks of B\tM=%luxN=%lu\n",A->M,B->N);
	idx_t* bColOffsets = NULL;	//B group columns starting offset for each row
	ACC_DENSE *accVectors=NULL,*accV;
	SPACC* accRowPart;
	spmat* AB = allocSpMatrix(A->M,B->N);
	SPMM_ACC* outAccumul=NULL;
	idx_t *rowsPartsSizes=NULL, *rowSizes=NULL;	//for rows' cols partition, correct len
	if (!AB)	goto _err;
	//2D indexing aux vars
	ulong gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols;
	ulong _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows;
	ulong _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols;
	ulong startRow,startCol,rowBlock,colBlock;	//data division aux variables
	////get bColOffsets for B column groups
	if (!(bColOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols)))
		goto _err;
	///TODO TODO: numeric step still missing; for now this falls through to _err and returns NULL
	_err:
	if (AB)	freeSpmat(AB);
	AB = NULL;
	_free:
	free(rowsPartsSizes);
	free(bColOffsets);
	if (accVectors)	freeAccsDense(accVectors,gridSize);
	if (outAccumul)	freeSpMMAcc(outAccumul);

	return AB;
}
/* TODO
///2D
//PARTITIONS NOT ALLOCATED
spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
///SP3MM
spmat* CAT(sp3mmRowByRowPair_,OFF_F)(spmat* R,spmat* AC,spmat* P,CONFIG* cfg,SPMM_INTERF spmm){
////////Sp3MM direct
///1D
spmat* CAT(sp3mmRowByRowMerged_,OFF_F)(spmat* R,spmat* AC,spmat* P,CONFIG* cfg,SPMM_INTERF spmm){
*/
@@ -0,0 +1,594 @@
/*
 * Sp3MM_for_AlgebraicMultiGrid
 * (C) Copyright 2021-2022
 * 	Andrea Di Iorio
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions, and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
 *    not be used to endorse or promote products derived from this
 *    software without specific written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * CSR Sp[3]MM Symbolic step implementations
 * target: compute the output matrix size and the row lens for preallocation,
 * 	directly writing out partial results
 * See interfaces in the respective header
 */


/*#pragma message( "compiling Sp3MM_CSR_OMP_Symb_Generic.c with config as:" \
	STR(OFF_F) " - " STR(OUT_IDXS) " - " STR(COL_PARTS) )*/
#ifndef OFF_F
	#error generic implementation requires OFF_F defined
#endif

///setup aux macros for different signatures implementation via #if arith expr
#pragma push_macro("OUT_IDXS")
#pragma push_macro("_OUT_IDXS")
#pragma push_macro("COL_PARTS")
#pragma push_macro("_COL_PARTS")

#ifdef OUT_IDXS
	#define _OUT_IDXS	TRUE
#else
	#define _OUT_IDXS	FALSE
	#define OUT_IDXS	_UNDEF
#endif
#ifdef COL_PARTS
	#define _COL_PARTS	TRUE
#else
	#define _COL_PARTS	FALSE
	#define COL_PARTS	_UNDEF
#endif
///

//////SpMM - rowByrow
///1row->matrix->outRow
//RBTREE based
/*
 * Compute the symbolic product of (the nnz indexes of) row @aRowJA and matrix @b,
 * inserting the nnz indexes of the mul. result row as nodes in a rbtree rooted at @root,
 * with nodes in @nodes, which have to be enough for the mul. result row (use a UB)
 * Returns: multiplication result row NNZ number; see CONFIG_MACROS below for more
 *
 * CONFIG_MACROS:
 * if _OUT_IDXS == TRUE: return the mul. result row's nnz idxs in @outIdxs
 * 	ifdef OUT_IDXS_RBTREE_NODES: nnz indexes returned by inplace sorting the rbtree
 * 	nodes as nnz indexes (JA) of the mul. result row
 * else: stop at returning the mul. result row length
 * if _COL_PARTS == TRUE: return the number of nonzero elements
 * in each of the @gridCols column partitions inside @rowColPartsLens
 * OFF_F: offset back indexes from Fortran
 * TODO: also output indexes are shifted (see c_b)
 */
static inline idx_t CAT4(SpMM_Row_Symb_Rbtree,OUT_IDXS,COL_PARTS,OFF_F)
(
	idx_t* aRowJA, idx_t aRowLen, spmat* b,rbRoot* root, rbNode* nodes
	#if _OUT_IDXS == TRUE && !defined OUT_IDXS_RBTREE_NODES
	,idx_t* outIdxs
	#endif
	#if _COL_PARTS == TRUE
	,ushort gridCols,idx_t* rowColPartsLens
	#endif
)
{
	//Compute the resulting ab row's nonzero indexes and total length
	idx_t abRowLen = 0;	//mul.result row len, return value
	for ( idx_t i=0,c_a,inserted; i < aRowLen; i++ ){	//for each entry in a's row
		c_a = aRowJA[i]-OFF_F;
		//gather the different nnz indexes in the corresponding b's `c_a`th row
		for ( idx_t j = b->IRP[c_a]-OFF_F,c_b; j < b->IRP[c_a+1]-OFF_F; j++ ){
			c_b = b->JA[j]-OFF_F;
			//check if c_b is a nonzero index for the mul.result row
			inserted  = rbInsertNewKey (root, nodes+abRowLen, c_b);
			abRowLen += inserted;	//inserted needed just after this
			/* LESS EFFICIENT THAN BELOW (here no memory of last colPart)
			#if _COL_PARTS == TRUE	//keep track of which col partition c_b is in
			if (inserted)
				rowColPartsLens[ matchingUnifRangeIdx(c_b, b->N, gridCols) ]++;
			#endif */
		}
	}
	#if _OUT_IDXS == T && defined OUT_IDXS_RBTREE_NODES
	/* return the nonzero indexes of the mul.result row
	 * sorting inplace the nodes inserted in the rbtree */
	sortRbNode(nodes,abRowLen);
	#elif _OUT_IDXS == T || _COL_PARTS == T
	uint i=0;
	idx_t k;
	#if _COL_PARTS == T
	//colParts aux vars
	idx_t _colBlock = abRowLen / gridCols, _colBlockRem = abRowLen % gridCols;
	ushort gc=0;
	idx_t gcStartCol = unifRemShareStart(gc,_colBlock,_colBlockRem);
	idx_t gcEndCol   = unifRemShareEnd(gc,_colBlock,_colBlockRem);
	#endif //_COL_PARTS == T
	for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){
		k = rb_entry(n,rbNode,rb)->key;
		#if _OUT_IDXS == T
		//return the mul.result nnz index inside the rbNodes
		outIdxs[ i++ ] = k;
		#endif
		#if _COL_PARTS == T
		while (k >= gcEndCol ){	//see if the idx is in another col partition
					// TODO also = since gcEndCol as k is 0based
			gcEndCol = unifRemShareEnd(gc ,_colBlock, _colBlockRem);
			gc++;
			DEBUGCHECKS{ assert( gc < gridCols ); }
		}
		rowColPartsLens[gc]++;
		#endif //_COL_PARTS == T
	}
	#endif //_OUT_IDXS == T ... _COL_PARTS == T
	/*DEBUGCHECKS{	//TODO PRINT NNZ INDEXES FOR MANUAL (PAINFUL) CHECK
		idx_t k;
		for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){
			k = rb_entry(n,rbNode,rb)->key;
			printf("%lu, ",k);
		}
		printf("\n");
	}*/

	return abRowLen;
}
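The rbtree here is just an ordered set of column indexes: the symbolic product of one row only needs "how many distinct columns appear in the union of the B rows selected by A's row". A minimal reference version of the same count with a plain byte map instead of the tree (hypothetical helper, 0-based CSR, caller provides a zeroed map of b->N flags):

	//symbolic length of row (aRowJA,aRowLen) * b; seen[] must be zeroed and b->N wide
	static idx_t rowSymbLenRef(const idx_t* aRowJA, idx_t aRowLen, const spmat* b, char* seen){
		idx_t len = 0;
		for (idx_t i = 0; i < aRowLen; i++){
			idx_t ca = aRowJA[i];
			for (idx_t j = b->IRP[ca]; j < b->IRP[ca+1]; j++){
				idx_t cb = b->JA[j];
				if (!seen[cb]){ seen[cb] = 1; len++; }	//count each column once
			}
		}
		return len;
	}

The tree pays O(log n) per insert but hands the indexes back already sorted, which is what the CSR output needs; the SPVECT_IDX_DENSE_MAP variant below makes that trade differently.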

//SPVECT_IDX_DENSE_MAP based TODO double implementation for trick syntax folding here...
/*
 * SPVECT_IDX_DENSE_MAP based, as SpMM_Row_Symb_Rbtree but with idxMap aux idx keeping
 * CONFIG_MACROS (new):
 * IDX_RMUL_SYMB_RBTREE && ( _OUT_IDXS == T || _COL_PARTS == T ):
 * 	(tmp) symb mult out indexes will be kept via a rbtree,
 * 	otherwise directly in the out array, appending them and then sorting them
 * 	(potentially the same n log n)
 */
static inline idx_t CAT4(SpMM_Row_Symb_IdxMap,OUT_IDXS,COL_PARTS,OFF_F)
(
	idx_t* aRowJA, idx_t aRowLen, spmat* b, SPVECT_IDX_DENSE_MAP* idxsMapAcc
	#if _OUT_IDXS == TRUE
	,idx_t* outIdxs
	#endif
	#if ( _OUT_IDXS == TRUE && IDX_RMUL_SYMB_RBTREE == T ) || _COL_PARTS == T
	,rbRoot* root, rbNode* nodes
	#endif // _OUT_IDXS == TRUE
	#if _COL_PARTS == TRUE
	,ushort gridCols,idx_t* rowColPartsLens
	#endif
)
{
	//Compute the resulting ab row's nonzero indexes and total length
	idx_t abRowLen = 0;	//mul.result row len, return value
	for ( idx_t i=0,c_a,inserted; i < aRowLen; i++ ){	//for each entry in a's row
		c_a = aRowJA[i]-OFF_F;
		//gather the different nnz indexes in the corresponding b's `c_a`th row
		for ( idx_t j = b->IRP[c_a]-OFF_F,c_b; j < b->IRP[c_a+1]-OFF_F; j++ ){
			c_b = b->JA[j]-OFF_F;
			//check if c_b is a nonzero index for the mul.result row
			inserted = spVect_idx_in(c_b,idxsMapAcc);
			#if _OUT_IDXS == T || _COL_PARTS == T	//idxs HAVE TO be accumulated
			if (inserted)
				#if IDX_RMUL_SYMB_RBTREE == T || _OUT_IDXS == F	//add it in a RBTREE struct
				rbInsertNewKey (root, nodes+idxsMapAcc->len, c_b);
				#else	//append it, then sort
				outIdxs[idxsMapAcc->len] = c_b;
				#endif //IDX_RMUL_SYMB_RBTREE == T	//how key c_b is accumulated
			#endif //#if _OUT_IDXS == T || _COL_PARTS == T
		}
	}
	abRowLen = idxsMapAcc->len;
	//gather idxs or their sparsity struct in the output row
	#if _OUT_IDXS == T || _COL_PARTS == T
	idx_t j = 0,k;
	#if _COL_PARTS == T
	//colParts aux vars
	idx_t _colBlock = abRowLen / gridCols, _colBlockRem = abRowLen % gridCols;
	ushort gc = 0;
	idx_t gcStartCol = unifRemShareStart(gc,_colBlock,_colBlockRem);
	idx_t gcEndCol   = unifRemShareEnd(gc,_colBlock,_colBlockRem);
	#endif
	#if IDX_RMUL_SYMB_RBTREE == T || _OUT_IDXS == F	///idxs recorded in an aux rbtree
	for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){
		k = rb_entry(n,rbNode,rb)->key;
		#if _OUT_IDXS == T
		outIdxs[ j++ ] = k;	//record ordered key sorted from the aux rbtree
		#endif
	#else	///idxs recorded in an aux append array
	sort_idx_t(outIdxs,abRowLen);
	for (; j < abRowLen; j++){
		k = outIdxs[j];	//(OSS) already ordered in the outIdxs arr
	#endif //IDX_RMUL_SYMB_RBTREE == T
		#if _COL_PARTS == T
		while (k >= gcEndCol ){	//see if the idx is in another col partition
					// TODO also = since gcEndCol as k is 0based
			gcEndCol = unifRemShareEnd(gc ,_colBlock, _colBlockRem);
			gc++;
			DEBUGCHECKS{ assert( gc < gridCols ); }
		}
		rowColPartsLens[gc]++;
		#endif //_COL_PARTS == T
	}
	#endif //_OUT_IDXS == T ... _COL_PARTS == T
	return abRowLen;
}
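spVect_idx_in and SPVECT_IDX_DENSE_MAP are declared elsewhere (SpMMUtilsMulti.h); the behavior the code above relies on is a test-and-set of a column index in a dense flag map, returning 1 only on first insertion and tracking the running count in ->len (the .idxsMap field is also visible in the free path of SpMM_Symb_ below). A plausible shape of that accumulator, stated as an assumption to ease reading:

	//assumed layout: one flag per possible column + count of distinct idxs so far
	typedef struct{
		unsigned char* idxsMap;	//idxsMap[c] != 0 <=> column c already seen
		idx_t          len;	//number of distinct columns inserted
	} SPVECT_IDX_DENSE_MAP_SKETCH;

	static inline int spVect_idx_in_sketch(idx_t c, SPVECT_IDX_DENSE_MAP_SKETCH* m){
		if (m->idxsMap[c])	return 0;	//already present
		m->idxsMap[c] = 1;
		m->len++;
		return 1;	//newly inserted
	}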
//switch among the 2 row_symb_XXX implementations, aux
/*
 * SpMM single row symbolic computation
 * select one implementation via @implID,
 * among SpMM_Row_Symb_Rbtree or SpMM_Row_Symb_IdxMap;
 * args will be forwarded accordingly
 */
static inline idx_t CAT4(SpMM_Row_Symb_,OUT_IDXS,COL_PARTS,OFF_F)
(
	ROW_MMSYM_IMPL_MODE implID, idx_t* aRowJA, idx_t aRowLen, spmat* b,
	rbRoot* root, rbNode* nodes, SPVECT_IDX_DENSE_MAP* idxsMapAcc
	#if _OUT_IDXS == TRUE
	,idx_t* outIdxs
	#endif
	#if _COL_PARTS == TRUE
	,ushort gridCols,idx_t* rowColPartsLens
	#endif
)
{
	if (implID == RBTREE) {
		return CAT4(SpMM_Row_Symb_Rbtree,OUT_IDXS,COL_PARTS,OFF_F)
		(
			aRowJA,aRowLen,b,root,nodes
			#if _OUT_IDXS == TRUE && !defined OUT_IDXS_RBTREE_NODES
			,outIdxs
			#endif
			#if _COL_PARTS == TRUE
			,gridCols,rowColPartsLens
			#endif
		);
	}
	else {	//IDXMAP
		return CAT4(SpMM_Row_Symb_IdxMap,OUT_IDXS,COL_PARTS,OFF_F)
		(
			aRowJA,aRowLen,b,idxsMapAcc
			#if _OUT_IDXS == TRUE
			,outIdxs
			#endif
			#if ( _OUT_IDXS == TRUE && IDX_RMUL_SYMB_RBTREE == T ) || _COL_PARTS == T
			,root, nodes
			#endif
			#if _COL_PARTS == TRUE
			,gridCols, rowColPartsLens
			#endif
		);
	}
}
///SpMM row-by-row
idx_t* CAT4(SpMM_Symb_,OUT_IDXS,COL_PARTS,OFF_F)
(
	ROW_MMSYM_IMPL_MODE symbRowImplID, spmat* a, spmat* b
	#if _OUT_IDXS == TRUE
	,idx_t*** outIdxs
	#endif
	#if _COL_PARTS == TRUE
	,ushort gridCols, idx_t** rowColPartsLens
	#endif
)
{
	///initial allocations
	rbRoot* rbRoots = NULL;
	rbNode* rbNodes = NULL;
	SPVECT_IDX_DENSE_MAP* idxsMapAccs = NULL;
	idx_t *rowLens=NULL,*upperBoundedRowsLens=NULL,*upperBoundedSymMat=NULL;
	idx_t maxRowLen=0;
	int rbTreeUsed = (symbRowImplID == RBTREE ||
		(IDX_RMUL_SYMB_RBTREE && (_COL_PARTS || _OUT_IDXS)) );

	if ( !(rowLens = malloc(sizeof(*rowLens) * (a->M+1))) ){
		ERRPRINT("SpMM_Symb_ rowLens malloc errd\n");
		goto _err;
	}
	if (_OUT_IDXS == TRUE || rbTreeUsed ){
		if (!(upperBoundedRowsLens = CAT(spMMSizeUpperbound_,OFF_F)(a,b)))
			goto _err;
	}
	#if _OUT_IDXS == TRUE
	if (!(*outIdxs = malloc(sizeof(**outIdxs) * a->M))){
		ERRPRINT("SpMM_Symb_ outIdxs malloc errd\n");
		goto _err;
	}
	if (!(upperBoundedSymMat = malloc(
		sizeof(*upperBoundedSymMat)*upperBoundedRowsLens[a->M]))){
		ERRPRINT("SpMM_Symb_ upperBoundedSymMat malloc errd\n");
		goto _err;
	}
	//write rows' start pointers into the allocated full-matrix JA
	for (idx_t i=0,cumul=0; i<a->M; cumul += upperBoundedRowsLens[i++])
		(*outIdxs)[i] = upperBoundedSymMat + cumul;
	#endif //#if _OUT_IDXS == TRUE
	#if _COL_PARTS == TRUE
	if (!(*rowColPartsLens = malloc(a->M * gridCols * sizeof(**rowColPartsLens)))){
		ERRPRINT("SpMM_Symb_ rowColPartsLens malloc errd\n");
		goto _err;
	}
	#endif //_COL_PARTS
	uint maxThreads = omp_get_max_threads();	//TODO FROM CFG
	//index keeping aux struct:
	//rbtree implementation, or idxMap with the aux of a symbTree for tmp outIdx keeping
	if ( rbTreeUsed ){
		maxRowLen = reductionMaxSeq(upperBoundedRowsLens, a->M);
		//rbTrees for index keeping
		rbRoots = malloc(maxThreads * sizeof(*rbRoots));
		rbNodes = calloc(maxThreads * maxRowLen, sizeof(*rbNodes));
		if( !rbRoots || !rbNodes ){
			ERRPRINT("SpMM_Symb_ threads' aux rbTree mallocs errd\n");
			goto _err;
		}
		//init roots
		for (uint i=0; i<maxThreads; i++)
			rbRoots[i] = RB_ROOT_CACHED;
	}
	if (symbRowImplID == IDXMAP){	//idxMap implementation && not outIdxs via rbtree
		idxsMapAccs = calloc(maxThreads, sizeof(*idxsMapAccs));
		if (!idxsMapAccs){
			ERRPRINT("SpMM_Symb_ idxsMapAccs calloc errd\n");
			goto _err;
		}
		//init idxs maps
		for (uint i=0; i<maxThreads; i++){
			if(initSpVectIdxDenseAcc(b->N,idxsMapAccs+i))	goto _err;
		}
	}
	///rows parallel compute
	idx_t* aRow;
	idx_t aRowLen,rLen,abCumulLen=0;
	int tid;
	rbRoot* tRoot;	rbNode* tNodes;
	SPVECT_IDX_DENSE_MAP* tIdxsMapAcc = NULL;
	#pragma omp parallel for schedule(static) \
	  private(aRow,aRowLen,rLen, tRoot,tNodes,tid,tIdxsMapAcc) reduction(+:abCumulLen)
	for(idx_t r=0; r<a->M; r++){
		aRow    = a->JA + a->IRP[r]-OFF_F;
		aRowLen = a->IRP[r+1] - a->IRP[r];
		tid = omp_get_thread_num();
		//TODO low overhead pointer arith can be avoided with if (symbRowImplID .. && )
		tIdxsMapAcc = idxsMapAccs + tid;
		tRoot  = rbRoots + tid;
		tNodes = rbNodes + tid * maxRowLen;

		rLen = CAT4(SpMM_Row_Symb_,OUT_IDXS,COL_PARTS,OFF_F)
		(
			symbRowImplID, aRow, aRowLen, b, tRoot,tNodes, tIdxsMapAcc
			#if _OUT_IDXS == TRUE
			,(*outIdxs)[r]
			#endif
			#if _COL_PARTS == TRUE
			,gridCols, (*rowColPartsLens) + IDX2D(r,0,gridCols)
			#endif
		);
		rowLens[r]  = rLen;
		abCumulLen += rLen;
		///reset symb idxs keeping aux structs
		if (symbRowImplID==RBTREE || (IDX_RMUL_SYMB_RBTREE && (_COL_PARTS || _OUT_IDXS))){
			*tRoot = RB_ROOT_CACHED;
			memset(tNodes,0,rLen * sizeof(*tNodes));
		}
		if (symbRowImplID == IDXMAP)	_resetIdxMap(tIdxsMapAcc);
	}
	rowLens[a->M] = abCumulLen;
	goto _free;

	_err:
	free(rowLens);
	#if _OUT_IDXS == T
	if (outIdxs)		free(*outIdxs);
	#endif
	#if _COL_PARTS == T
	if (rowColPartsLens)	free(*rowColPartsLens);
	#endif
	rowLens = NULL;
	_free:
	free(upperBoundedRowsLens);
	free(upperBoundedSymMat);
	free(rbRoots);
	free(rbNodes);
	if (idxsMapAccs){
		for (uint i=0; i<maxThreads; i++)	free(idxsMapAccs[i].idxsMap);
		free(idxsMapAccs);
	}

	return rowLens;
}

///Sp3MM - rowByrowByrow ////////////////////////////////////////////////////////
//TODO COL_PARTS VERSION?
#if _OUT_IDXS == TRUE && _COL_PARTS == FALSE
idx_t CAT3(Sp3MM_Row_Symb_,OUT_IDXS,OFF_F)
(
	ROW_MMSYM_IMPL_MODE symbMMRowImplID,idx_t* aRowJA,idx_t aRowLen,spmat* b,spmat* c,
	rbRoot* root,rbNode* nodes, SPVECT_IDX_DENSE_MAP* idxsMapAcc,idx_t* abRowJATmp
	#if _OUT_IDXS == TRUE && !defined OUT_IDXS_RBTREE_NODES
	,idx_t* outIdxs
	#endif
)
{
	struct rb_node* n;
	idx_t abRowLen = CAT4(SpMM_Row_Symb_,OUT_IDXS,COL_PARTS,OFF_F)
		(symbMMRowImplID,aRowJA,aRowLen,b,root,nodes,idxsMapAcc,abRowJATmp);
	cleanRbNodes(root, nodes, abRowLen);
	idx_t abcRowLen = CAT4(SpMM_Row_Symb_,OUT_IDXS,COL_PARTS,OFF_F)
		(symbMMRowImplID,abRowJATmp,abRowLen,c,root,nodes,idxsMapAcc,abRowJATmp);

	#if _OUT_IDXS == TRUE
	#ifndef OUT_IDXS_RBTREE_NODES
	//return the mul.result nnz indexes inside the rbNodes
	uint i=0;
	//rbNodeOrderedVisit(n,root)	outIdxs[ i++ ] = rb_entry(n,rbNode,rb)->key;
	for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){
		outIdxs[ i++ ] = rb_entry(n,rbNode,rb)->key;
	}
	#else
	/* return the nonzero indexes of the mul.result row
	 * sorting inplace the nodes inserted in the rbtree */
	sortRbNode(nodes,abcRowLen);
	#endif
	#endif
	return abcRowLen;
}

idx_t* CAT3(Sp3MM_Symb_,OUT_IDXS,OFF_F)
(
	ROW_MMSYM_IMPL_MODE symbMMRowImplID, spmat* a, spmat* b, spmat* c
	#if _OUT_IDXS == TRUE
	,idx_t*** outIdxs
	#endif
)
{
	//idxs keeping aux buffs
	idx_t* abRowsJATmp = NULL;
	rbRoot* rbRoots = NULL;	rbNode* rbNodes = NULL;
	SPVECT_IDX_DENSE_MAP* idxsMapAccs = NULL;

	idx_t *abUpperBoundedRowsLens = NULL, *upperBoundedSymMat = NULL;
	///initial allocations
	idx_t* rowLens = malloc(sizeof(*rowLens) * (a->M +1) );	//to return
	if (!rowLens){
		ERRPRINT("Sp3MM_Symb_ rowLens malloc errd\n");
		goto _err;
	}

	#if _OUT_IDXS == TRUE
	if (!(*outIdxs = malloc(sizeof(**outIdxs) * a->M))){
		ERRPRINT("Sp3MM_Symb_ outIdxs malloc errd\n");
		goto _err;
	}
	if (!(abUpperBoundedRowsLens = CAT(spMMSizeUpperbound_,OFF_F)(a,b)))
		goto _err;
	/*TODO TODO instead of doing one sym product first to have a correct UB,
	 * use a heuristic here to get the output matrix size
	 */
	idx_t abcUBSize = abUpperBoundedRowsLens[a->M] * SP3MM_UB_HEURISTIC;
	if (!(upperBoundedSymMat=malloc(sizeof(*upperBoundedSymMat)*abcUBSize))){
		ERRPRINT("Sp3MM_Symb_ upperBoundedSymMat malloc errd\n");
		goto _err;
	}
	//TODO heuristic to UB rows bounds ... requires compacting copy
	for (idx_t i=0,cumul=0; i<a->M;
	     cumul += SP3MM_UB_HEURISTIC * abUpperBoundedRowsLens[i++])
		(*outIdxs)[i] = upperBoundedSymMat + cumul;
	#endif //#if _OUT_IDXS == TRUE
	//rbTrees for index keeping
	uint maxThreads = omp_get_max_threads();	//TODO FROM CFG
	idx_t abMaxRowLen = reductionMaxSeq(abUpperBoundedRowsLens, a->M);
	#ifdef HEURISTICS_UB
	idx_t maxRowLenUB = abMaxRowLen * SP3MM_UB_HEURISTIC;	//TODO UB HEURISTIC
	#else
	idx_t maxRowLenUB = c->N;
	#endif //HEURISTICS_UB

	if (!(abRowsJATmp = malloc(maxThreads*maxRowLenUB*sizeof(*abRowsJATmp)) ) ){
		ERRPRINT("Sp3MM_Symb_ abRowsJATmp malloc errd\n");
		goto _err;
	}
	if (symbMMRowImplID == RBTREE || IDX_RMUL_SYMB_RBTREE ){
		rbNodes = malloc(maxThreads * maxRowLenUB * sizeof(*rbNodes));
		rbRoots = malloc(maxThreads * sizeof(*rbRoots));
		if (!rbRoots || !rbNodes ){
			ERRPRINT("Sp3MM_Symb_ rbRoots || rbNodes malloc errd\n");
			goto _err;
		}
		//init roots
		for (uint i=0; i<maxThreads; i++)	rbRoots[i] = RB_ROOT_CACHED;
	}
	if (symbMMRowImplID == IDXMAP){
		if (!( idxsMapAccs = calloc(maxThreads,sizeof(*idxsMapAccs)) )){
			ERRPRINT("Sp3MM_Symb_ idxsMapAccs calloc errd\n");
			goto _err;
		}
		//init idxs maps
		for (uint i=0; i<maxThreads; i++){
			if(initSpVectIdxDenseAcc(c->N,idxsMapAccs+i))	goto _err;
		}
	}
	///rows parallel compute
	idx_t* aRow;
	idx_t aRowLen,rLen,outCumulLen=0;
	//threads local pointers
	int tid;
	rbRoot* tRoot;
	rbNode* tNodes;
	SPVECT_IDX_DENSE_MAP* tIdxsMapAcc;
	idx_t* tABRowJATmp;

	#pragma omp parallel for schedule(static) \
	  private(aRow,aRowLen,rLen, tRoot,tNodes,tid,tIdxsMapAcc,tABRowJATmp) reduction(+:outCumulLen)
	for(idx_t r=0; r<a->M; r++){
		aRow    = a->JA + a->IRP[r]-OFF_F;
		aRowLen = a->IRP[r+1] - a->IRP[r];
		tid = omp_get_thread_num();
		tRoot  = rbRoots + tid;
		tNodes = rbNodes + tid * maxRowLenUB;
		tIdxsMapAcc = NULL;	//TODO
		tABRowJATmp = abRowsJATmp + tid * maxRowLenUB;
		rLen = CAT3(Sp3MM_Row_Symb_,OUT_IDXS,OFF_F)
			(symbMMRowImplID, aRow,aRowLen,b,c,tRoot,tNodes,tIdxsMapAcc,tABRowJATmp,
			#if _OUT_IDXS == TRUE
			(*outIdxs)[r]
			#endif
			);
		outCumulLen += rLen;
		rowLens[r]   = rLen;
	}
	rowLens[a->M] = outCumulLen;
	goto _free;
	_err:
	free(rowLens);
	#if _OUT_IDXS == T
	if (*outIdxs)	free(*outIdxs);
	#endif
	rowLens = NULL;
	_free:
	free(abUpperBoundedRowsLens);
	free(upperBoundedSymMat);
	free(rbRoots);
	free(rbNodes);
	free(abRowsJATmp);

	return rowLens;
}

#endif //#if !defined COL_PARTS && defined OUT_IDXS


///restore aux macros' entry state
//#undef _OUT_ID
//#undef _COL_PARTS
#pragma pop_macro("OUT_IDXS")
#pragma pop_macro("_OUT_IDXS")
#pragma pop_macro("COL_PARTS")
#pragma pop_macro("_COL_PARTS")
@@ -0,0 +1,92 @@
/*
 * Sp3MM_for_AlgebraicMultiGrid
 * (C) Copyright 2021-2022
 * 	Andrea Di Iorio
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions, and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
 *    not be used to endorse or promote products derived from this
 *    software without specific written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include "macros.h"
#include "config.h"
#include "sparseMatrix.h"
#include "linuxK_rbtree_minimalized.h"

#include "Sp3MM_CSR_OMP_SymbStep_Multi.h"

#include "sparseUtilsMulti.h"
#include "utils.h"
#include <omp.h>

//inline exports
//ushort matchingUnifRangeIdx(idx_t idx, idx_t size, ushort rangesN);
idx_t* CAT(spMMSizeUpperbound_,0)(spmat* ,spmat* );
idx_t* CAT(spMMSizeUpperbound_,1)(spmat* ,spmat* );
idx_t* CAT(spMMSizeUpperboundColParts_,0)(spmat* ,spmat* ,ushort ,idx_t* );
idx_t* CAT(spMMSizeUpperboundColParts_,1)(spmat* ,spmat* ,ushort ,idx_t* );

/* Multi implementation of the symbolic product of sparse matrices, config macros:
 * OFF_F:     C-Fortran spmat indexing
 * OUT_IDXS:  indexes output
 * COL_PARTS: columns partitioning output...
 */

#define OUT_IDXS_ON	OutIdxs_
#define COL_PARTS_ON	ColParts_
#undef OUT_IDXS
#undef COL_PARTS

#define OFF_F 0
///generate basic versions
#include "Sp3MM_CSR_OMP_SymbStep_Generic.c"
///generate outIdxs versions
#define OUT_IDXS OUT_IDXS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.c"
#undef OUT_IDXS
///generate colParts versions
#define COL_PARTS COL_PARTS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.c"
//generate outIdxs AND colParts versions
#define OUT_IDXS OUT_IDXS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.c"

#undef OUT_IDXS
#undef COL_PARTS
#undef OFF_F
#define OFF_F 1
///generate basic versions
#include "Sp3MM_CSR_OMP_SymbStep_Generic.c"
///generate outIdxs versions
#define OUT_IDXS OUT_IDXS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.c"
#undef OUT_IDXS
///generate colParts versions
#define COL_PARTS COL_PARTS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.c"
//generate outIdxs AND colParts versions
#define OUT_IDXS OUT_IDXS_ON
#include "Sp3MM_CSR_OMP_SymbStep_Generic.c"

#undef OUT_IDXS
#undef COL_PARTS
#undef OFF_F
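Each re-inclusion of Sp3MM_CSR_OMP_SymbStep_Generic.c above stamps out one member of the symbolic-step family, named by CAT4(SpMM_Symb_,OUT_IDXS,COL_PARTS,OFF_F). Judging from the caller seen earlier (CAT(SpMM_Symb___,OFF_F) in the SymbNum file), _UNDEF presumably expands to a single underscore, so the eight generated drivers would be named:

	SpMM_Symb___0                  //basic (OUT_IDXS and COL_PARTS both _UNDEF)
	SpMM_Symb_OutIdxs__0           //+ output indexes
	SpMM_Symb__ColParts_0          //+ per-column-partition lengths
	SpMM_Symb_OutIdxs_ColParts_0   //both
	SpMM_Symb___1, ...             //the same four again for Fortran 1-based indexing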
@@ -0,0 +1,573 @@
/*
 * Sp3MM_for_AlgebraicMultiGrid
 * (C) Copyright 2021-2022
 * 	Andrea Di Iorio
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions, and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
 *    not be used to endorse or promote products derived from this
 *    software without specific written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*#pragma message( "compiling SpMM_CSR_OMP_Generic.c with OFF_F as:" STR(OFF_F) )*/
#ifndef OFF_F
	#error generic implementation requires OFF_F defined
#endif

//////////////////// COMPUTE CORE Sp[3]MM Upperbound //////////////////////////
spmat* CAT(spmmSerial_,OFF_F)(spmat* A,spmat* B, CONFIG* _cfg){	//serial implementation
	spmat* AB = NULL;
	ACC_DENSE acc;
	if ( allocAccDense(&acc,B->N) )			goto _free;
	if (!(AB = allocSpMatrix(A->M,B->N)))	goto _free;
	for( idx_t r=0; r<A->M; r++ ){
		for (ulong c=A->IRP[r]-OFF_F; c<A->IRP[r+1]-OFF_F; c++)	//row-by-row formulation
			CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, &acc);
		sparsifyDirect(&acc,AB,r);	//0,NULL);TODO COL PARTITIONING COMMON API
	}
	_free:
	freeAccsDense(&acc,1);

	return AB;
}
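The "Upperbound" in this file's name refers to preallocating each output row at its structural upper bound: row r of AB can never have more nonzeros than the sum of the lengths of the B rows selected by A's row r (duplicate columns only collapse, so the true length is <=). A sketch of that bound, assuming the layout the callers below rely on (M+1 entries with the cumulative total in the last slot; the real spMMSizeUpperbound_ lives in a utils file not shown in this commit):

	//upper bound of each AB row length + cumulative total in ub[M] (0-based CSR)
	static idx_t* spMMSizeUpperboundSketch(const spmat* a, const spmat* b){
		idx_t* ub = calloc(a->M + 1, sizeof(*ub));
		if (!ub)	return NULL;
		idx_t total = 0;
		for (idx_t r = 0; r < a->M; r++){
			for (idx_t j = a->IRP[r]; j < a->IRP[r+1]; j++){
				idx_t c = a->JA[j];
				ub[r] += b->IRP[c+1] - b->IRP[c];	//full B row length
			}
			total += ub[r];
		}
		ub[a->M] = total;
		return ub;
	}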
////////Sp3MM as 2 x SpMM
///1D
spmat* CAT(spmmRowByRow_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
	DEBUG	printf("spmm\trows of A,\tfull B\tM=%lu x N=%lu\n",A->M,B->N);
	///thread aux
	ACC_DENSE *accVects = NULL,*acc;
	SPMM_ACC* outAccumul=NULL;
	idx_t* rowsSizes = NULL;
	///init AB matrix with SPMM heuristic preallocation
	spmat* AB = allocSpMatrix(A->M,B->N);
	if (!AB)	goto _err;
	if (!(rowsSizes = CAT(spMMSizeUpperbound_,OFF_F) (A,B)))	goto _err;
	///aux structures alloc
	if (!(accVects = _initAccVectors(cfg->threadNum,AB->N))){
		ERRPRINT("accVects init failed\n");
		goto _err;
	}
	if (!(outAccumul = initSpMMAcc(rowsSizes[AB->M],AB->M)))	goto _err;
	#if SPARSIFY_PRE_PARTITIONING == T
	//prepare sparse accumulators with U.Bounded rows[parts] starts
	SPACC* accSp;
	for( idx_t r=0,rSizeCumul=0; r<AB->M; rSizeCumul += rowsSizes[r++]){
		accSp = outAccumul->accs+r;
		accSp->JA = outAccumul->JA + rSizeCumul;
		accSp->AS = outAccumul->AS + rSizeCumul;
		//accSp->len = rowsSizes[r];
	}
	#endif

	((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (AB->M,AB,cfg);
	AUDIT_INTERNAL_TIMES	Start=omp_get_wtime();
	#pragma omp parallel for schedule(runtime) private(acc)
	for (ulong r=0; r<A->M; r++){	//row-by-row formulation
		//iterate over nz entry index c inside current row r
		acc = accVects + omp_get_thread_num();
		/* direct use of sparse scalar vector multiplication
		for (idx_t ja=A->IRP[r]-OFF_F,ca,jb,bRowLen; ja<A->IRP[r+1]-OFF_F; ja++){
			ca = A->JA[ja] - OFF_F;
			jb = B->IRP[ca] - OFF_F;
			bRowLen = B->IRP[ca+1] - B->IRP[ca];
			CAT(scSparseVectMul_,OFF_F)(A->AS[ja],B->AS+jb,B->JA+jb,bRowLen,acc);
		}*/
		for (ulong c=A->IRP[r]-OFF_F; c<A->IRP[r+1]-OFF_F; c++)	//row-by-row formulation
			CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, acc);
		//transform the accumulated dense vector into a CSR row
		#if SPARSIFY_PRE_PARTITIONING == T
		_sparsifyUB(acc,outAccumul->accs+r,0);
		#else
		sparsifyUBNoPartsBounds(outAccumul,acc,outAccumul->accs + r,0);
		#endif
		_resetAccVect(acc);	//rezero for the next A row
	}
	///merge the sparse rows computed before
	if (mergeRows(outAccumul->accs,AB))	goto _err;
	#if OFF_F != 0
	C_FortranShiftIdxs(AB);
	#endif
	AUDIT_INTERNAL_TIMES	End=omp_get_wtime();
	DEBUG	checkOverallocPercent(rowsSizes,AB);
	goto _free;

	_err:
	if(AB)	freeSpmat(AB);
	AB=NULL;	//nothing will be returned
	_free:
	if(rowsSizes)	free(rowsSizes);
	if(accVects)	freeAccsDense(accVects,cfg->threadNum);
	if(outAccumul)	freeSpMMAcc(outAccumul);

	return AB;
}
spmat* CAT(spmmRowByRow1DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
	DEBUG	printf("spmm\trowBlocks of A,\tfull B\tM=%lu x N=%lu\n",A->M,B->N);
	DEBUG	printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols);
	///thread aux
	ACC_DENSE *accVects = NULL,*acc;
	SPMM_ACC* outAccumul=NULL;
	idx_t* rowsSizes = NULL;
	///init AB matrix with SPMM heuristic preallocation
	spmat* AB = allocSpMatrix(A->M,B->N);
	if (!AB)	goto _err;
	if (!(rowsSizes = CAT(spMMSizeUpperbound_,OFF_F)(A,B)))	goto _err;
	///aux structures alloc
	if (!(accVects = _initAccVectors(cfg->threadNum,AB->N))){
		ERRPRINT("accVects init failed\n");
		goto _err;
	}
	if (!(outAccumul = initSpMMAcc(rowsSizes[AB->M],AB->M)))	goto _err;
	#if SPARSIFY_PRE_PARTITIONING == T
	//prepare sparse accumulators with U.Bounded rows[parts] starts
	SPACC* accSp;
	for( idx_t r=0,rSizeCumul=0; r<AB->M; rSizeCumul += rowsSizes[r++]){
		accSp = outAccumul->accs+r;
		accSp->JA = outAccumul->JA + rSizeCumul;
		accSp->AS = outAccumul->AS + rSizeCumul;
	}
	#endif

	//perform Gustavson over row blocks -> M / @cfg->gridRows
	ulong rowBlock = AB->M/cfg->gridRows, rowBlockRem = AB->M%cfg->gridRows;
	((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (cfg->gridRows,AB,cfg);
	AUDIT_INTERNAL_TIMES	Start=omp_get_wtime();
	ulong b,startRow,block;	//omp for aux vars
	#pragma omp parallel for schedule(runtime) private(acc,startRow,block)
	for (b=0; b < cfg->gridRows; b++){
		block   = UNIF_REMINDER_DISTRI(b,rowBlock,rowBlockRem);
		startRow= UNIF_REMINDER_DISTRI_STARTIDX(b,rowBlock,rowBlockRem);
		acc = accVects + omp_get_thread_num();

		DEBUGPRINT{
			fflush(NULL);
			printf("block %lu\t%lu:%lu(%lu)\n",b,startRow,startRow+block-1,block);
			fflush(NULL);
		}
		//row-by-row formulation in the given row block
		for (ulong r=startRow; r<startRow+block; r++){
			//iterate over nz entry index c inside current row r
			for (ulong c=A->IRP[r]-OFF_F; c<A->IRP[r+1]-OFF_F; c++)
				CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, acc);
			//transform the accumulated dense vector into a CSR row
			#if SPARSIFY_PRE_PARTITIONING == T
			_sparsifyUB(acc,outAccumul->accs+r,0);
			#else
			sparsifyUBNoPartsBounds(outAccumul,acc,outAccumul->accs + r,0);
			#endif
			_resetAccVect(acc);	//rezero for the next A row
		}
	}
	///merge the sparse rows computed before
	if (mergeRows(outAccumul->accs,AB))	goto _err;
	#if OFF_F != 0
	C_FortranShiftIdxs(AB);
	#endif
	AUDIT_INTERNAL_TIMES	End=omp_get_wtime();
	DEBUG	checkOverallocPercent(rowsSizes,AB);
	goto _free;

	_err:
	if(AB)	freeSpmat(AB);
	AB=NULL;	//nothing will be returned
	_free:
	if(rowsSizes)	free(rowsSizes);
	if(accVects)	freeAccsDense(accVects,cfg->threadNum);
	if(outAccumul)	freeSpMMAcc(outAccumul);

	return AB;
}

///2D
//PARTITIONS NOT ALLOCATED
spmat* CAT(spmmRowByRow2DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
	DEBUG	printf("spmm\trowBlocks of A ,\tcolBlocks of B\tM=%luxN=%lu\n",A->M,B->N);
	DEBUG	printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols);
	idx_t* bColOffsets = NULL;	//B group columns starting offset for each row
	ACC_DENSE *accVectors=NULL,*accV;
	SPACC* accRowPart;
	spmat* AB = allocSpMatrix(A->M,B->N);
	SPMM_ACC* outAccumul=NULL;
	idx_t* rowsPartsSizes=NULL;
	if (!AB)	goto _err;
	//2D indexing aux vars
	ulong gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols;
	ulong _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows;
	ulong _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols;
	ulong startRow,startCol,rowBlock,colBlock;	//data division aux variables
	////get bColOffsets for B column groups
	if (!(bColOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols)))
		goto _err;
	#if SPARSIFY_PRE_PARTITIONING == T
	uint rowsPartsSizesN = aSubRowsN;
	if (!(rowsPartsSizes = CAT(spMMSizeUpperboundColParts_,OFF_F)
	      (A,B,cfg->gridCols,bColOffsets)))
	#else
	uint rowsPartsSizesN = AB->M;
	if (!(rowsPartsSizes = CAT(spMMSizeUpperbound_,OFF_F)(A,B)))
	#endif
		goto _err;

	//aux vectors
	///other AUX struct alloc
	if (!(accVectors = _initAccVectors(gridSize,_colBlock+(_colBlockRem?1:0)))){
		ERRPRINT("accVectors calloc failed\n");
		goto _err;
	}
	if (!(outAccumul = initSpMMAcc(rowsPartsSizes[rowsPartsSizesN],aSubRowsN)))
		goto _err;
	#if SPARSIFY_PRE_PARTITIONING == T
	//prepare sparse accumulators with U.Bounded rows[parts] starts
	SPACC* accSp;
	for( idx_t i=0,rSizeCumul=0; i<aSubRowsN; rSizeCumul += rowsPartsSizes[i++]){
		accSp = outAccumul->accs+i;
		accSp->JA = outAccumul->JA + rSizeCumul;
		accSp->AS = outAccumul->AS + rSizeCumul;
	}
	//memset(outAccumul->AS,0,sizeof(double)*rowsSizes[AB->M]);memset(outAccumul->JA,0,sizeof(idx_t)*rowsSizes[AB->M]);
	#endif

	((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (gridSize,AB,cfg);
	AUDIT_INTERNAL_TIMES	Start=omp_get_wtime();
	ulong tileID,t_i,t_j;	//for aux vars
	ulong bPartLen,bPartID,bPartOffset;	//B partition access aux vars
	#pragma omp parallel for schedule(runtime) \
	  private(accV,accRowPart,rowBlock,colBlock,startRow,startCol,\
	  bPartLen,bPartID,bPartOffset,t_i,t_j)
	for (tileID = 0; tileID < gridSize; tileID++){
		///get iteration's indexing variables
		//tile index in the 2D grid of AB computation TODO OMP HOW TO PARALLELIZE 2 FOR
		t_i = tileID/cfg->gridCols;	//i-th row block
		t_j = tileID%cfg->gridCols;	//j-th col block
		//get tile row-cols group FAIR sizes
		rowBlock = UNIF_REMINDER_DISTRI(t_i,_rowBlock,_rowBlockRem);
		startRow = UNIF_REMINDER_DISTRI_STARTIDX(t_i,_rowBlock,_rowBlockRem);
		startCol = UNIF_REMINDER_DISTRI_STARTIDX(t_j,_colBlock,_colBlockRem);

		accV = accVectors + tileID;

		DEBUGPRINT{
			fflush(NULL);
			colBlock = UNIF_REMINDER_DISTRI(t_j,_colBlock,_colBlockRem);
			printf("rowBlock [%lu\t%lu:%lu(%lu)]\t",t_i,startRow,startRow+rowBlock-1,rowBlock);
			printf("colBlock [%lu\t%lu:%lu(%lu)]\n",t_j,startCol,startCol+colBlock-1,colBlock);
			fflush(NULL);
		}
		///AB[t_i][t_j] block compute
		for (ulong r=startRow; r<startRow+rowBlock; r++){
			//iterate over nz col index j inside current row r
			//row-by-row restricted to a col subset of B to get AB[r][:colBlock:]
			for (ulong j=A->IRP[r]-OFF_F,c; j<A->IRP[r+1]-OFF_F; j++){
				//get start of B[A->JA[j]][:colBlock:]
				c = A->JA[j]-OFF_F;	// col of nnz in A[r][:] <-> target B row
				bPartID     = IDX2D(c,t_j,cfg->gridCols);
				bPartOffset = bColOffsets[ bPartID ];
				bPartLen    = bColOffsets[ bPartID + 1 ] - bPartOffset;

				CAT(scSparseVectMulPart_,OFF_F)(A->AS[j],B->AS+bPartOffset,
					B->JA+bPartOffset,bPartLen,startCol,accV);
			}

			accRowPart = outAccumul->accs + IDX2D(r,t_j,cfg->gridCols);
			#if SPARSIFY_PRE_PARTITIONING == T
			_sparsifyUB(accV,accRowPart,startCol);
			#else
			sparsifyUBNoPartsBounds(outAccumul,accV,accRowPart,startCol);
			#endif
			_resetAccVect(accV);
		}
	}
	if (mergeRowsPartitions(outAccumul->accs,AB,cfg))	goto _err;
	#if OFF_F != 0
	C_FortranShiftIdxs(AB);
	#endif
	AUDIT_INTERNAL_TIMES	End=omp_get_wtime();
	DEBUG
		CAT(checkOverallocRowPartsPercent_, OFF_F)(rowsPartsSizes, AB, cfg->gridCols, bColOffsets);
	goto _free;

	_err:
	if (AB)	freeSpmat(AB);
	AB = NULL;
	_free:
	free(rowsPartsSizes);
	free(bColOffsets);
	if (accVectors)	freeAccsDense(accVectors,gridSize);
	if (outAccumul)	freeSpMMAcc(outAccumul);

	return AB;
}

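colsOffsetsPartitioningUnifRanges_ (from sparseUtilsMulti.h) is what makes the column-restricted access above O(1): for every row of B and every column group it records where that group's entries start inside B->JA, so B[row][group] is the JA/AS slice between offsets[IDX2D(row,gc,gridCols)] and offsets[IDX2D(row,gc,gridCols)+1] (the bPartID arithmetic in the loop). A sketch of how such offsets can be built, under that assumed semantics (uniform column ranges, 0-based CSR with sorted columns per row):

	//offsets[r*gridCols+gc] = index in b->JA where row r's gc-th column group
	//starts; one extra closing slot so every slice has an end bound
	static idx_t* colsOffsetsSketch(const spmat* b, ushort gridCols){
		idx_t colBlock = b->N/gridCols, colBlockRem = b->N%gridCols;
		idx_t* offsets = malloc((b->M*gridCols + 1) * sizeof(*offsets));
		if (!offsets)	return NULL;
		for (idx_t r = 0; r < b->M; r++){
			idx_t j = b->IRP[r];
			for (ushort gc = 0; gc < gridCols; gc++){
				offsets[r*gridCols + gc] = j;	//start of this column group
				//end column of group gc under the fair uniform-remainder split
				idx_t end = (gc+1)*colBlock + (gc+1 < colBlockRem ? gc+1 : colBlockRem);
				while (j < b->IRP[r+1] && b->JA[j] < end)	j++;
			}
		}
		offsets[b->M*gridCols] = b->IRP[b->M];	//close the last slice
		return offsets;
	}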
spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
	DEBUG printf("spmm\trowBlocks of A,\tcolBlocks (allcd) of B\tM=%luxN=%lu\n",A->M,B->N);
	DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols);
	spmat *AB = NULL, *colPartsB = NULL, *colPart;
	idx_t* rowsPartsSizes = NULL;
	//aux vectors
	SPMM_ACC* outAccumul = NULL;
	ACC_DENSE *accVectors = NULL, *accV;
	SPACC* accRowPart;
	ulong startRow,startCol,rowBlock,colBlock;	//data division aux variables
	//2D indexing aux vars
	idx_t gridSize = cfg->gridRows*cfg->gridCols, aSubRowsN = A->M*cfg->gridCols;
	idx_t* bColOffsets = NULL;

	if (!(AB = allocSpMatrix(A->M,B->N)))	goto _err;
	ulong _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows;
	ulong _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols;

	////B cols partition in CSRs
	//if (!(colPartsB = CAT(colsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols)))	goto _err;
	if (!(colPartsB = CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)
	  (B, cfg->gridCols, &bColOffsets)))
		goto _err;
	#if SPARSIFY_PRE_PARTITIONING == T
	uint rowsPartsSizesN = aSubRowsN;
	if (!(rowsPartsSizes = CAT(spMMSizeUpperboundColParts_,OFF_F)
	  (A, B, cfg->gridCols, bColOffsets)))
	#else
	uint rowsPartsSizesN = AB->M;
	if (!(rowsPartsSizes = CAT(spMMSizeUpperbound_,OFF_F)(A,B)))
	#endif
		goto _err;
	///other AUX struct alloc
	if (!(accVectors = _initAccVectors(gridSize,_colBlock+(_colBlockRem?1:0)))){
		ERRPRINT("accVectors calloc failed\n");
		goto _err;
	}
	if (!(outAccumul = initSpMMAcc(rowsPartsSizes[rowsPartsSizesN],aSubRowsN)))
		goto _err;
	#if SPARSIFY_PRE_PARTITIONING == T
	//prepare sparse accumulators with the upper-bounded rows[parts] starts
	SPACC* accSp;
	for( idx_t i=0,rLenCumul=0; i<aSubRowsN; rLenCumul += rowsPartsSizes[i++]){
		accSp		= outAccumul->accs+i;
		accSp->JA	= outAccumul->JA + rLenCumul;
		accSp->AS	= outAccumul->AS + rLenCumul;
	}
	#endif

	((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (gridSize,AB,cfg);
	AUDIT_INTERNAL_TIMES	Start=omp_get_wtime();
	ulong tileID,t_i,t_j;	//for aux vars
	#pragma omp parallel for schedule(runtime) \
	  private(accV,accRowPart,colPart,rowBlock,colBlock,startRow,startCol,t_i,t_j)
	for (tileID = 0; tileID < gridSize; tileID++){
		///get iteration's indexing variables
		//tile index in the 2D grid of AB computation TODO OMP HOW TO PARALLELIZE 2 FOR
		t_i = tileID/cfg->gridCols;	//i-th row block
		t_j = tileID%cfg->gridCols;	//j-th col block
		//get tile row-cols group FAIR sizes
		rowBlock = UNIF_REMINDER_DISTRI(t_i,_rowBlock,_rowBlockRem);
		colBlock = UNIF_REMINDER_DISTRI(t_j,_colBlock,_colBlockRem);
		startRow = UNIF_REMINDER_DISTRI_STARTIDX(t_i,_rowBlock,_rowBlockRem);
		startCol = UNIF_REMINDER_DISTRI_STARTIDX(t_j,_colBlock,_colBlockRem);

		colPart	= colPartsB + t_j;
		accV	= accVectors + tileID;

		DEBUGPRINT{
			fflush(NULL);
			printf("rowBlock [%lu\t%lu:%lu(%lu)]\t",
			  t_i,startRow,startRow+rowBlock-1,rowBlock);
			printf("colBlock [%lu\t%lu:%lu(%lu)]\n",
			  t_j,startCol,startCol+colBlock-1,colBlock);
			fflush(NULL);
		}
		///AB[t_i][t_j] block compute
		for (ulong r=startRow; r<startRow+rowBlock; r++){
			//iterate over nz col index j inside current row r
			//row-by-row restricted to the col subset of B to get AB[r][:colBlock:]
			for (ulong j=A->IRP[r]-OFF_F,c,bRowStart,bRowLen; j<A->IRP[r+1]-OFF_F; j++){
				//get start of B[A->JA[j]][:colBlock:]
				c = A->JA[j]-OFF_F;	//column of nnz entry in A[r][:] <-> target B row
				bRowStart = colPart->IRP[c];
				#ifdef ROWLENS
				bRowLen	  = colPart->RL[c];
				#else
				bRowLen	  = colPart->IRP[c+1] - bRowStart;
				#endif
				CAT(scSparseVectMulPart_,OFF_F)(A->AS[j],
				  colPart->AS+bRowStart,colPart->JA+bRowStart,
				  bRowLen,startCol,accV);
			}

			accRowPart = outAccumul->accs + IDX2D(r,t_j,cfg->gridCols);
			#if SPARSIFY_PRE_PARTITIONING == T
			_sparsifyUB(accV,accRowPart,startCol);
			#else
			sparsifyUBNoPartsBounds(outAccumul,accV,accRowPart,startCol);
			#endif
			_resetAccVect(accV);
		}
	}
	if (mergeRowsPartitions(outAccumul->accs,AB,cfg))	goto _err;
	#if OFF_F != 0
	C_FortranShiftIdxs(AB);
	#endif
	AUDIT_INTERNAL_TIMES	End=omp_get_wtime();
	DEBUG
		CAT(checkOverallocRowPartsPercent_,OFF_F)(rowsPartsSizes,AB,cfg->gridCols,bColOffsets);
	goto _free;

	_err:
	ERRPRINT("spmmRowByRow2DBlocksAllocated failed\n");
	if (AB)	freeSpmat(AB);
	AB = NULL;
	_free:
	if (colPartsB){
		for (ulong i=0; i<cfg->gridCols; i++)
			freeSpmatInternal(colPartsB+i);
		free(colPartsB);
	}
	free(rowsPartsSizes);
	free(bColOffsets);
	if (accVectors)	freeAccsDense(accVectors,gridSize);
	if (outAccumul)	freeSpMMAcc(outAccumul);

	return AB;
}
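/* A stand-alone re-statement of the accumulator update that the
 * scSparseVectMulPart_* kernels perform in the inner loop above: scale a B-row
 * fragment (its AS/JA slice restricted to one column group) by the scalar
 * A[r][c] and accumulate it into the tile's dense vector. The signature and the
 * plain double* accumulator are assumptions of this sketch; the real kernel
 * updates an ACC_DENSE and also tracks its nnz indices. */
static void scSparseVectMulPart_sketch(double scalar, const double* bAS,
  const unsigned long* bJA, unsigned long len, unsigned long startCol,
  double* denseAcc){
	for (unsigned long k = 0; k < len; k++)
		denseAcc[ bJA[k] - startCol ] += scalar * bAS[k];	//acc holds only this tile's columns
}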
///SP3MM
spmat* CAT(sp3mmRowByRowPair_,OFF_F)(spmat* R,spmat* AC,spmat* P,
  CONFIG* cfg,SPMM_INTERF spmm){

	double end,start,elapsed,partial,flops;
	spmat *RAC = NULL, *out = NULL;

	if (!spmm){
		//TODO decide at runtime, based on the sizes, which spmm implementation to use if none is given
		spmm = &CAT(spmmRowByRow2DBlocks_,OFF_F);
	}
	/* TODO
	 * alloc a dense aux vector, reusable over the triple product
	 * TODO overallocate the arrays so they can be reused across the 2 SpMMs
	 * ulong auxVectSize = MAX(R->N,AC->N);
	 * auxVectSize	     = MAX(auxVectSize,P->N);
	 */

	start = omp_get_wtime();
	/// triple product as a pair of spmm
	if (!(RAC = spmm(R,AC,cfg)))	goto _free;
	AUDIT_INTERNAL_TIMES	partial = End - Start;
	if (!(out = spmm(RAC,P,cfg)))	goto _free;

	end = omp_get_wtime();
	ElapsedInternal = End - Start + partial;
	VERBOSE {
		elapsed	= end - start;
		flops	= (2 * R->NZ * P->NZ * AC->NZ) / (elapsed);
		printf("elapsed %le - flops %le",elapsed,flops);
		AUDIT_INTERNAL_TIMES
			printf("\tinternalTime: %le",ElapsedInternal);
		printf("\n");
	}
	_free:
	if (RAC){	//guard: the first spmm may have failed, leaving RAC NULL
		zeroSpmat(RAC);
		freeSpmat(RAC);
	}

	return out;
}
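/* Hypothetical driver of the pair formulation above (grid sizes illustrative;
 * R, AC, P are assumed already parsed into CSR spmat structures):
 *     CONFIG cfg = { .gridRows = 8, .gridCols = 8 };
 *     spmat* out = sp3mmRowByRowPair_0(R, AC, P, &cfg, NULL);  //NULL -> default spmm kernel
 *     if (!out)  ERRPRINT("sp3mm failed\n");
 * Passing a specific SPMM_INTERF (e.g. spmmRowByRow2DBlocksAllocated_0) selects
 * the kernel used for both chained products. */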
////////Sp3MM direct
///1D
spmat* CAT(sp3mmRowByRowMerged_,OFF_F)(spmat* R,spmat* AC,spmat* P,CONFIG* cfg,
  SPMM_INTERF spmm){

	ulong* rowSizes = NULL;
	SPMM_ACC* outAccumul = NULL;
	ACC_DENSE *accVectorsR_AC = NULL, *accVectorsRAC_P = NULL, *accRAC, *accRACP;

	///init AB matrix with SPMM heuristic preallocation
	spmat* out = allocSpMatrix(R->M,P->N);
	if (!out)	goto _err;
	/* TODO 3MM VERSION, COMPUTE OUT ALLOC:
	 * -> \forall RAC.row -> hashmap{col=True}->(AVL||RBTREE); upper bound on RAC.rows.cols in hashmap || SYM_bis
	 * NB: UB for RACP => precise row sizes unknown => same space-reservation approach as spmm ( fetch_and_add )
	 * SYM_BIS ==> row sizes known =>
	 *  1) pre-reserve the rows' space -> cache alignment per thread
	 *     - (sched. static:  each row block aligned to the cache block following the previous rows' block)
	 *     - (sched. dynamic: every row aligned to its own cache block (NO OVERLAPS!) -> huge overhead?)
	 *  2) pre-reserve the rows' space directly in the out CSR
	 *     -> likely cache block overlaps; saves the cost of a P.M memcpy
	 */
	if (!(rowSizes = CAT(spMMSizeUpperbound_,OFF_F)(R,AC)))	goto _err;	///TODO TOO LOOSE UB... INTEGRATE RBTREE FOR SYM->PRECISE
	///aux structures alloc
	if (!(outAccumul = initSpMMAcc(rowSizes[R->M],P->M)))	goto _err;	//TODO size estimated with RAC mat
	if (!(accVectorsR_AC = _initAccVectors(cfg->threadNum,AC->N))){		//TODO LESS || REUSE
		ERRPRINT("accVectorsR_AC init failed\n");
		goto _err;
	}
	if (!(accVectorsRAC_P = _initAccVectors(cfg->threadNum,R->N))){		//TODO LESS || REUSE
		ERRPRINT("accVectorsRAC_P init failed\n");
		goto _err;
	}

	ulong c;
	((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (R->M,R,cfg);
	AUDIT_INTERNAL_TIMES	Start=omp_get_wtime();
	#pragma omp parallel for schedule(runtime) private(accRAC,accRACP,c)
	for (ulong r=0; r<R->M; r++){	//row-by-row formulation
		//iterate over nz entry index c inside current row r
		accRAC	= accVectorsR_AC  + omp_get_thread_num();
		accRACP	= accVectorsRAC_P + omp_get_thread_num();
		//computing (tmp) R*AC r-th row
		for (idx_t j=R->IRP[r]-OFF_F; j<R->IRP[r+1]-OFF_F; j++)
			CAT(scSparseRowMul_,OFF_F)(R->AS[j], AC, R->JA[j]-OFF_F, accRAC);
		//forward the computed row
		for (idx_t j=0; j<accRAC->nnzIdxMap.len; j++){
			c = accRAC->nnzIdx[j];
			CAT(scSparseRowMul_,OFF_F)(accRAC->v[c],P,c,accRACP);
		}
		//transform the accumulated dense vector into a CSR row TODO in UB buff
		sparsifyUBNoPartsBounds(outAccumul,accRACP,outAccumul->accs+r,0);
		_resetAccVect(accRAC);
		_resetAccVect(accRACP);
	}
	///merge the sparse rows computed before
	if (mergeRows(outAccumul->accs,out))	goto _err;
	#if OFF_F != 0
	C_FortranShiftIdxs(out);
	#endif
	AUDIT_INTERNAL_TIMES{
		End=omp_get_wtime();
		ElapsedInternal = End-Start;
	}
	DEBUG	checkOverallocPercent(rowSizes,out);
	goto _free;

	_err:
	if(out)	freeSpmat(out);
	out = NULL;
	_free:
	if(rowSizes)		free(rowSizes);
	if(accVectorsR_AC)	freeAccsDense(accVectorsR_AC,cfg->threadNum);
	if(accVectorsRAC_P)	freeAccsDense(accVectorsRAC_P,cfg->threadNum);
	if(outAccumul)		freeSpMMAcc(outAccumul);

	return out;
}
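/* Single-threaded sketch of the row-by-row CSR product that the merged Sp3MM
 * chains twice above: AB[r][:] = sum_j A[r][j] * B[A->JA[j]][:]. Plain 0-based
 * arrays and a dense row-major output stand in for the project's
 * spmat/ACC_DENSE types. */
static void spmmRowByRow_sketch(unsigned long M, unsigned long N,
  const unsigned long* aIRP, const unsigned long* aJA, const double* aAS,
  const unsigned long* bIRP, const unsigned long* bJA, const double* bAS,
  double* denseOut){	//M x N, zero-initialized
	for (unsigned long r = 0; r < M; r++)
		for (unsigned long j = aIRP[r]; j < aIRP[r+1]; j++){
			unsigned long c = aJA[j];	//A[r][c] scales the whole B row c
			for (unsigned long k = bIRP[c]; k < bIRP[c+1]; k++)
				denseOut[r*N + bJA[k]] += aAS[j] * bAS[k];
		}
}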
@ -0,0 +1,86 @@
/*
 * Sp3MM_for_AlgebraicMultiGrid
 * (C) Copyright 2021-2022 Andrea Di Iorio
 * Distributed under the same BSD-style terms reproduced in full at the top of this changeset.
 */
#include <stdlib.h>
#include <stdio.h>
#include <omp.h>
#include <config.h>

char* SCHEDULES[]={"OMP_SCHED_STATIC","OMP_SCHED_DYNAMIC","OMP_SCHED_GUIDED","OMP_SCHED_AUTO"};
void ompGetRuntimeSchedule(int* kind_chunk_monotonic){
	/*
	 * export OMP_SCHEDULE="[modifier:]kind[, chunk]"
	 * typedef enum omp_sched_t {
	 *	//schedule kinds
	 *	omp_sched_static	= 0x1,
	 *	omp_sched_dynamic	= 0x2,
	 *	omp_sched_guided	= 0x3,
	 *	omp_sched_auto		= 0x4,
	 *	//schedule modifier	//TODO API>=5.0
	 *	omp_sched_monotonic	= 0x80000000u	//TODO OMP >=5
	 * } omp_sched_t;
	 */
	omp_sched_t k,kind; int chunk_size,monotonic=0;
	omp_get_schedule(&kind,&chunk_size);
	k=kind;	//[monotonic OFF]
	#if _OPENMP >= 201811	//OMP_SCHED_SCHEDULE modifier from 5.0
	monotonic = omp_sched_monotonic & kind;
	if(monotonic)	k = kind - omp_sched_monotonic;
	#endif
	printf("omp sched gather:\tkind:%s\tomp chunkSize:%d\tmonotonic:%s\tfairChunkFolding:%d\n",
	  SCHEDULES[k-1],chunk_size,monotonic?"Y":"N",FAIR_CHUNKS_FOLDING);
	if (kind_chunk_monotonic){
		kind_chunk_monotonic[0] = k;
		kind_chunk_monotonic[1] = chunk_size;
		kind_chunk_monotonic[2] = monotonic;
	}
}

float ompVersionMacroMap(){
	switch ( _OPENMP ){
		case 200505:	return 2.5;
		case 200805:	return 3.0;
		case 201107:	return 3.1;
		case 201307:	return 4.0;
		case 201511:	return 4.5;
		case 201811:	return 5.0;
		case 202011:	return 5.1;
		default:	return -1;	//unknown _OPENMP version date
	}
}
//WRAPPER to print all ICV vars
#ifdef OMP_GET_ICV_MAIN
int main(){
#else
void ompGetAllICV(){
#endif
	printf("export OMP_DISPLAY_ENV=VERBOSE for full ICV details\n");
	printf("omp MAX THREADS USABLE\t%d\n",omp_get_max_threads());
	ompGetRuntimeSchedule(NULL);
	printf("omp API version:\t %1.1f\n",ompVersionMacroMap());
}
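/* A minimal sketch of driving the schedule(runtime) clauses used by the SpMM
 * kernels through the ICVs gathered above (binary name and values are
 * illustrative only):
 *     export OMP_NUM_THREADS=8
 *     export OMP_SCHEDULE="dynamic,4"    #[modifier:]kind[,chunk]
 *     ./sp3mmTest R.mtx AC.mtx P.mtx
 * ompGetRuntimeSchedule(NULL) would then report kind:OMP_SCHED_DYNAMIC and
 * omp chunkSize:4. */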
File diff suppressed because it is too large
@ -0,0 +1,193 @@
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2021 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2007-2021 Cisco Systems, Inc.  All rights reserved
 * Copyright (c) 2008-2009 Sun Microsystems, Inc.  All rights reserved.
 * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
 * Copyright (c) 2011-2020 Sandia National Laboratories.  All rights reserved.
 * Copyright (c) 2012-2018 Los Alamos National Security, LLC.  All rights
 *                         reserved.
 * Copyright (c) 2011-2013 INRIA.  All rights reserved.
 * Copyright (c) 2015      University of Houston.  All rights reserved.
 * Copyright (c) 2015-2021 Research Organization for Information Science
 *                         and Technology (RIST).  All rights reserved.
 * Copyright (c) 2017-2019 IBM Corporation.  All rights reserved.
 * Copyright (c) 2018      FUJITSU LIMITED.  All rights reserved.
 * Copyright (c) 2021-2022 Google, LLC.  All rights reserved.
 * Copyright (c) 2021-2022 Amazon.com, Inc. or its affiliates.  All Rights
 *                         reserved.
 * Copyright (c) 2021      Bull S.A.S.  All rights reserved.
 * Copyright (c) 2018      Triad National Security, LLC.  All rights
 *                         reserved.
 * Copyright (c) 2018-2021 Triad National Security, LLC.  All rights
 *                         reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
/*
 * Error classes and codes
 * Do not change the values of these without also modifying mpif.h.in.
 */
#define MPI_SUCCESS                     0
#define MPI_ERR_BUFFER                  1
#define MPI_ERR_COUNT                   2
#define MPI_ERR_TYPE                    3
#define MPI_ERR_TAG                     4
#define MPI_ERR_COMM                    5
#define MPI_ERR_RANK                    6
#define MPI_ERR_REQUEST                 7
#define MPI_ERR_ROOT                    8
#define MPI_ERR_GROUP                   9
#define MPI_ERR_OP                      10
#define MPI_ERR_TOPOLOGY                11
#define MPI_ERR_DIMS                    12
#define MPI_ERR_ARG                     13
#define MPI_ERR_UNKNOWN                 14
#define MPI_ERR_TRUNCATE                15
#define MPI_ERR_OTHER                   16
#define MPI_ERR_INTERN                  17
#define MPI_ERR_IN_STATUS               18
#define MPI_ERR_PENDING                 19
#define MPI_ERR_ACCESS                  20
#define MPI_ERR_AMODE                   21
#define MPI_ERR_ASSERT                  22
#define MPI_ERR_BAD_FILE                23
#define MPI_ERR_BASE                    24
#define MPI_ERR_CONVERSION              25
#define MPI_ERR_DISP                    26
#define MPI_ERR_DUP_DATAREP             27
#define MPI_ERR_FILE_EXISTS             28
#define MPI_ERR_FILE_IN_USE             29
#define MPI_ERR_FILE                    30
#define MPI_ERR_INFO_KEY                31
#define MPI_ERR_INFO_NOKEY              32
#define MPI_ERR_INFO_VALUE              33
#define MPI_ERR_INFO                    34
#define MPI_ERR_IO                      35
#define MPI_ERR_KEYVAL                  36
#define MPI_ERR_LOCKTYPE                37
#define MPI_ERR_NAME                    38
#define MPI_ERR_NO_MEM                  39
#define MPI_ERR_NOT_SAME                40
#define MPI_ERR_NO_SPACE                41
#define MPI_ERR_NO_SUCH_FILE            42
#define MPI_ERR_PORT                    43
#define MPI_ERR_QUOTA                   44
#define MPI_ERR_READ_ONLY               45
#define MPI_ERR_RMA_CONFLICT            46
#define MPI_ERR_RMA_SYNC                47
#define MPI_ERR_SERVICE                 48
#define MPI_ERR_SIZE                    49
#define MPI_ERR_SPAWN                   50
#define MPI_ERR_UNSUPPORTED_DATAREP     51
#define MPI_ERR_UNSUPPORTED_OPERATION   52
#define MPI_ERR_WIN                     53

#define MPI_T_ERR_MEMORY                54
#define MPI_T_ERR_NOT_INITIALIZED       55
#define MPI_T_ERR_CANNOT_INIT           56
#define MPI_T_ERR_INVALID_INDEX         57
#define MPI_T_ERR_INVALID_ITEM          58
#define MPI_T_ERR_INVALID_HANDLE        59
#define MPI_T_ERR_OUT_OF_HANDLES        60
#define MPI_T_ERR_OUT_OF_SESSIONS       61
#define MPI_T_ERR_INVALID_SESSION       62
#define MPI_T_ERR_CVAR_SET_NOT_NOW      63
#define MPI_T_ERR_CVAR_SET_NEVER        64
#define MPI_T_ERR_PVAR_NO_STARTSTOP     65
#define MPI_T_ERR_PVAR_NO_WRITE         66
#define MPI_T_ERR_PVAR_NO_ATOMIC        67
#define MPI_ERR_RMA_RANGE               68
#define MPI_ERR_RMA_ATTACH              69
#define MPI_ERR_RMA_FLAVOR              70
#define MPI_ERR_RMA_SHARED              71
#define MPI_T_ERR_INVALID               72
#define MPI_T_ERR_INVALID_NAME          73
#define MPI_ERR_PROC_ABORTED            74

/* not #if conditional on OPAL_ENABLE_FT_MPI for ABI */
#define MPI_ERR_PROC_FAILED             75
#define MPI_ERR_PROC_FAILED_PENDING     76
#define MPI_ERR_REVOKED                 77

/* Per MPI-3 p349 47, MPI_ERR_LASTCODE must be >= the last predefined
   MPI_ERR_<foo> code.  Set the last code to allow some room for adding
   error codes without breaking ABI. */
#define MPI_ERR_LASTCODE                92

/*
 * Comparison results.  Don't change the order of these, the group
 * comparison functions rely on it.
 * Do not change the order of these without also modifying mpif.h.in.
 */
enum {
	MPI_IDENT,
	MPI_CONGRUENT,
	MPI_SIMILAR,
	MPI_UNEQUAL
};

/*
 * MPI_Init_thread constants
 * Do not change the order of these without also modifying mpif.h.in.
 */
enum {
	MPI_THREAD_SINGLE,
	MPI_THREAD_FUNNELED,
	MPI_THREAD_SERIALIZED,
	MPI_THREAD_MULTIPLE
};

/*
 * Datatype combiners.
 * Do not change the order of these without also modifying mpif.h.in.
 * (see also mpif-common.h.fin).
 */
enum {
	MPI_COMBINER_NAMED,
	MPI_COMBINER_DUP,
	MPI_COMBINER_CONTIGUOUS,
	MPI_COMBINER_VECTOR,
	#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
	MPI_COMBINER_HVECTOR_INTEGER,
	#else
	OMPI_WAS_MPI_COMBINER_HVECTOR_INTEGER,	/* preserve ABI compatibility */
	#endif
	MPI_COMBINER_HVECTOR,
	MPI_COMBINER_INDEXED,
	#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
	MPI_COMBINER_HINDEXED_INTEGER,
	#else
	OMPI_WAS_MPI_COMBINER_HINDEXED_INTEGER,	/* preserve ABI compatibility */
	#endif
	MPI_COMBINER_HINDEXED,
	MPI_COMBINER_INDEXED_BLOCK,
	#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
	MPI_COMBINER_STRUCT_INTEGER,
	#else
	OMPI_WAS_MPI_COMBINER_STRUCT_INTEGER,	/* preserve ABI compatibility */
	#endif
	MPI_COMBINER_STRUCT,
	MPI_COMBINER_SUBARRAY,
	MPI_COMBINER_DARRAY,
	MPI_COMBINER_F90_REAL,
	MPI_COMBINER_F90_COMPLEX,
	MPI_COMBINER_F90_INTEGER,
	MPI_COMBINER_RESIZED,
	MPI_COMBINER_HINDEXED_BLOCK
};

////MOCK ERRHANDLERS
#define OMPI_ERRHANDLER_INVOKE(comm,err,fname)		err
#define OMPI_ERRHANDLER_NOHANDLE_INVOKE(err,fname)	err
@ -0,0 +1,499 @@
/*
 * Sp3MM_for_AlgebraicMultiGrid
 * (C) Copyright 2021-2022 Andrea Di Iorio
 * Distributed under the same BSD-style terms reproduced in full at the top of this changeset.
 */
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <errno.h>

#include "macros.h"
#include "sparseMatrix.h"
#include "utils.h"

/////////////// no offset --> SINGLE implementation functions
#ifndef SPARSE_UTILS_C
#define SPARSE_UTILS_C
///DENSE accumulator utils
/*
 * alloc the threads' aux arrays once and split them among the threads' structures,
 * so they can be freed once from the first thread struct, via the original pointers returned from the alloc
 */
ACC_DENSE* _initAccVectors_monoalloc(ulong num,ulong size){	//TODO PERF WITH NEXT
	ACC_DENSE* out	 = NULL;
	double* vAll	 = NULL;
	ulong* vAllNzIdx = NULL;
	if (!(out = calloc(num,sizeof(*out)))){
		ERRPRINT("_initAccVectors aux struct alloc failed\n");
		return NULL;
	}
	if (!(vAll = calloc(num*size,sizeof(*vAll)))) {
		ERRPRINT("_initAccVectors aux dense vectors alloc failed\n");
		goto err;
	}
	if (!(vAllNzIdx = calloc(num*size,sizeof(*vAllNzIdx)))) {
		ERRPRINT("_initAccVectors aux dense vectors' idx alloc failed\n");
		goto err;
	}
	for (ulong i=0; i<num; i++){
		out[i].vLen		= size;	//TODO USELESS INFO?
		out[i].v		= vAll		+ i*size;
		out[i].nnzIdx		= vAllNzIdx	+ i*size;
		out[i].nnzIdxMap.len	= 0;
	}
	return out;

	err:
	free(out);
	free(vAll);
	free(vAllNzIdx);
	return NULL;
}
int allocAccDense(ACC_DENSE* v,ulong size){
	v->vLen = size;
	if (!(v->v = calloc(size,sizeof(*(v->v))))) {
		ERRPRINT("_initAccVectors aux dense vector alloc failed\n");
		return EXIT_FAILURE;
	}
	if (!(v->nnzIdx = calloc(size,sizeof(*(v->nnzIdx))))) {
		ERRPRINT("_initAccVectors aux dense vector's idx alloc failed\n");
		return EXIT_FAILURE;
	}
	if (initSpVectIdxDenseAcc(size, &v->nnzIdxMap))	return EXIT_FAILURE;

	return EXIT_SUCCESS;
}

ACC_DENSE* _initAccVectors(ulong num,ulong size){
	ACC_DENSE* out = NULL;
	if (!(out = calloc(num,sizeof(*out)))){
		ERRPRINT("_initAccVectors aux struct alloc failed\n");
		return NULL;
	}
	for (ulong i=0; i<num; i++){
		if (allocAccDense(out+i,size))	goto _err;
	}
	return out;

	_err:
	for (ulong i=0; i<num; i++){
		if (out[i].v)		free(out[i].v);
		if (out[i].nnzIdx)	free(out[i].nnzIdx);
	}
	free(out);
	return NULL;
}

void freeAccsDense(ACC_DENSE* vectors,ulong num){
	for (ulong i=0; i<num; i++){
		free(vectors[i].v);
		free(vectors[i].nnzIdx);
		free(vectors[i].nnzIdxMap.idxsMap);
	}
	free(vectors);
}
void _freeAccsDenseChecks(ACC_DENSE* vectors,ulong num){
	if (!vectors)	return;
	for (ulong i=0; i<num; i++){
		if(vectors[i].v)	free(vectors[i].v);
		if(vectors[i].nnzIdx)	free(vectors[i].nnzIdx);
	}
	free(vectors);
}

int initSpVectIdxDenseAcc(idx_t idxMax,SPVECT_IDX_DENSE_MAP* vectIdxsMap){
	vectIdxsMap->len = 0;
	nnz_idxs_flags_t* idxMaps = &vectIdxsMap->idxsMap;
	#if SPVECT_IDX_BITWISE == TRUE	//nnz presence flags as bitflags in limbs
	vectIdxsMap->idxsMapN = INT_DIV_CEIL(idxMax, LIMB_SIZE_BIT);
	#else				//nnz presence flags in a single array
	vectIdxsMap->idxsMapN = idxMax;
	#endif	//SPVECT_IDX_BITWISE == TRUE
	if (!(*idxMaps = calloc(vectIdxsMap->idxsMapN, sizeof(**idxMaps)))) {
		ERRPRINT("initSpVectIdxDenseAcc\tidxMaps SPVECT_IDX_BITWISE calloc err\n");
		return EXIT_FAILURE;
	}
	return EXIT_SUCCESS;
}
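/* A minimal sketch of the SPVECT_IDX_BITWISE layout initialized above: one
 * presence bit per column packed into limbs. limb_t and LIMB_SIZE_BIT are
 * assumptions of this sketch; the real definitions come from macros.h. */
#include <stdint.h>
#include <limits.h>
typedef uint64_t limb_t;
#define LIMB_SIZE_BIT_SKETCH (sizeof(limb_t)*CHAR_BIT)
static inline void setNnzFlag(limb_t* limbs, unsigned long col){
	limbs[col/LIMB_SIZE_BIT_SKETCH] |= ((limb_t)1) << (col%LIMB_SIZE_BIT_SKETCH);
}
static inline int testNnzFlag(const limb_t* limbs, unsigned long col){
	return (limbs[col/LIMB_SIZE_BIT_SKETCH] >> (col%LIMB_SIZE_BIT_SKETCH)) & 1;
}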
void checkOverallocPercent(ulong* forecastedSizes,spmat* AB){
	for (ulong r=0,rSize,forecastedSize; r < AB->M; r++){
		forecastedSize = forecastedSizes[r];
		#ifdef ROWLENS
		rSize = AB->RL[r];
		#else
		rSize = AB->IRP[r+1] - AB->IRP[r];
		#endif
		DEBUGCHECKS{
			if ( forecastedSize < rSize ){
				ERRPRINT("BAD FORECASTING\n");
				assert(forecastedSize >= rSize );
			}
		}
		DEBUGPRINT
			printf("extra forecastedSize of row: %lu\t=\t%lf %% \n",
			  r,100*(forecastedSize-rSize) / (double) forecastedSize);
	}
	idx_t extraMatrix = forecastedSizes[AB->M] - AB->NZ;
	printf("extra forecastedSize of the matrix: \t%lu\t = %lf %% \n",
	  extraMatrix, 100*extraMatrix /(double) forecastedSizes[AB->M]);
}
int spmatDiff(spmat* A, spmat* B){
	if (A->NZ != B->NZ){
		ERRPRINT("NZ differ\n");
		return EXIT_FAILURE;
	}
	if (memcmp(A->IRP,B->IRP,(A->M+1)*sizeof(*A->IRP))){	//memcmp counts bytes; IRP has M+1 entries
		ERRPRINT("IRP differ\n");
		return EXIT_FAILURE;
	}
	if (doubleVectorsDiff(A->AS,B->AS,A->NZ,NULL)){
		ERRPRINT("AS DIFFER\n");
		return EXIT_FAILURE;
	}
	if (memcmp(A->JA,B->JA,A->NZ*sizeof(*A->JA))){		//memcmp counts bytes
		ERRPRINT("JA differ\n");
		return EXIT_FAILURE;
	}
	return EXIT_SUCCESS;
}

double* CSRToDense(spmat* sparseMat){
	double* denseMat;
	ulong i,j,idxNZ,denseSize;
	if (__builtin_umull_overflow(sparseMat->M,sparseMat->N,&denseSize)){
		ERRPRINT("overflow in dense allocation\n");
		return NULL;
	}
	if (!(denseMat = calloc(denseSize, sizeof(*denseMat)))){
		ERRPRINT("dense matrix alloc failed\n");
		return NULL;
	}
	for (i=0;i<sparseMat->M;i++){
		for (idxNZ=sparseMat->IRP[i]; idxNZ<sparseMat->IRP[i+1]; ++idxNZ){
			j = sparseMat->JA[idxNZ];
			//converting sparse item into dense entry
			denseMat[(ulong) IDX2D(i,j,sparseMat->N)] = sparseMat->AS[idxNZ];
		}
	}
	return denseMat;
}
void printSparseMatrix(spmat* spMatrix,char justNZMarkers){
	double* denseMat = CSRToDense(spMatrix);
	if (!denseMat)	return;
	printMatrix(denseMat,spMatrix->M,spMatrix->N,justNZMarkers);
	free(denseMat);
}
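/* Small self-contained check of CSRToDense (values illustrative; assumes the
 * spmat CSR fields used above: M, N, NZ, IRP, JA, AS): the 2x3 matrix
 * [[1,0,2],[0,3,0]]. */
void CSRToDense_example(void){
	ulong irp[] = {0,2,3}, ja[] = {0,2,1};
	double as[] = {1.0,2.0,3.0};
	spmat m = {0};
	m.M = 2; m.N = 3; m.NZ = 3;
	m.IRP = irp; m.JA = ja; m.AS = as;
	double* d = CSRToDense(&m);
	if (d){	//d[IDX2D(0,2,3)] == 2.0 and d[IDX2D(1,1,3)] == 3.0
		printMatrix(d,m.M,m.N,0);
		free(d);
	}
}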
static inline int _colsPartitioningUnifRanges_init(spmat* A,uint gridCols,
  spmat** colParts,idx_t** colPartsLens){

	spmat* colPart;
	ulong _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols;
	///alloc/init partitions structures
	if (!(*colParts = calloc(gridCols, sizeof(**colParts)))){
		ERRPRINT("colsPartitioningUnifRanges\tcolumns partitions of A calloc fail\n");
		return EXIT_FAILURE;
	}
	for (ulong i=0,colBlock; i<gridCols; i++){
		colBlock = UNIF_REMINDER_DISTRI(i,_colBlock,_colBlockRem);
		colPart	 = *colParts + i;
		if (allocSpMatrixInternal(A->M,colBlock,colPart)){
			ERRPRINT("colsPartitioningUnifRanges\tallocSpMatrixInternal partition err\n");
			return EXIT_FAILURE;
		}
		//TODO overalloc A cols partitions NZ arrays, then realloc
		if (!(colPart->AS = malloc(A->NZ * sizeof(*A->AS)))){
			ERRPRINT("colPart of A overalloc of AS errd\n");
			return EXIT_FAILURE;
		}
		if (!(colPart->JA = malloc(A->NZ * sizeof(*A->JA)))){
			ERRPRINT("colPart of A overalloc of JA errd\n");
			return EXIT_FAILURE;
		}
	}
	//for each A col partition -> last copied nz index = nnz copied amount
	if (! (*colPartsLens = calloc(gridCols, sizeof(**colPartsLens))) ) {
		ERRPRINT("colsPartitioningUnifRanges: colPartsLens calloc errd\n");
		return EXIT_FAILURE;
	}
	return EXIT_SUCCESS;
}

static inline int _colsPartitioningUnifRanges_finalRealloc(spmat* A,uint gridCols,
  spmat* colParts,idx_t* colPartsLens){

	spmat* colPart;
	double* tmpAS; idx_t* tmpJA;
	//realloc the overallocated A parts NZ arrays (TODO -> downsizing -> no fails?)
	for (ulong i=0,partLen; i<gridCols; i++){
		colPart = colParts + i;
		partLen = colPartsLens[i];
		if (!(tmpAS = realloc(colPart->AS,partLen*sizeof(*(colPart->AS))))){
			ERRPRINT("realloc overallocated cols partition AS array\n");
			return EXIT_FAILURE;
		}
		colPart->AS = tmpAS;
		if (!(tmpJA = realloc(colPart->JA,partLen*sizeof(*(colPart->JA))))){
			ERRPRINT("realloc overallocated cols partition JA array\n");
			return EXIT_FAILURE;
		}
		colPart->JA	  = tmpJA;
		colPart->NZ	  = partLen;
		colPart->IRP[A->M] = partLen;
	}
	return EXIT_SUCCESS;
}
#endif
#ifndef OFF_F
	#error generic implementation requires OFF_F defined
#endif
//////////////////////// CSR SPECIFIC -- TODO RENAME //////////////////
///SPARSE MATRIX PARTITIONING
idx_t* CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols){
	ulong subRowsN	= A->M * gridCols;
	ulong _colBlock	= A->N/gridCols, _colBlockRem = A->N%gridCols;
	ulong* offsets	= malloc( (subRowsN+1) * sizeof(*offsets) );
	if (!offsets) {
		ERRPRINT("colsOffsetsPartitioningUnifRanges:\toffsets malloc errd\n");
		return NULL;
	}
	///OFFSETS COMPUTE FOR COL GROUPS -> O( A.NZ )
	for (ulong r=0, j=0; r<A->M; j=A->IRP[++r]-OFF_F){
		offsets[ IDX2D(r,0,gridCols) ] = j;	//row's first gc start is constrained
		//navigate column groups inside current row
		for (ulong gc=1,gcStartCol; gc<gridCols; gc++){
			gcStartCol = UNIF_REMINDER_DISTRI_STARTIDX(gc,_colBlock,_colBlockRem);
			//goto the GroupCols start entry, keeping A's nnz entries navigation (idx j)
			while (j < A->IRP[r+1]-OFF_F && A->JA[j]-OFF_F < gcStartCol)
				j++;
			offsets[ IDX2D(r,gc,gridCols) ] = j;	//row's gc group startIdx
		}
	}
	offsets[subRowsN] = A->NZ;	//last row's partition end is constrained
	return offsets;
}
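/* Worked example of the offsets layout computed above (values illustrative):
 * A = [[1,0,2,0],[0,0,0,3]]  (M=2, N=4, NZ=3, JA={0,2,3}), gridCols=2
 * -> _colBlock=2, _colBlockRem=0, column groups {0,1} and {2,3}:
 *    row 0: group0 starts at nz idx 0, group1 at idx 1
 *    row 1: group0 starts at idx 2 (empty part), group1 at idx 2
 * offsets = {0,1, 2,2, 3}; partition p spans nz indices [offsets[p],offsets[p+1]). */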
spmat* CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)(spmat* A,uint gridCols,
  idx_t** colPartsOffsets)
{
	spmat *colParts = NULL, *colPart;
	ulong *colPartsLens = NULL;

	///alloc/init partitions structures
	idx_t* colOffsets = NULL;
	if (!(colOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(A,gridCols)))
		goto _err;
	if (_colsPartitioningUnifRanges_init(A,gridCols,&colParts,&colPartsLens))
		goto _err;
	//OFFSET BASED COPY OF A.COL_GROUPS -> O( A.NZ )
	for (idx_t r=0,gcId=0; r<A->M; r++){
		for (idx_t gc=0,gcStartIdx=0,gLen=0; gc<gridCols; gc++,gcId++){
			//gcId = IDX2D(r,gc,gridCols);	//sequential scan of the parts
			colPart	   = colParts + gc;
			gcStartIdx = colOffsets[ gcId ];
			gLen	   = colOffsets[ gcId+1 ] - gcStartIdx;
			colPart->IRP[r] = colPartsLens[gc];	//new line for the col partition
			//actual copy of the nnz entries to the colPartitions
			memcpy(colPart->AS+colPart->IRP[r], A->AS+gcStartIdx, gLen*sizeof(*A->AS));
			memcpy(colPart->JA+colPart->IRP[r], A->JA+gcStartIdx, gLen*sizeof(*A->JA));
			colPartsLens[gc] += gLen;
			#ifdef ROWLENS
			colPart->RL[r] = gLen;	//gLen, not the undefined i of the non-offsets variant
			#endif
		}
	}

	//realloc the overallocated A parts NZ arrays
	if(_colsPartitioningUnifRanges_finalRealloc(A,gridCols,colParts,colPartsLens))
		goto _err;

	free(colPartsLens);
	if (colPartsOffsets)	*colPartsOffsets = colOffsets;	//save for the caller
	else			free(colOffsets);

	return colParts;
	_err:
	if (colParts){	//guard: don't iterate a NULL base pointer
		for (ulong i=0; i<gridCols; i++)	freeSpmatInternal(colParts+i);
		free(colParts);
	}
	free(colOffsets);	//not yet handed to the caller here
	free(colPartsLens);
	return NULL;
}
spmat* CAT(colsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols){
	spmat *colParts = NULL, *colPart;
	ulong _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols, *colPartsLens=NULL;
	///alloc/init partitions structures
	if (_colsPartitioningUnifRanges_init(A,gridCols,&colParts,&colPartsLens))
		goto _err;
	/* TODO
	 * parallelize: collapse the 2 fors with OMP, turn gcEndCol into a start index, ...
	 * or wrap this into a static inline
	 */
	for (ulong r=0, j=0; r<A->M; j=A->IRP[++r]-OFF_F){
		//navigate column groups inside current row
		for (ulong gc=0,gcEndCol=0,i; gc<gridCols ; gc++,j+=i){
			i = 0;	//@i = len of the current subpartition of row @r to copy
			colPart = colParts + gc;
			//NB: IRP is not shifted since it is handled as an internal implementation component,
			//but JA idxs are memcopied -> kept as they were originally, handled with shifts in the functions
			colPart->IRP[r] = colPartsLens[gc];
			gcEndCol += UNIF_REMINDER_DISTRI(gc,_colBlock,_colBlockRem);
			//goto the next GroupCols, keeping A's nnz entries navigation ( index j+i )
			while ( j+i < A->IRP[r+1]-OFF_F && A->JA[j+i]-OFF_F < gcEndCol )	i++;
			memcpy(colPart->AS+colPart->IRP[r], A->AS+j, i*sizeof(*A->AS));
			memcpy(colPart->JA+colPart->IRP[r], A->JA+j, i*sizeof(*A->JA));

			colPartsLens[gc] += i;
			#ifdef ROWLENS
			colPart->RL[r] = i;
			#endif
		}
	}
	//realloc the overallocated A parts NZ arrays
	if(_colsPartitioningUnifRanges_finalRealloc(A,gridCols,colParts,colPartsLens))
		goto _err;
	free(colPartsLens);
	return colParts;
	_err:
	if (colParts){
		for (ulong i=0; i<gridCols; i++)	freeSpmatInternal(colParts+i);
		free(colParts);
	}
	if(colPartsLens)	free(colPartsLens);
	return NULL;
}
void CAT(checkOverallocRowPartsPercent_,OFF_F)(ulong* forecastedSizes,spmat* AB,
  idx_t gridCols,idx_t* bColOffsets){
	idx_t* abColOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(AB, gridCols);
	assert(abColOffsets);	//partitioning error
	for (idx_t rLen,forecast,partId=0; partId<AB->M*gridCols; partId++){
		forecast = forecastedSizes[partId];
		rLen	 = abColOffsets[partId+1] - abColOffsets[partId];
		DEBUGCHECKS	assert(forecast >= rLen);
	}
	idx_t extraMatrix = forecastedSizes[AB->M] - AB->NZ;
	printf("extra forecastedSize of the matrix: \t%lu\t = %lf %% \n",
	  extraMatrix, 100*extraMatrix /(double) forecastedSizes[AB->M]);

	free(abColOffsets);
}
#ifdef SPARSEUTILS_MAIN_TEST	///unit test embedded

///inline export here
//SPMV_CHUNKS_DISTR spmvChunksFair;
spmat* allocSpMatrix(ulong rows, ulong cols);
int allocSpMatrixInternal(ulong rows, ulong cols, spmat* mat);
void freeSpmatInternal(spmat* mat);
void freeSpmat(spmat* mat);

////INTERNAL TEST FUNCTIONS
//test that each row's partition from colsOffsetsPartitioningUnifRanges is in the correct index range
#include <alloca.h>
int testColsOffsetsPartitioningUnifRanges(spmat* mat,ulong gridCols,ulong* partsOffs){
	ulong _colBlock = mat->N/gridCols, _colBlockRem = mat->N%gridCols;
	ulong j=0;	//CSR scanning nnz idx
	ulong* colPartsPopulations = alloca(gridCols * sizeof(*colPartsPopulations));
	memset(colPartsPopulations,0,gridCols * sizeof(*colPartsPopulations));
	for (ulong r=0,pId=0; r<mat->M; r++){
		for (ulong gc=0,pStartIdx,pEndIdx; gc<gridCols; gc++,pId++){
			pStartIdx = UNIF_REMINDER_DISTRI_STARTIDX(gc,_colBlock,_colBlockRem);
			pEndIdx	  = UNIF_REMINDER_DISTRI_STARTIDX(gc+1,_colBlock,_colBlockRem)-1;
			//pId=IDX2D(r,gc,gridCols);
			for (ulong idx=partsOffs[pId],c; idx<partsOffs[pId+1]; idx++,j++){
				c = mat->JA[idx];
				assert(j == idx);				//consecutive index in partitioning
				assert(pStartIdx <= c && c <= pEndIdx);		//colRange
				assert(mat->IRP[r] <= idx && idx <= mat->IRP[r+1] );	//rowRange
			}
			colPartsPopulations[gc] += partsOffs[pId+1] - partsOffs[pId];
		}
	}
	assert(j == mat->NZ);
	ulong s=0;
	for (ulong gc=0,partSize; gc < gridCols; gc++,s+=partSize){
		partSize = colPartsPopulations[gc];
		double partShare=partSize/(double)mat->NZ,partsAvg=1/(double)gridCols;
		double partShareAvgDiff = partShare - partsAvg;
		printf("colPartition %lu has:\t%lu = %lf of NNZ\t\t .. %lf\tAVG diff\n",
		  gc,partSize,partShare,partShareAvgDiff);
	}
	assert(s == mat->NZ);	//TODO DUPLICATED
	return EXIT_SUCCESS;
}

CONFIG Conf = {
	.gridRows = 8,
	.gridCols = 8,
};

#include "parser.h"
int main(int argc, char** argv){
	int out=EXIT_FAILURE;
	if (init_urndfd())	return out;
	if (argc < 2 )	{ERRPRINT("a COO matrix file is required as argv[1] for the test\n"); return out;}
	////parse sparse matrix and dense vector
	spmat* mat;
	char* trgtMatrix = TMP_EXTRACTED_MARTIX;
	if (extractInTmpFS(argv[1],TMP_EXTRACTED_MARTIX) < 0)
		trgtMatrix = argv[1];
	if (!(mat = MMtoCSR(trgtMatrix))){
		ERRPRINT("err during conversion MM -> CSR\n");
		return out;
	}
	////partitioning test
	ulong* colsPartitions = colsOffsetsPartitioningUnifRanges_0(mat,Conf.gridCols);
	if (!colsPartitions)	goto _free;
	if (testColsOffsetsPartitioningUnifRanges(mat,Conf.gridCols,colsPartitions))
		goto _free;

	out=EXIT_SUCCESS;
	printf("testColsOffsetsPartitioningUnifRanges passed with "
	  "mat: %lux%lu-%luNNZ\tgrid: %dx%d\n",
	  mat->M,mat->N,mat->NZ,Conf.gridRows,Conf.gridCols);
	_free:
	if (colsPartitions)	free(colsPartitions);

	return out;
}
#endif //SPARSEUTILS_MAIN_TEST
@ -0,0 +1,38 @@
/*
 * Sp3MM_for_AlgebraicMultiGrid
 * (C) Copyright 2021-2022 Andrea Di Iorio
 * Distributed under the same BSD-style terms reproduced in full at the top of this changeset.
 */
#include "sparseUtilsMulti.h"
|
||||
|
||||
#define OFF_F 0
|
||||
#include "sparseUtilsGeneric.c"
|
||||
#undef OFF_F
|
||||
|
||||
#define OFF_F 1
|
||||
#include "sparseUtilsGeneric.c"
|
||||
#undef OFF_F
|
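/* Sketch of the token-pasting the double inclusion above relies on. _CAT/CAT
 * are re-stated here hypothetically (the project defines them in macros.h):
 * with OFF_F set to 0 and then 1, each generic symbol is instantiated twice,
 * once per C/Fortran indexing convention. */
#define _CAT_SKETCH(a,b)	a##b
#define CAT_SKETCH(a,b)		_CAT_SKETCH(a,b)
#define OFF_F 1
unsigned long CAT_SKETCH(rowLen_,OFF_F)(const unsigned long* IRP, unsigned long r){
	return IRP[r+1] - IRP[r];	//expands to rowLen_1: the Fortran 1-based instance
}
#undef OFF_F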
@ -0,0 +1,502 @@
/*
 * Sp3MM_for_AlgebraicMultiGrid
 * (C) Copyright 2021-2022 Andrea Di Iorio
 * Distributed under the same BSD-style terms reproduced in full at the top of this changeset.
 */
//various aux functions
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
//#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <errno.h>
#include <limits.h>

#include "macros.h"
#include "sparseMatrix.h"
#include "utils.h"

int urndFd;	//will point to the urandom device file

///IO
//UNBUFFERED IO
int write_wrap(int fd,void* src,size_t count){
	ssize_t wr;
	size_t written=0;
	while (written < count){
		wr=write(fd,src+written,count-written);
		if (wr<0){
			perror("write");
			return wr;
		}
		written+=wr;
	}
	return 0;
}

int read_wrap(int fd,void* dst,size_t count){
	ssize_t rd;
	size_t readed=0;
	while (readed < count){
		rd=read(fd,dst+readed,count-readed);
		if (rd<0){
			perror("read");
			return rd;
		}
		readed+=rd;
	}
	return 0;
}

int readALL_wrap(int fd,char** dst,size_t* count){
	ssize_t rd=!0;		//!0 to allow @count > fsize
	size_t readed=0;
	char allocated=0;	//flag set if the *dst allocation was done here
	if (!(*dst)){		//allocate a dst buffer of the same size as the file
		off_t seekCurr=lseek(fd,0,SEEK_CUR);
		off_t fsize=lseek(fd,0,SEEK_END);
		if( seekCurr==-1 || fsize==-1 || lseek(fd,seekCurr,SEEK_SET)==-1){
			perror("lseek");
			return EXIT_FAILURE;
		}
		*count=fsize;
		if (!(*dst=malloc(fsize))){
			fprintf(stderr,"malloc read_wrap file size buf error\n");
			return EXIT_FAILURE;
		}
		allocated=!0;
	}
	//read loop
	while (readed < *count && rd > 0){
		rd=read(fd,(*dst)+readed,*count-readed);
		if (rd<0){
			perror("read");
			if (allocated)	free(*dst);
			return rd;
		}
		readed+=rd;
	}
	if (readed < *count)	(*dst)[readed]='\0';	//TODO NEEDED?
	return EXIT_SUCCESS;
}
int init_urndfd(){	// wrap init urndFd
	if((urndFd=open(DRNG_DEVFILE,O_RDONLY))<0){
		perror("open DRNG_DEVFILE");
		return EXIT_FAILURE;
	}
	return EXIT_SUCCESS;
}
int createNewFile(char* const outFpath){
	int mode=S_IRWXU;
	errno = 0;
	int outFd=open(outFpath, O_WRONLY | O_CREAT | O_TRUNC, mode);
	if (errno==EEXIST)	outFd=open(outFpath, O_WRONLY | O_TRUNC, mode);
	if (outFd<0)		perror("open outFd failed ");
	return outFd;
}
///BUFFERED IO
//TODO a series of 0 returns is still unhandled... fix
int fread_wrap(FILE* fp,void* dst,size_t count){
	int rd=0,toRead;
	size_t readed=0;
	while (readed < count){
		toRead	= count - readed;
		rd	= fread(dst+readed,1,toRead,fp);
		if (rd != toRead){	//TODO SHORT ITEM COUNT
			if	(feof(fp))	return EOF;
			else if (ferror(fp)){
				ERRPRINT("fread_wrap errd\n");
				return -2;
			}
		}
		readed+=rd;
	}
	return rd;
}
///STRUCTURED DATA IO
int writeDoubleVector(char* fpath,double* v,ulong size){
	int fd,out=EXIT_FAILURE;
	if ( (fd = createNewFile(fpath) ) < 0 )	return out;	//nothing to close on open failure
	if (write_wrap(fd,v,size * sizeof(*v)))	goto _end;	//raw write of the vector
	out = EXIT_SUCCESS;
	DEBUG printf("written double vector into %s RAW of %lu elements\n",fpath,size);

	_end:
	if (close(fd) == EOF)	perror("close errd\n");
	return out;
}

///STRUCTURED DATA IO -- BUFFERED: FSCANF - FPRINTF
int writeDoubleVectorAsStr(char* fpath,double* v,ulong size){
	int out=EXIT_FAILURE;
	FILE* fp = fopen(fpath,"w");
	if (!fp){
		perror("fopen vector file write");
		return EXIT_FAILURE;
	}
	for (ulong i=0; i<size; i++){
		if (fprintf(fp,DOUBLE_STR_FORMAT,v[i]) < 0){
			ERRPRINT("fprintf to out vector file errd\n");
			goto _end;
		}
	}
	out = EXIT_SUCCESS;
	DEBUG printf("written vector into %s of %lu elements\n",fpath,size);

	_end:
	if (fclose(fp) == EOF)	perror("fclose errd\n");
	return out;
}

double* readDoubleVector(char* fpath,ulong* size){
	int rd,hleft;
	double *out,*tmp;
	ulong i=0,vectorSize=RNDVECTORSIZE;
	if (*size)	vectorSize = *size;
	FILE* fp = fopen(fpath,"r");
	if (!fp){
		perror("fopen vector file");
		return NULL;
	}
	if (!(out = malloc(vectorSize * sizeof(*out)))){
		ERRPRINT("vector read malloc fail for file\n");
		goto _err;
	}
	while (1){
		if (i >= vectorSize ){	//realloc the array
			vectorSize *= VECTOR_STEP_REALLOC;
			if (!(tmp=realloc(out,vectorSize*sizeof(*out)))){
				ERRPRINTS("realloc errd to ~~ %lu MB\n",vectorSize >> 20);
				goto _err;
			}
			out = tmp;
			DEBUG printf("reallocd to ~~ %lu MB\n",vectorSize >> 20);
		}
		ulong toRead = MIN(VECTOR_READ_BLOCK,(vectorSize-i));
		if((rd = fread_wrap(fp,out + i,sizeof(*out)*toRead)) == -2){
			ERRPRINT("fread_wrap errd\n");
			goto _err;
		}
		if(feof(fp))	//TODO rd == EOF not work always != W*F???
			break;
		i += rd/sizeof(*out);			//sizeof(*out): element size, not pointer size
		if( (hleft = rd % sizeof(*out)) ){
			DEBUG hprintf("half left double read... rounding down fptr\n");
			if(fseek(fp,-hleft,SEEK_CUR)){
				perror("fseek in readDoubleVector");
				goto _err;
			}
		}
	}
	//REALLOC THE ARRAY TO THE FINAL SIZE
	assert( i > 0 );
	*size = i;	//report the element count actually read, as in the Str variant
	if (!(tmp = realloc(out,*size*sizeof(*out)))){
		ERRPRINT("realloc errd\n");
		goto _err;
	}
	out = tmp;
	DEBUG printf("read vector from %s of %lu elements\n",fpath,*size);
	goto _free;

	_err:
	free(out);
	out = NULL;
	_free:
	if (fclose(fp) == EOF)	perror("fclose errd\n");
	return out;
}

double* readDoubleVectorStr(char* fpath,ulong* size){
	int fscanfOut;
	double *out,*tmp;
	ulong i=0,vectorSize=RNDVECTORSIZE;
	if (*size)	vectorSize = *size;
	FILE* fp = fopen(fpath,"r");
	if (!fp){
		perror("fopen vector file");
		return NULL;
	}
	if (!(out = malloc(vectorSize * sizeof(*out)))){
		ERRPRINT("vector read malloc fail for file\n");
		goto _err;
	}
	while (1){
		if (i >= vectorSize ){	//realloc the array
			vectorSize *= VECTOR_STEP_REALLOC;
			if (!(tmp=realloc(out,vectorSize*sizeof(*out)))){
				ERRPRINTS("realloc errd to ~~ %lu MB\n",vectorSize >> 20);
				goto _err;
			}
			out = tmp;
			DEBUG printf("reallocd to ~~ %lu MB\n",vectorSize >> 20);
		}
		fscanfOut = fscanf(fp,DOUBLE_STR_FORMAT,out + i++ );
		if ( fscanfOut == EOF && ferror(fp)){
			perror("invalid fscanf");
			goto _err;
		}
		if ( fscanfOut != 1 || fscanfOut == EOF )	break;	//end of the vector
	}
	//REALLOC THE ARRAY TO THE FINAL SIZE
	assert( i > 0 );
	*size = --i;	//the last fscanf didn't produce an element
	if (!(tmp = realloc(out,*size*sizeof(*out)))){
		ERRPRINT("realloc errd\n");
		goto _err;
	}
	out = tmp;
	DEBUG printf("read vector from %s of %lu elements\n",fpath,*size);
	goto _free;

	_err:
	free(out);
	out = NULL;
	_free:
	if (fclose(fp) == EOF)	perror("fclose errd\n");
	return out;
}
///CFG-AUX
int getConfig(CONFIG* conf){
	int changes=EXIT_FAILURE;
	char *varVal,*ptr;
	ulong val;
	if ((varVal = getenv(GRID_ROWS))){
		val=strtoul(varVal,&ptr,10);
		if (ptr==varVal || val>= UINT_MAX){
			perror("strtoul errd");
		} else {
			conf->gridRows = val;
		}
		changes = EXIT_SUCCESS;
	}
	if ((varVal = getenv(GRID_COLS))){
		val=strtoul(varVal,&ptr,10);
		if (ptr==varVal || val>= UINT_MAX){
			perror("strtoul errd");
		} else {
			conf->gridCols = val;
		}
		changes = EXIT_SUCCESS;
	}
	return changes;
}
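/* Hypothetical usage: override the parallelization grid from the environment.
 * GRID_ROWS/GRID_COLS are the env-var-name macros used above; the concrete
 * names and values below are illustrative:
 *     GRID_ROWS=16 GRID_COLS=4 ./sp3mmTest ...
 *
 *     CONFIG cfg = { .gridRows = 8, .gridCols = 8 };   //compile-time defaults
 *     getConfig(&cfg);   //EXIT_SUCCESS if at least one env var was found
 */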
/////LIB-SORTING -- WRAPPERS
//comparing functions
static inline int cmp_idx_t(const void* a, const void*b){
	idx_t aa=*((idx_t*) a), bb = *((idx_t*) b);	//cast to the compared type itself
	return aa==bb?0:aa>bb?1:-1;
}
static inline int cmpulong(const void* a, const void*b){
	ulong aa=*((ulong*) a), bb = *((ulong*) b);
	return aa==bb?0:aa>bb?1:-1;
}
static inline int cmpuint(const void* a, const void*b){
	uint aa=*((uint*) a), bb = *((uint*) b);
	return aa==bb?0:aa>bb?1:-1;
}
static inline int cmpRbNode(const void* a, const void* b){
	rbNode *aa=(rbNode*) a, *bb = (rbNode*) b;
	return cmp_idx_t(&aa->key,&bb->key);
}
//sorting functions
void sort_idx_t(idx_t* arr, idx_t len){
	qsort(arr,len,sizeof(*arr),cmp_idx_t);
}
void sortulong(ulong* arr, ulong len){
	qsort(arr,len,sizeof(*arr),cmpulong);
}
void sortuint(uint* arr, uint len){
	qsort(arr,len,sizeof(*arr),cmpuint);
}
void sortRbNode(rbNode* arr, idx_t len){
	qsort(arr,len,sizeof(*arr),cmpRbNode);
}

////Tests AUX
inline void assertArrNoRepetitions(idx_t* arrSorted, idx_t arrLen){
	if (arrLen <= 0)	return;	//nothing to check on an empty array (guard was inverted)
	for (idx_t i=1,last=arrSorted[0]; i<arrLen; last = arrSorted[i++])
		assert( arrSorted[i] != last );
}
///MATH UTILS

static inline int rndDouble_sinAll(double* d){
	if(read_wrap(urndFd,(void*) d,sizeof(*d))){
		ERRPRINT("read_wrap failed to read rnd double\n");
		return EXIT_FAILURE;
	}
	*d = sin(*d) * MAXRND;
	return EXIT_SUCCESS;
}
long _rndHold;	//permanent storage of rnd longs
static inline int rndDouble_sinDecimal(double* d){
	if(read_wrap(urndFd,(void*) &_rndHold,sizeof(_rndHold))){
		ERRPRINT("read_wrap failed to read holder for rnd double\n");
		return EXIT_FAILURE;
	}
	*d = (_rndHold % (ulong) ceil(MAXRND)) + sin(_rndHold);
	return EXIT_SUCCESS;
}

void statsAvgVar(double* values,uint numVals, double* out){
	double sum=0,sumSquare=0;
	for (uint i=0; i<numVals; i++){
		sum	  += values[i];
		sumSquare += values[i]*values[i];
	}
	out[0] = sum/numVals;					//AVG
	out[1] = sumSquare/numVals - (out[0] * out[0]);		//VAR = E[x^2] - E[x]^2
}
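/* Usage sketch of statsAvgVar on a run of timings (values illustrative):
 *     double times[] = {1.0, 2.0, 2.0, 3.0}, avgVar[2];
 *     statsAvgVar(times, 4, avgVar);
 * avgVar[0] == 2.0 (mean) and avgVar[1] == 0.5, since E[x^2] = 4.5 and
 * 4.5 - 2.0^2 = 0.5. */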
/// MATRIX - VECTOR COMPUTE UTILS
int fillRndVector(ulong size, double* v){
	for( ulong x=0; x<size; ++x ){
		if(rndDouble_sinAll( v+x ))	return EXIT_FAILURE;
		#ifdef RNDVECTMIN
		v[x] += RNDVECTMIN;
		#endif
	}
	return EXIT_SUCCESS;
}

//convention: the true result in @a, the value to check in @b
int doubleVectorsDiff(double* a, double* b, ulong n,double* diffMax){
	int out = EXIT_SUCCESS;
	double diff,diffAbs,_diffMax=0;
	if (diffMax)	*diffMax = 0;
	else		diffMax = &_diffMax;
	for (ulong i=0; i<n; i++){
		diff	= a[i] - b[i];
		diffAbs	= ABS( diff );
		if( diffAbs > DOUBLE_DIFF_THREASH ){
			out = EXIT_FAILURE;
			ERRPRINTS("DOUBLE VECTORS DIFF: DOUBLE_DIFF_THREASH=%lf\t<\t"
			  "|%13lg| = %lf %% of @a[%lu]\n",
			  DOUBLE_DIFF_THREASH,diff,100*diffAbs/ABS(a[i]),i);
			#ifdef DOUBLE_VECT_DIFF_EARLY_EXIT
			/*#pragma message("DOUBLE_VECT_DIFF_EARLY_EXIT: only the first differing double is reported")*/
			return EXIT_FAILURE;
			#endif
		}
		if ( ABS(*diffMax) < diffAbs )	*diffMax = diff;
	}
	DEBUG{
		printf("\nchecked diff %s"CEND" between 2 double vectors of %lu elements"
		  "\tmax diff: %le %s threash: %le\n", !out?CCC"OK":CCCERR"ERR!",
		  n,*diffMax,*diffMax<DOUBLE_DIFF_THREASH?"<":">=",
		  DOUBLE_DIFF_THREASH);
		if (!*diffMax){	//exact-match check; TODO REMOVE: useless on a self diff
			if (!memcmp(a,b,n*sizeof(*a)))	//memcmp, not memcpy: compare the raw bytes
				printf("EXACT MATCHING AMONG THE 2 DOUBLE VECTORS\n!");
		}
	}
	return out;
}
void printMatrix(double* mat,ulong m,ulong n,char justNZMarkers){
	printf("printing matrix: %lu x %lu\n",m,n);
	ulong i,j;
	for (i=0;i<m;i++){
		for (j=0;j<n;j++){
			if (justNZMarkers)	printf("%s",mat[IDX2D(i,j,n)]?".":" ");
			else			printf("%1.1lf ",mat[IDX2D(i,j,n)]);
		}
		printf("\n");
	}
	printf("\n");
}


void printVector(double* v,ulong size){
	for( ulong i=0;i<size;i++)	printf("%1.1lf ",v[i]);
	printf("\n");
}

////VAR -- MISC
/* search @str for any pattern from the NULL terminated @patterns array;
 * return a pointer to the pattern inside @str if any, otherwise NULL
 */
static inline char* searchPatternsInStr(char** patterns,char* str){
	char* out = NULL;
	for (char** p=patterns; !out && *p; p++)	out = strstr(str,*p);
	return out;
}
/* search for @pattern inside the NULL terminated @strings array;
 * return a pointer to the match inside the first string containing it, otherwise NULL
 */
static inline char* searchPatternInStrs(char* pattern,char** strings){
	char* out = NULL;
	for (char** str=strings; !out && *str; str++)	out = strstr(*str,pattern);
	return out;
}

inline int appendArr(ulong val,APPENDARRAY* list){
	return 0;	//TODO
}
///DECOMPRESSION IN TMPFS ; TODO vector not puttable here... figure out nicer way..
|
||||
char* COMPRESS_EXTENSIONS[] = { ".gz", ".xz", ".bz2", ".zip", NULL};
|
||||
#define STD_DECOMPR_FLAGS " -d -c "
|
||||
char* DECOMPRESS_CMDS[] = {"gzip" STD_DECOMPR_FLAGS, "xz" STD_DECOMPR_FLAGS, "bzip2" STD_DECOMPR_FLAGS,
|
||||
"unzip -c", NULL }; //zip is too old ;)
|
||||
|
||||
int extractInTmpFS(char* path, char* tmpFsDecompressPath){
|
||||
char* ext = searchPatternsInStr(COMPRESS_EXTENSIONS,path);
|
||||
if (!ext) return -1; //NOT SUPPORTED COMPRESS EXTENSION -> TRY REGULAR PATH
|
||||
//search first 2 char after dot of ext in DECOMPRESS_CMDS to get the decompress cmd
|
||||
if (strlen(ext) < 3 ){
|
||||
ERRPRINTS("NOT SUPPORTED DECOMPRESSION:\textension %s too short to be matched",ext);
|
||||
return -1;
|
||||
}
|
||||
char extStart[3];
|
||||
extStart[0] = ext[1];
|
||||
extStart[1] = ext[2];
|
||||
extStart[2] = '\0';
|
||||
char* decompressCmdBase = searchPatternInStrs(extStart,DECOMPRESS_CMDS);
|
||||
if (!decompressCmdBase){
|
||||
ERRPRINTS("NOT SUPPORTED DECOMPRESS FOR %s\n",ext);
|
||||
return -1;
|
||||
}
|
||||
uint cmdLen = strlen(decompressCmdBase) + strlen(path) + 4 + strlen(tmpFsDecompressPath);
|
||||
char* decompressCmd = alloca(cmdLen+1);
|
||||
if (snprintf(decompressCmd,cmdLen+1,"%s %s > %s",
|
||||
decompressCmdBase,path,tmpFsDecompressPath) < 0){
|
||||
ERRPRINT("extractInTmpFS, snprintf errd\n");
|
||||
}
|
||||
VERBOSE printf("decompressing %s --> %s\ncmd:\t%s\n",path,tmpFsDecompressPath,decompressCmd);
|
||||
return system(decompressCmd);
|
||||
}
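/* usage sketch (illustrative; the .gz path is hypothetical,
 * TMP_EXTRACTED_MARTIX comes from config.h):
 *	//returns 0 on success, -1 if the extension is unsupported -> caller keeps @path
 *	if (!extractInTmpFS("matrices/A.mtx.gz", TMP_EXTRACTED_MARTIX))
 *		path = TMP_EXTRACTED_MARTIX;	//parse the decompressed copy from tmpfs
 */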
|
@ -0,0 +1,107 @@
|
||||
#include "../include/Sp3MM_CSR_OMP_Multi.h"
|
||||
#include "../include/utils.h"
|
||||
|
||||
/**
|
||||
* @brief performs multiplication of two sparse matrices
|
||||
 * A and B stored in the CSR format. The resulting matrix is C
|
||||
*
|
||||
 * @param[in] a_m number of rows of A
 * @param[in] a_n number of columns of A
|
||||
* @param[in] a_nz number of non zero elements of A
|
||||
* @param[in] a_as array with the non zero coefficients of A
|
||||
* @param[in] a_ja array with the column indices of the non
|
||||
* zero coefficients of A
|
||||
* @param[in] a_irp array with the indices of the beginning of
|
||||
* each row in a_as and a_ja
|
||||
* @param[in] a_rl array with the lengths of each rows of A
|
||||
* @param[in] a_max_row_nz maximum number of coefficients in a row in A
|
||||
 * @param[in] b_m number of rows of B
 * @param[in] b_n number of columns of B
|
||||
* @param[in] b_nz number of non zero elements of B
|
||||
* @param[in] b_as array with the non zero coefficients of B
|
||||
* @param[in] b_ja array with the column indices of the non
|
||||
* zero coefficients of B
|
||||
* @param[in] b_irp array with the indices of the beginning of
|
||||
* each row in b_as and b_ja
|
||||
* @param[in] b_rl array with the lengths of each rows of B
|
||||
* @param[in] b_max_row_nz maximum number of coefficients in a row in B
|
||||
 * @param[out] c_m number of rows of C
 * @param[out] c_n number of columns of C
|
||||
* @param[out] c_nz number of non zero elements in C
|
||||
* @param[out] c_as array with the non zero coefficients of C
|
||||
* @param[out] c_ja array with the column indices of the non
|
||||
* zero coefficients of C
|
||||
* @param[out] c_irp array with the indices of the beginning of
|
||||
* each row in c_as and c_ja
|
||||
* @param[out] c_rl array with the lengths of each rows of C
|
||||
* @param[out] c_max_row_nz maximum number of coefficients in a row in C
|
||||
 * @param[out] info return value to check if the operation was successful (NB: currently passed by value)
|
||||
*/
|
||||
#ifdef ROWLENS
|
||||
void psb_f_spmm_row_by_row_ub_0(idx_t a_m, idx_t a_n, idx_t a_nz,
|
||||
double *a_as, idx_t *a_ja,
|
||||
idx_t *a_irp, idx_t *a_rl, idx_t a_max_row_nz,
|
||||
idx_t b_m, idx_t b_n, idx_t b_nz,
|
||||
double *b_as, idx_t *b_ja,
|
||||
idx_t *b_irp, idx_t *b_rl, idx_t b_max_row_nz,
|
||||
idx_t *c_m, idx_t *c_n, idx_t *c_nz,
|
||||
double **c_as, idx_t **c_ja,
|
||||
idx_t **c_irp, idx_t **c_rl, idx_t *c_max_row_nz,
|
||||
int info)
|
||||
#else
|
||||
void psb_f_spmm_row_by_row_ub_0(idx_t a_m, idx_t a_n, idx_t a_nz,
|
||||
double *a_as, idx_t *a_ja,
|
||||
idx_t *a_irp, idx_t a_max_row_nz,
|
||||
idx_t b_m, idx_t b_n, idx_t b_nz,
|
||||
double *b_as, idx_t *b_ja,
|
||||
idx_t *b_irp, idx_t b_max_row_nz,
|
||||
idx_t *c_m, idx_t *c_n, idx_t *c_nz,
|
||||
double **c_as, idx_t **c_ja,
|
||||
idx_t **c_irp, idx_t *c_max_row_nz,
|
||||
int info)
|
||||
#endif
|
||||
{
	int rc;
	//stack-allocated holders: the original pointer declarations were left
	//uninitialized and then dereferenced below
	spmat a_mat = {0}, b_mat = {0};
	spmat *a = &a_mat, *b = &b_mat, *c;
	CONFIG cfg_holder = {0};
	CONFIG *cfg = &cfg_holder;

	// setting up cfg
	// TODO : CHECK THAT THIS IS COMPATIBLE WITH PSB
	rc = getConfig(cfg);

	// setting up spmat type matrices
	a->M = a_m;
	a->N = a_n;
	a->NZ = a_nz;
	a->AS = a_as;
	a->JA = a_ja;
	a->IRP = a_irp;
	a->MAX_ROW_NZ = a_max_row_nz;

	b->M = b_m;
	b->N = b_n;
	b->NZ = b_nz;
	b->AS = b_as;
	b->JA = b_ja;
	b->IRP = b_irp;
	b->MAX_ROW_NZ = b_max_row_nz;
#ifdef ROWLENS
	a->RL = a_rl;
	b->RL = b_rl;
#endif //ROWLENS

	// performing spmm
	c = spmmRowByRow_0(a, b, cfg);
	if (!c)	return;	//product failed: outputs left untouched

	// output result
	*(c_m) = c->M;
	*(c_n) = c->N;
	*(c_nz)= c->NZ;
	*(c_as)= c->AS;
	*(c_ja)= c->JA;
	*(c_irp)=c->IRP;
	//TODO c_max_row_nz is currently not written back
#ifdef ROWLENS
	*(c_rl)= c->RL;
#endif
}
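/* call sketch from C (illustrative; the real caller is the Fortran interface,
 * non-ROWLENS signature shown; all a_ and b_ arguments describe CSR matrices
 * already built by the caller):
 *	idx_t c_m, c_n, c_nz, *c_ja, *c_irp, c_max_row_nz;
 *	double* c_as;
 *	psb_f_spmm_row_by_row_ub_0(a_m, a_n, a_nz, a_as, a_ja, a_irp, a_max_row_nz,
 *		b_m, b_n, b_nz, b_as, b_ja, b_irp, b_max_row_nz,
 *		&c_m, &c_n, &c_nz, &c_as, &c_ja, &c_irp, &c_max_row_nz, 0);
 */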
|
@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Sp3MM_for_AlgebraicMultiGrid
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
//CORE IMPLEMENTATIONS HEADER
|
||||
#ifndef SP3MM_CSR_OMP_MULTI_H
|
||||
#define SP3MM_CSR_OMP_MULTI_H
|
||||
|
||||
///commons single implementation stuff
|
||||
#include "macros.h"
|
||||
#include "sparseMatrix.h"
|
||||
|
||||
///aux structures
|
||||
//hold SPMM result over an unpartitioned space shared among threads' row blocks
|
||||
typedef struct{
|
||||
//space to hold SPMM output
|
||||
ulong* JA;
|
||||
double* AS;
|
||||
ulong size; //num of entries allocated -> only dbg checks
|
||||
ulong lastAssigned; //last JA&AS assigned index to an accumulator(atom)
|
||||
SPACC* accs; //SPARSIFIED ACC POINTERS
|
||||
uint accsNum;
|
||||
} SPMM_ACC; //accumulator for SPMM
|
||||
///compute function interface and its pointer definitions
|
||||
typedef spmat* ( SPMM) (spmat*,spmat*,CONFIG*);
|
||||
typedef spmat* (*SPMM_INTERF) (spmat*,spmat*,CONFIG*);
|
||||
typedef spmat* ( SP3MM) (spmat*,spmat*,spmat*,CONFIG*,SPMM_INTERF);
|
||||
typedef spmat* (*SP3MM_INTERF) (spmat*,spmat*,spmat*,CONFIG*,SPMM_INTERF);
|
||||
|
||||
typedef enum {
|
||||
_1D_DIRECT,
|
||||
_1D_BLOCKS,
|
||||
_2D_OFFSET,
|
||||
_2D_ALLOCD
|
||||
} SPMM_IMPL_TYPE;
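/* dispatch sketch (illustrative): pick an implementation through the typedefs
 * above; Spmm_UB_Funcs_0 is the OFF_F=0 function-pointer array declared in
 * Sp3MM_CSR_OMP_UB_Generic.h, indexed by SPMM_IMPL_TYPE.
 *	SPMM_INTERF spmm = Spmm_UB_Funcs_0[_1D_BLOCKS];
 *	spmat* ab = spmm(a, b, cfg);	//a,b,cfg prepared by the caller
 */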
|
||||
///-- commons single implementation stuff
|
||||
|
||||
///includes
|
||||
#include "linuxK_rbtree_minimalized.h"
|
||||
#include "Sp3MM_CSR_OMP_SymbStep_Multi.h"
|
||||
|
||||
//extern char TRGT_IMPL_START_IDX; //multi implementation switch
|
||||
#include "sparseUtilsMulti.h"
|
||||
#ifdef OFF_F //save "includer" OFF_F value before overwriting it
|
||||
#pragma push_macro("OFF_F")
|
||||
#define _OFF_F_OLD
|
||||
#undef OFF_F
|
||||
#endif
|
||||
|
||||
|
||||
#define OFF_F 0
|
||||
#include "Sp3MM_CSR_OMP_UB_Generic.h"
|
||||
#include "Sp3MM_CSR_OMP_Num_Generic.h"
|
||||
#undef OFF_F
|
||||
|
||||
#define OFF_F 1
|
||||
#include "Sp3MM_CSR_OMP_UB_Generic.h"
|
||||
#include "Sp3MM_CSR_OMP_Num_Generic.h"
|
||||
#undef OFF_F
|
||||
|
||||
|
||||
|
||||
#ifdef _OFF_F_OLD
|
||||
#pragma pop_macro("OFF_F")
|
||||
#undef _OFF_F_OLD
|
||||
#endif
|
||||
|
||||
|
||||
#endif //SP3MM_CSR_OMP_MULTI_H
|
@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Sp3MM_for_AlgebraicMultiGrid
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef OFF_F
|
||||
/*#pragma message("generic implementation requires OFF_F defined")*/
|
||||
#error generic implementation requires OFF_F defined
|
||||
#endif
|
||||
|
||||
/////SYMBOLIC - NUMERIC IMPLEMENTATIONS
|
||||
///SP3MM FUNCTIONS
|
||||
/*
|
||||
 * triple matrix multiplication among @R * @AC * @P using Gustavson parallel implementation
 * implemented as a pair of subsequent spmm operations
 * if @conf->spmm != NULL, it will be used as the spmm function, otherwise heuristics
 * will be used to decide which implementation to use
|
||||
*/
|
||||
SP3MM CAT(sp3mmRowByRowPair_SymbNum_,OFF_F);
|
||||
|
||||
/*
|
||||
* row-by-row-by-row implementation: forwarding @R*@AC rth row to P for row-by-row
|
||||
* accumulation in preallocated space, TODO exactly determined
|
||||
 * basic parallelization: 1 thread per @R's row that will also forward the result to P
|
||||
*/
|
||||
SP3MM CAT(sp3mmRowByRowMerged_SymbNum_,OFF_F);
|
||||
|
||||
///SUB FUNCTIONS
|
||||
///SPMM FUNCTIONS
|
||||
/*
|
||||
* sparse parallel implementation of @A * @B parallelizing Gustavson row-by-row
|
||||
* formulation using an aux dense vector @_auxDense
|
||||
* return resulting product matrix
|
||||
*/
|
||||
SPMM CAT(spmmRowByRow_SymbNum_,OFF_F);
|
||||
/*
|
||||
* sparse parallel implementation of @A * @B parallelizing Gustavson
|
||||
* with partitioning of @A in @conf->gridRows blocks of rows
|
||||
* return resulting product matrix
|
||||
*/
|
||||
SPMM CAT(spmmRowByRow1DBlocks_SymbNum_,OFF_F);
|
||||
|
||||
/*
|
||||
 * sparse parallel implementation of @A * @B as Gustavson parallelized in 2D
|
||||
* with partitioning of
|
||||
* @A into rows groups, uniform rows division
|
||||
* @B into cols groups, uniform cols division, accessed by aux offsets
|
||||
*/
|
||||
SPMM CAT(spmmRowByRow2DBlocks_SymbNum_,OFF_F);
|
||||
|
||||
/*
|
||||
 * sparse parallel implementation of @A * @B as Gustavson parallelized in 2D
|
||||
* with partitioning of
|
||||
* @A into rows groups, uniform rows division
|
||||
* @B into cols groups, uniform cols division, ALLOCATED as CSR submatrixes
|
||||
*/
|
||||
SPMM CAT(spmmRowByRow2DBlocksAllocated_SymbNum_,OFF_F);
|
||||
|
||||
///implementation wrappers as static array of function pointers
|
||||
//sp3mm as pair of spmm
|
||||
static SPMM_INTERF CAT(Spmm_SymbNum_Funcs_,OFF_F)[] = {
|
||||
& CAT(spmmRowByRow_SymbNum_,OFF_F),
|
||||
& CAT(spmmRowByRow1DBlocks_SymbNum_,OFF_F),
|
||||
//& CAT(spmmRowByRow2DBlocks_SymbNum_,OFF_F),
|
||||
//& CAT(spmmRowByRow2DBlocksAllocated_SymbNum_,OFF_F)
|
||||
};
|
||||
//sp3mm as pair of spmm
|
||||
static SP3MM_INTERF CAT(Sp3mm_SymbNum_Funcs_,OFF_F)[] = {
|
||||
//& CAT(sp3mmRowByRowMerged_SymbNum_,OFF_F)
|
||||
};
|
@ -0,0 +1,195 @@
|
||||
/*
|
||||
* Sp3MM_for_AlgebraicMultiGrid
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/*
|
||||
* CSR Sp[3]MM Symbolic step implementations
|
||||
* target: compute the output matrix size and the row lens for preallocation
|
||||
* direct write out partial results
|
||||
* See interfaces in respective header
|
||||
*
|
||||
 * MultiImplementations functions with all parameters
 * several (lower level) functions have configuration based on
 *  OUT_IDXS	for returning or using also the symbMul output idxs
 *  COL_PARTS	for returning or using also the distribution of symb out idxs in col partitions
|
||||
*/
|
||||
|
||||
///MultiImplementations
|
||||
///setup aux macros for different signatures implementation via #if arith expr
|
||||
#ifdef OUT_IDXS
|
||||
#define _OUT_IDXS TRUE
|
||||
#else
|
||||
#define _OUT_IDXS FALSE
|
||||
#define OUT_IDXS _UNDEF
|
||||
#endif
|
||||
#ifdef COL_PARTS
|
||||
#define _COL_PARTS TRUE
|
||||
#else
|
||||
#define _COL_PARTS FALSE
|
||||
#define COL_PARTS _UNDEF
|
||||
#endif
|
||||
///
|
||||
#if !defined OFF_F
|
||||
#error generic implementations requires OFF_F
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Compute symbolic product of (nnz indexes of) row @aRowJA and matrix @b
|
||||
* insert nnz indexes of the mul. result row as nodes in a rbtree rooted at @root
|
||||
* with nodes in @nodes which have to be enough for the mul result row (use an UB)
|
||||
 * Returns: multiplication result row NNZ number, see CONFIG_MACROS below for more
|
||||
*
|
||||
* CONFIG_MACROS:
|
||||
* if _OUT_IDXS == TRUE return mul.result row nnz idxs in @outIdxs
|
||||
* ifdef: OUT_IDXS_RBTREE_NODES: nnz indexes returned inplace sorting rbtree
|
||||
* as nnz indexes(JA) of the mul result row
|
||||
 *	else: stop at returning the mul. result row length
|
||||
 * if _COL_PARTS == TRUE return the number of nonzero elements
|
||||
* in each of the @gridCols column partitions inside @rowColPartsLens
|
||||
* OFF_F: offset back indexes from fortran
|
||||
* TODO also output indexes are shifted (see c_b )
|
||||
|
||||
idx_t CAT4(SpMM_Row_Symb_Rbtree,OUT_IDXS,COL_PARTS,OFF_F) (idx_t* aRowJA,idx_t aRowLen,
|
||||
spmat* b,rbRoot* root,rbNode* nodes
|
||||
#if _OUT_IDXS == TRUE
|
||||
#ifndef OUT_IDXS_RBTREE_NODES
|
||||
,idx_t* outIdxs
|
||||
#endif
|
||||
#endif
|
||||
#if _COL_PARTS == TRUE
|
||||
,ushort gridCols,idx_t* rowColPartsOffsets
|
||||
#endif
|
||||
);
|
||||
*/
|
||||
|
||||
/*
|
||||
 * SPVECT_IDX_DENSE_MAP based: as SpMM_Row_Symb_Rbtree but keeping aux idxs in an idxMap
|
||||
* CONFIG_MACROS (new)
|
||||
* IDX_RMUL_SYMB_RBTREE && ( _OUT_IDXS == T || _COL_PARTS == T ):
|
||||
* (tmp) symb mult out indexes will be kept via a rbtree
|
||||
* otherwise directly in the out array appending them and then sorting them
|
||||
* (potentially same n log n)
|
||||
|
||||
idx_t CAT4(SpMM_Row_Symb_IdxMap,OUT_IDXS,COL_PARTS,OFF_F)
|
||||
(
|
||||
idx_t* aRowJA, idx_t aRowLen, spmat* b, SPVECT_IDX_DENSE_MAP* idxsMapAcc
|
||||
#if _OUT_IDXS == TRUE
|
||||
,idx_t* outIdxs
|
||||
#if IDX_RMUL_SYMB_RBTREE == TRUE
|
||||
,rbRoot* root, rbNode* nodes
|
||||
#endif
|
||||
#endif // _OUT_IDXS == TRUE
|
||||
#if _COL_PARTS == TRUE
|
||||
,ushort gridCols,idx_t* rowColPartsLens
|
||||
#endif
|
||||
);
|
||||
*/
|
||||
|
||||
/*
|
||||
* Compute symbolic product of sparse matrixes @a * @b
|
||||
 * Alloc aux structures based on an upper-bounded allocation
|
||||
* Returns array of exact row lens of the result mul matrix c=@a*@b
|
||||
* plus an extra entry for the cumulative num of matrix nnz
|
||||
* (interoperability with upper bound implementations)
|
||||
*
|
||||
* CONFIG_MACROS:
|
||||
* if _OUT_IDXS == TRUE:
|
||||
* return in *@outIdxs pointers to overallocd JAs nnz indexes
|
||||
* *outIdxs[0] --> start of first row (its size in 0th of returned array)
|
||||
* also is the malloc returned address for later free
|
||||
 *	 [NB: rows are not contiguous in the allocation]
|
||||
* if _COL_PARTS == TRUE: return in *@rowColPartsLens
|
||||
* a matrix @a->M * @gridCols of offsets
|
||||
* for each column partition in each row
|
||||
*/
|
||||
idx_t* CAT4(SpMM_Symb_,OUT_IDXS,COL_PARTS,OFF_F)
|
||||
(
|
||||
ROW_MMSYM_IMPL_MODE symbRowImplID, spmat* a, spmat* b
|
||||
#if _OUT_IDXS == TRUE
|
||||
,idx_t*** outIdxs
|
||||
#endif
|
||||
#if _COL_PARTS == TRUE
|
||||
,ushort gridCols, idx_t** rowColPartsLens
|
||||
#endif
|
||||
);
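/* usage sketch (illustrative, basic instance with no OUT_IDXS/COL_PARTS;
 * the expanded function name depends on the config macros, shown schematically):
 *	idx_t* rowLens = SpMM_Symb(RBTREE, a, b);
 *	idx_t  totNNZ  = rowLens[a->M];	//extra trailing cumulative entry
 *	//...alloc C.AS/C.JA of totNNZ entries, build C.IRP by prefix sum of rowLens
 */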
|
||||
|
||||
//////Sp3MM - rowByrowByrow
|
||||
#define SP3MM_UB_HEURISTIC 2
|
||||
#if !defined COL_PARTS && defined OUT_IDXS
|
||||
///MultiImplementations functions without COL_PARTS
|
||||
//NB required OUT_IDXS for initial row-by-row step...
|
||||
/*
|
||||
 * as earlier but meant to work with the
 * triple product as row-by-row-by-row forwarding:
|
||||
* @abRowJATmp is a tmp storage for first row-by-row sym product
|
||||
* has to be big enough to store all the possible nonzero elements
|
||||
* @nodes has to be big enough to store all possible nnz of ab and abc row
|
||||
* (not only ab row as earlier version)
|
||||
*
|
||||
*/
|
||||
idx_t CAT3(Sp3MM_Row_Symb_,OUT_IDXS,OFF_F)
|
||||
(
|
||||
ROW_MMSYM_IMPL_MODE symbMMRowImplID, idx_t* aRowJA,idx_t aRowLen,
|
||||
spmat* b,spmat* c, rbRoot* root,rbNode* nodes, idx_t* abRowJATmp
|
||||
#if _OUT_IDXS == TRUE
|
||||
#ifndef OUT_IDXS_RBTREE_NODES
|
||||
,idx_t* outIdxs
|
||||
#endif
|
||||
#endif
|
||||
);
|
||||
|
||||
/*
|
||||
 * triple product @a*@b*@c as row-by-row-by-row forwarding:
|
||||
* Returns: resulting matrix rows' sizes exactly in an array
|
||||
* plus an extra element for the res.matrix's total size
|
||||
* CONFIG_MACROS:
|
||||
* if _OUT_IDXS == TRUE:
|
||||
* return in *@outIdxs pointers to overallocd JAs nnz indexes
|
||||
* *outIdxs[0] --> start of first row (its size in 0th of returned array)
|
||||
* also is the malloc returned address for later free
|
||||
 *	 [NB: rows are not contiguous in the allocation]
|
||||
*/
|
||||
//TODO HEURISTICS IN _OUT_IDXS to avoid serializing after first sym.product
|
||||
// see HEURISTICS_UB
|
||||
idx_t* CAT3(Sp3MM_Symb_,OUT_IDXS,OFF_F)
|
||||
(
|
||||
ROW_MMSYM_IMPL_MODE symbMMRowImplID, spmat* a, spmat* b, spmat* c
|
||||
#if _OUT_IDXS == TRUE
|
||||
,idx_t*** outIdxs
|
||||
#endif
|
||||
);
|
||||
|
||||
#endif
|
||||
|
||||
#undef OUT_IDXS
|
||||
#undef _OUT_IDXS
|
||||
#undef COL_PARTS
|
||||
#undef _COL_PARTS
|
@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Sp3MM_for_AlgebraicMultiGrid
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef SP3MM_CSR_OMP_SYMB_MULTI
|
||||
#define SP3MM_CSR_OMP_SYMB_MULTI
|
||||
|
||||
/* Multi implementation of symbolic product of sparse matrixes, config macros
|
||||
* OFF_F: C-Fortran spmat indexing
|
||||
* OUT_IDXS: indexes output
|
||||
* COL_PARTS: partitioning columns output...
|
||||
*/
|
||||
|
||||
#define OUT_IDXS_ON OutIdxs_
|
||||
#define COL_PARTS_ON ColParts_
|
||||
#undef OUT_IDXS
|
||||
#undef COL_PARTS
|
||||
|
||||
#define OFF_F 0
|
||||
///generate basic versions
|
||||
#include "Sp3MM_CSR_OMP_SymbStep_Generic.h"
|
||||
///generate outIdxs versions
|
||||
#define OUT_IDXS OUT_IDXS_ON
|
||||
#include "Sp3MM_CSR_OMP_SymbStep_Generic.h"
|
||||
#undef OUT_IDXS
|
||||
///generate colParts versions
|
||||
#define COL_PARTS COL_PARTS_ON
|
||||
#include "Sp3MM_CSR_OMP_SymbStep_Generic.h"
|
||||
///generate outIdxs AND colParts versions
|
||||
#define OUT_IDXS OUT_IDXS_ON
|
||||
#include "Sp3MM_CSR_OMP_SymbStep_Generic.h"
|
||||
|
||||
#undef OUT_IDXS
|
||||
#undef COL_PARTS
|
||||
#undef OFF_F
|
||||
#define OFF_F 1
|
||||
///generate basic versions
|
||||
#include "Sp3MM_CSR_OMP_SymbStep_Generic.h"
|
||||
///generate outIdxs versions
|
||||
#define OUT_IDXS OUT_IDXS_ON
|
||||
#include "Sp3MM_CSR_OMP_SymbStep_Generic.h"
|
||||
#undef OUT_IDXS
|
||||
///generate colParts versions
|
||||
#define COL_PARTS COL_PARTS_ON
|
||||
#include "Sp3MM_CSR_OMP_SymbStep_Generic.h"
|
||||
///generate outIdxs AND colParts versions
|
||||
#define OUT_IDXS OUT_IDXS_ON
|
||||
#include "Sp3MM_CSR_OMP_SymbStep_Generic.h"
|
||||
|
||||
#undef OUT_IDXS
|
||||
#undef COL_PARTS
|
||||
#undef OFF_F
|
||||
|
||||
#endif
|
@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Sp3MM_for_AlgebraicMultiGrid
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef OFF_F
|
||||
/*#pragma message("generic implementation requires OFF_F defined")*/
|
||||
#error generic implementation requires OFF_F defined
|
||||
#endif
|
||||
|
||||
///SP3MM FUNCTIONS
|
||||
/*
|
||||
 * triple matrix multiplication among @R * @AC * @P using Gustavson parallel implementation
 * implemented as a pair of subsequent spmm operations
 * if @conf->spmm != NULL, it will be used as the spmm function, otherwise heuristics
 * will be used to decide which implementation to use
|
||||
*/
|
||||
SP3MM CAT(sp3mmRowByRowPair_,OFF_F);
|
||||
|
||||
/*
|
||||
* row-by-row-by-row implementation: forwarding @R*@AC rth row to P for row-by-row
|
||||
* accumulation in preallocated space, TODO exactly determined
|
||||
 * basic parallelization: 1 thread per @R's row that will also forward the result to P
|
||||
*/
|
||||
SP3MM CAT(sp3mmRowByRowMerged_,OFF_F);
|
||||
|
||||
///SUB FUNCTIONS
|
||||
///SPMM FUNCTIONS
|
||||
SPMM CAT(spmmSerial_,OFF_F); //mono thread version for debug oracle-less
|
||||
/*
|
||||
* sparse parallel implementation of @A * @B parallelizing Gustavson row-by-row
|
||||
* formulation using an aux dense vector @_auxDense
|
||||
* return resulting product matrix
|
||||
*/
|
||||
SPMM CAT(spmmRowByRow_,OFF_F);
|
||||
/*
|
||||
* sparse parallel implementation of @A * @B parallelizing Gustavson
|
||||
* with partitioning of @A in @conf->gridRows blocks of rows
|
||||
* return resulting product matrix
|
||||
*/
|
||||
SPMM CAT(spmmRowByRow1DBlocks_,OFF_F);
|
||||
|
||||
/*
|
||||
 * sparse parallel implementation of @A * @B as Gustavson parallelized in 2D
|
||||
* with partitioning of
|
||||
* @A into rows groups, uniform rows division
|
||||
* @B into cols groups, uniform cols division, accessed by aux offsets
|
||||
*/
|
||||
SPMM CAT(spmmRowByRow2DBlocks_,OFF_F);
|
||||
|
||||
/*
|
||||
 * sparse parallel implementation of @A * @B as Gustavson parallelized in 2D
|
||||
* with partitioning of
|
||||
* @A into rows groups, uniform rows division
|
||||
* @B into cols groups, uniform cols division, ALLOCATED as CSR submatrixes
|
||||
*/
|
||||
SPMM CAT(spmmRowByRow2DBlocksAllocated_,OFF_F);
|
||||
|
||||
///implementation wrappers as static array of function pointers
|
||||
//sp3mm as pair of spmm
|
||||
static SPMM_INTERF CAT(Spmm_UB_Funcs_,OFF_F)[] = {
|
||||
& CAT(spmmRowByRow_,OFF_F),
|
||||
& CAT(spmmRowByRow1DBlocks_,OFF_F),
|
||||
& CAT(spmmRowByRow2DBlocks_,OFF_F),
|
||||
& CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)
|
||||
};
|
||||
//sp3mm as direct product
|
||||
static SP3MM_INTERF CAT(Sp3mm_UB_Funcs_,OFF_F)[] = {
|
||||
& CAT(sp3mmRowByRowMerged_,OFF_F)
|
||||
};
|
@ -0,0 +1,406 @@
|
||||
/*
|
||||
* Sp3MM_for_AlgebraicMultiGrid
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
///scalar-vector multiply
|
||||
//TODO evaluate the gain of using only the generic version of the next 2 functions
|
||||
/*
|
||||
* Sparse vector part <->scalar multiplication in a dense output
|
||||
* sparse vector part will hold nnz values in @vectVals (AS subvector)
|
||||
* with corresponding indexes in @vectIdxs in range [0,@vectLen] (JA subvector)
|
||||
* resulting vector accumulated in a dense array in @aux->v, along with nnzIdx
|
||||
 * both the accumulator's dense array and nnzIdx in @aux have to be @vectLen big
|
||||
*/
|
||||
inline void CAT(scSparseVectMul_,OFF_F)
|
||||
(double scalar,double* vectVals,ulong* vectIdxs,ulong vectLen, ACC_DENSE* aux){
|
||||
for (ulong i=0,j; i<vectLen; i++){
|
||||
j = vectIdxs[i]-OFF_F;
|
||||
DEBUGCHECKS{
|
||||
if (j>=aux->vLen){
|
||||
fprintf(stderr,"index %lu outside vLen %lu\n",j,aux->vLen);
|
||||
assert(j < aux->vLen);
|
||||
}
|
||||
}
|
||||
aux->v[j] += vectVals[i] * scalar; //accumulate
|
||||
//append new nonzero index to auxVNNZeroIdxs for quick sparsify
|
||||
//if (!(aux->v[j])) aux->nnzIdx[ aux->nnzIdxMap.len++ ] = j; //TODO numerical zero may then cause readd
|
||||
if(!spVect_idx_in(j,&aux->nnzIdxMap))
|
||||
aux->nnzIdx[ aux->nnzIdxMap.len-1 ] = j;
|
||||
}
|
||||
}
|
||||
|
||||
/* Same as above scSparseVectMul_ , but consider an initial offset to remove from each idx
|
||||
* Sparse vector part <->scalar multiplication in a dense output
|
||||
* @vectVals: sparse vector part values ( from spmat.AS )
|
||||
* with [at least] @vectLen corresponding
|
||||
* target idxs in @vectIdxs (from spmat.JA ) starting from @startIdx
|
||||
*
|
||||
* Resulting vector accumulated in a dense array in @aux->v, along with nnzIdx
|
||||
 * all nnz values indexes will be shifted back by @startIdx in @aux
|
||||
 * both the accumulator's dense array and nnzIdx in @aux have to be @vectLen big
|
||||
*/
|
||||
inline void CAT(scSparseVectMulPart_,OFF_F)(double scalar,double* vectVals,
|
||||
ulong* vectIdxs,ulong vectLen,ulong startIdx,ACC_DENSE* aux){
|
||||
for (ulong i=0,j; i<vectLen; i++){
|
||||
j = vectIdxs[i]-OFF_F - startIdx;
|
||||
DEBUGCHECKS{
|
||||
if (j>=aux->vLen){
|
||||
fprintf(stderr,"index %lu outside vLen %lu\n",j,aux->vLen);
|
||||
assert(j < aux->vLen);
|
||||
}
|
||||
}
|
||||
aux->v[j] += vectVals[i] * scalar; //accumulate
|
||||
//append new nonzero index to auxVNNZeroIdxs for quick sparsify
|
||||
//if (!(aux->v[j])) aux->nnzIdx[ aux->nnzIdxMap.len++ ] = j;
|
||||
////TODO numerical zero may then cause readd
|
||||
if(!spVect_idx_in(j,&aux->nnzIdxMap))
|
||||
aux->nnzIdx[ aux->nnzIdxMap.len-1 ] = j;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
///MultiImplementations functions
|
||||
#ifndef OFF_F
|
||||
#error generic implementations requires OFF_F defined
|
||||
#endif
|
||||
|
||||
///TODO IMPLEMENT SCALAR<->ROW MUL AS GENERIC SPARSE VECTOR<->SCALAR MUL
|
||||
//inline void scSparseRowMul(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux){
|
||||
inline void CAT(scSparseRowMul_,OFF_F)(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux){
|
||||
ulong rowStartIdx = mat->IRP[trgtR]-OFF_F,rowLen;
|
||||
#ifdef ROWLENS
|
||||
rowLen = mat->RL[trgtR];
|
||||
#else
|
||||
rowLen = mat->IRP[trgtR+1] - mat->IRP[trgtR];
|
||||
#endif
|
||||
CAT(scSparseVectMul_,OFF_F)(scalar,mat->AS+rowStartIdx,mat->JA+rowStartIdx,rowLen,aux);
|
||||
//TODO check impact of generic version use
|
||||
}
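/* Gustavson row sketch (illustrative; `acc` is a prepared ACC_DENSE and C's
 * IRP is assumed prefilled by the symbolic step):
 *	for (ulong jj = A->IRP[r]-OFF_F; jj < A->IRP[r+1]-OFF_F; jj++)
 *		CAT(scSparseRowMul_,OFF_F)(A->AS[jj], B, A->JA[jj]-OFF_F, &acc);
 *	sparsifyDirect(&acc, C, r);	//write row r of C = A*B, then reset acc
 */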
|
||||
|
||||
|
||||
///OUTPUT SIZE PREDICTION
|
||||
|
||||
/* O(A.NZ [+B.M] )
|
||||
* return array of upper bounded row sizes of the product @A * @B
|
||||
 * with the cumulative total size of the matrix AB = A*B appended at the end
|
||||
*/
|
||||
inline idx_t* CAT(spMMSizeUpperbound_,OFF_F)(spmat* A,spmat* B){
|
||||
AUDIT_INTERNAL_TIMES Start = omp_get_wtime();
|
||||
idx_t* rowSizes = calloc((A->M+1), sizeof(*rowSizes));
|
||||
if (!rowSizes){
|
||||
ERRPRINT("spMMSizeUpperbound: rowSizes calloc errd\n");
|
||||
return NULL;
|
||||
}
|
||||
idx_t fullMatBound = 0;
|
||||
#pragma omp parallel for schedule(static) reduction(+:fullMatBound)
|
||||
for (idx_t r=0; r<A->M; r++){
|
||||
for (idx_t jj=A->IRP[r] - OFF_F,j,rlen; jj<A->IRP[r+1] - OFF_F; jj++){
|
||||
j = A->JA[jj] - OFF_F;
|
||||
#ifdef ROWLENS
|
||||
rlen = B->RL[j];
|
||||
#else
|
||||
rlen = B->IRP[j+1] - B->IRP[j]; //OFF_F: delta is offset independent
|
||||
#endif
|
||||
rowSizes[r] += rlen;
|
||||
fullMatBound += rlen;
|
||||
//rowSizes[A->M] += rlen; //just below omp reduction sum
|
||||
}
|
||||
}
|
||||
rowSizes[A->M] = fullMatBound;
|
||||
AUDIT_INTERNAL_TIMES End= omp_get_wtime();
|
||||
VERBOSE
|
||||
printf("spMMSizeUpperbound:%lu\t%le s\n",rowSizes[A->M],End-Start);
|
||||
return rowSizes;
|
||||
}
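/* usage sketch (illustrative): size the shared UB accumulator from the bound.
 *	idx_t* ubRowLens = CAT(spMMSizeUpperbound_,OFF_F)(a, b);
 *	SPMM_ACC* acc = initSpMMAcc(ubRowLens[a->M], a->M);	//entries, accumulators
 */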
|
||||
|
||||
/* O(A.NZ + B.NZ)
|
||||
 * return matrix @A.M x gridCols of upper bounds
 * for each of the @gridCols col partitions of the output matrix AB = @A * @B
 * with the cumulative total size of the matrix AB appended at the end
|
||||
*/
|
||||
inline idx_t* CAT(spMMSizeUpperboundColParts_,OFF_F)
|
||||
(spmat* A,spmat* B,ushort gridCols,idx_t* bColPartOffsets){
|
||||
AUDIT_INTERNAL_TIMES Start = omp_get_wtime();
|
||||
|
||||
idx_t* rowPartsSizes = calloc((A->M*gridCols +1), sizeof(*rowPartsSizes));
|
||||
if (!rowPartsSizes){
|
||||
ERRPRINT("spMMSizeUpperbound: rowPartsSizes calloc errd\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
idx_t fullMatBound = 0;
|
||||
#pragma omp parallel for schedule(static) reduction(+:fullMatBound)
|
||||
for (idx_t r=0; r<A->M; r++){
|
||||
//for each A.row -> sum B.colParts lens
|
||||
for (idx_t jj=A->IRP[r]-OFF_F,j,rlen; jj<A->IRP[r+1]-OFF_F; jj++){
|
||||
j = A->JA[jj] - OFF_F;
|
||||
for (idx_t gc=0,bPartID=IDX2D(j,gc,gridCols); gc < gridCols; gc++,bPartID++){
|
||||
rlen = bColPartOffsets[bPartID+1] - bColPartOffsets[bPartID];
|
||||
rowPartsSizes[ IDX2D(r,gc,gridCols) ] += rlen;
|
||||
fullMatBound += rlen;
|
||||
}
|
||||
}
|
||||
}
|
||||
rowPartsSizes[ A->M*gridCols ] = fullMatBound;
|
||||
AUDIT_INTERNAL_TIMES End= omp_get_wtime();
|
||||
VERBOSE
|
||||
printf("spMMSizeUpperboundColParts_:%lu\t%le s\n",rowPartsSizes[A->M],End-Start);
|
||||
return rowPartsSizes;
|
||||
}
|
||||
|
||||
|
||||
////Single implementations funcs
|
||||
#ifndef SPMMUTILS_H_SINGLE_IMPLEMENTATION
|
||||
#define SPMMUTILS_H_SINGLE_IMPLEMENTATION
|
||||
|
||||
#include "utils.h"
|
||||
///Allocs - Free
|
||||
//SpMM holder of accumulats
|
||||
inline SPMM_ACC* initSpMMAcc(ulong entriesNum, ulong accumulatorsNum){
|
||||
SPMM_ACC* out = calloc(1,sizeof(*out));
|
||||
if (!out){
|
||||
ERRPRINT("initSpMMAcc: out calloc errd\n");
|
||||
return NULL;
|
||||
}
|
||||
out->size = entriesNum;
|
||||
if (!(out->JA = malloc(entriesNum * sizeof(*(out->JA))))){
|
||||
ERRPRINT("initSpMMAcc: JA malloc errd\n");
|
||||
goto _err;
|
||||
}
|
||||
if (!(out->AS = malloc(entriesNum * sizeof(*(out->AS))))){
|
||||
ERRPRINT("initSpMMAcc: AS malloc errd\n");
|
||||
goto _err;
|
||||
}
|
||||
if (!(out->accs = malloc(accumulatorsNum * sizeof(*(out->accs))))){
|
||||
ERRPRINT("initSpMMAcc: accs malloc errd\n");
|
||||
goto _err;
|
||||
}
|
||||
return out;
|
||||
|
||||
_err:
|
||||
if (out->JA) free(out->JA);
|
||||
if (out->AS) free(out->AS);
|
||||
if (out->accs) free(out->accs);
|
||||
if (out) free(out);
|
||||
return NULL;
|
||||
}
|
||||
inline void freeSpMMAcc(SPMM_ACC* acc){
|
||||
free(acc->JA);
|
||||
free(acc->AS);
|
||||
free(acc->accs);
|
||||
free(acc);
|
||||
}
|
||||
///// dense acc sparsify functions
|
||||
/*
|
||||
* sparsify dense accumulated vector @accV (with shifted of @startColAcc)
|
||||
* into sparse accumulator @accSparse that'll use space for nnz entries from @acc
|
||||
*/
|
||||
///DIRECT OUT MATRIX SPARSIFY
|
||||
/*
|
||||
* sparsify @accV directly inside row @row of matrix @m
|
||||
* considering, if given not NULL, 2D partitioning with
|
||||
* @gridCols cols groups and colGroups offsets per row matrix @colPartsOffsets
|
||||
* :Returns inplace modify of @m
|
||||
*/
|
||||
static inline void sparsifyDirect(ACC_DENSE* accV,spmat* m,idx_t row){
|
||||
idx_t nnz = accV->nnzIdxMap.len, sparsifyStart = m->IRP[row], sparsifyEnd = m->IRP[row+1];
|
||||
sort_idx_t(accV->nnzIdx,nnz); //sort nnz idx for ordered write
|
||||
DEBUGCHECKS assertArrNoRepetitions(accV->nnzIdx,nnz);
|
||||
DEBUGCHECKS assert(nnz <= (sparsifyEnd - sparsifyStart));
|
||||
|
||||
for (idx_t i=0,j; i < nnz; i++){
|
||||
j = accV->nnzIdx[i];
|
||||
m -> JA[sparsifyStart + i] = j; //+ startColAcc;
|
||||
m -> AS[sparsifyStart + i] = accV->v[j];
|
||||
}
|
||||
}
|
||||
//TODO 2D partitioning - colParts version of above...best to use multi impl trick of symbStep
|
||||
static inline void sparsifyDirectColParts(ACC_DENSE* accV,spmat* m,idx_t row,
|
||||
ushort colGroupId,ushort gridCols, idx_t* colPartsOffsets,idx_t startCol){
|
||||
idx_t nnz = accV->nnzIdxMap.len;
|
||||
	idx_t partID = IDX2D(row,colGroupId,gridCols);	//idx_t: row*gridCols can exceed ushort range
|
||||
idx_t sparsifyStart = colPartsOffsets[partID], sparsifyEnd = colPartsOffsets[partID+1];
|
||||
sort_idx_t(accV->nnzIdx,nnz); //sort nnz idx for ordered write
|
||||
DEBUGCHECKS assertArrNoRepetitions(accV->nnzIdx,nnz);
|
||||
DEBUGCHECKS assert( nnz == sparsifyEnd - sparsifyStart );
|
||||
|
||||
|
||||
for (idx_t i=0,j; i < nnz; i++){
|
||||
j = accV->nnzIdx[i];
|
||||
m -> JA[sparsifyStart + i] = j + startCol;
|
||||
m -> AS[sparsifyStart + i] = accV->v[j];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
///UB SPACE SPARSIFY
|
||||
//internal sparsivy dense acc inside (prepared) sparse acc struct
|
||||
static inline void _sparsifyUB(ACC_DENSE* accV,SPACC* accSparse,idx_t startColAcc){
|
||||
idx_t nnz = accV->nnzIdxMap.len;
|
||||
sort_idx_t(accV->nnzIdx,nnz); //sort nnz idx for ordered write
|
||||
DEBUGCHECKS assertArrNoRepetitions(accV->nnzIdx,nnz);
|
||||
for (idx_t i=0,j; i < nnz; i++){
|
||||
j = accV->nnzIdx[i];
|
||||
accSparse -> JA[i] = j + startColAcc;
|
||||
accSparse -> AS[i] = accV->v[j];
|
||||
}
|
||||
accSparse -> len = nnz;
|
||||
}
|
||||
|
||||
//row[Part] sparsified in a thread safe (exactly long) reserved area using atomics
|
||||
static inline void sparsifyUBNoPartsBounds
|
||||
(SPMM_ACC* acc,ACC_DENSE* accV,SPACC* accSparse, ulong startColAcc){
|
||||
//sort nnz indexes of dense accumulator
|
||||
idx_t nnz = accV->nnzIdxMap.len;
|
||||
idx_t sparsifyStartV; //start index(inside @accSparse) of @accV to sparsify
|
||||
//sparsifyStartV = __atomic_fetch_add(&(acc->lastAssigned),nnz,__ATOMIC_ACQ_REL);
|
||||
#pragma omp atomic capture
|
||||
{ //fetch and add like ....
|
||||
sparsifyStartV = acc->lastAssigned;
|
||||
acc->lastAssigned += nnz;
|
||||
}
|
||||
DEBUGCHECKS{
|
||||
if (acc->lastAssigned >= acc->size){
|
||||
ERRPRINT("OMP ATOMIC OR SG ERRD IN SPACE ASSIGNMENTS...\n");
|
||||
assert(acc->lastAssigned < acc->size);
|
||||
}
|
||||
}
|
||||
//
|
||||
accSparse -> AS = acc->AS + sparsifyStartV;
|
||||
accSparse -> JA = acc->JA + sparsifyStartV;
|
||||
_sparsifyUB(accV,accSparse,startColAcc);
|
||||
}
|
||||
////output-gather functions
|
||||
/*
|
||||
* merge @conf->gridCols*@mat->M sparse rows partitions into @mat
|
||||
 * EXPECTED @rowsParts to be sorted according to the
|
||||
* 2D rowMajor computing grid given in @conf
|
||||
* allocd arrays to hold non zero values and indexes into @mat
|
||||
*/
|
||||
inline int mergeRowsPartitions(SPACC* rowsParts,spmat* mat,
|
||||
CONFIG* conf){
|
||||
ulong nzNum=0,j,rLen,idx,partsNum = mat->M * conf->gridCols;
|
||||
//TODO PARALLEL MERGE - SAVE STARTING offset OF EACH PARTITION IN THE OUT MATRIX
|
||||
ulong* rowsPartsOffsets=alloca(partsNum*sizeof(*rowsPartsOffsets));
|
||||
///count nnz entries and alloc arrays for them
|
||||
for (ulong r=0; r<mat->M; r++){
|
||||
//for each partition ->get len -> outMat.IRP and aux offsets
|
||||
for (j=0,rLen=0; j<conf->gridCols; j++){
|
||||
idx = IDX2D(r,j,conf->gridCols);
|
||||
rowsPartsOffsets[idx]=nzNum+rLen;//part start=prev accumulated end
|
||||
rLen += rowsParts[idx].len;
|
||||
}
|
||||
nzNum += rLen;
|
||||
mat->IRP[r+1] = nzNum;
|
||||
#ifdef ROWLENS
|
||||
mat->RL[r] = rLen;
|
||||
#endif
|
||||
}
|
||||
mat->NZ = nzNum;
|
||||
if (!(mat->AS = malloc(nzNum * sizeof(*(mat->AS))))){
|
||||
ERRPRINT("merged sparse matrix AS alloc errd\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
if (!(mat->JA = malloc(nzNum * sizeof(*(mat->JA))))){
|
||||
ERRPRINT("merged sparse matrix JA alloc errd\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
	///populate with rows nnz values and indexes
|
||||
ulong pLen; //omp for aux vars
|
||||
#pragma omp parallel for schedule(static) private(pLen)
|
||||
for (ulong i=0; i<partsNum; i++){
|
||||
pLen = rowsParts[i].len;
|
||||
memcpy(mat->AS + rowsPartsOffsets[i],rowsParts[i].AS,pLen*sizeof(*(mat->AS)));
|
||||
memcpy(mat->JA + rowsPartsOffsets[i],rowsParts[i].JA,pLen*sizeof(*(mat->JA)));
|
||||
}
|
||||
CONSISTENCY_CHECKS{ //TODO REMOVE written nnz check manually
|
||||
for (ulong i=0,w=0; i<mat->M; i++){
|
||||
if (mat->IRP[i] != w)
|
||||
{ERRPRINT("MERGE ROW ERR IRP\n");return -1;}
|
||||
for (j=0; j<conf->gridCols; j++){
|
||||
SPACC r = rowsParts[IDX2D(i,j,conf->gridCols)];
|
||||
for (ulong jj=0; jj<r.len; jj++,w++){
|
||||
if (mat->AS[w]!= r.AS[jj]){
|
||||
ERRPRINT("MERGE ROW ERR AS\n"); return -1;}
|
||||
if (mat->JA[w]!= r.JA[jj]){
|
||||
ERRPRINT("MERGE ROW ERR JA\n"); return -1;}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* merge @mat->M sparse rows @rows in sparse matrix @mat
|
||||
 * EXPECTED @rows to be sorted according to trgt matrix row index @r
|
||||
* allocd arrays to hold non zero values and indexes into @mat
|
||||
*/
|
||||
inline int mergeRows(SPACC* rows,spmat* mat){
|
||||
ulong nzNum=0;
|
||||
//count nnz entries and alloc arrays for them
|
||||
for (ulong r=0; r<mat->M; ++r){
|
||||
nzNum += rows[r].len;
|
||||
mat->IRP[r+1] = nzNum;
|
||||
#ifdef ROWLENS
|
||||
		mat->RL[r] = rows[r].len;
|
||||
#endif
|
||||
}
|
||||
mat->NZ = nzNum;
|
||||
if (!(mat->AS = malloc(nzNum * sizeof(*(mat->AS))))){
|
||||
ERRPRINT("merged sparse matrix AS alloc errd\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
if (!(mat->JA = malloc(nzNum * sizeof(*(mat->JA))))){
|
||||
ERRPRINT("merged sparse matrix JA alloc errd\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
	///POPULATE WITH ROWS NNZ VALUES AND INDEXES
|
||||
//TODO PARALLEL COPY
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (ulong r=0; r<mat->M; r++){
|
||||
memcpy(mat->AS+mat->IRP[r], rows[r].AS, rows[r].len*sizeof(*(mat->AS)));
|
||||
memcpy(mat->JA+mat->IRP[r], rows[r].JA, rows[r].len*sizeof(*(mat->JA)));
|
||||
}
|
||||
CONSISTENCY_CHECKS{ //TODO REMOVE written nnz check manually
|
||||
for (ulong r=0,i=0; r<mat->M; r++){
|
||||
if (i != mat->IRP[r])
|
||||
{ERRPRINT("MERGE ROW ERR IRP\n");return -1;}
|
||||
for (ulong j=0; j<rows[r].len; j++,i++){
|
||||
if (mat->AS[i]!= rows[r].AS[j]){
|
||||
ERRPRINT("MERGE ROW ERR AS\n"); return -1;}
|
||||
if (mat->JA[i] != rows[r].JA[j]){
|
||||
ERRPRINT("MERGE ROW ERR JA\n"); return -1;}
|
||||
}
|
||||
}
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
#endif //SPMMUTILS_H_SINGLE_IMPLEMENTATION
|
||||
|
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Sp3MM_for_AlgebraicMultiGrid
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef SPMMUTILSMULTI_H
|
||||
#define SPMMUTILSMULTI_H
|
||||
|
||||
extern char TRGT_IMPL_START_IDX; //multi implementation switch
|
||||
|
||||
#ifdef OFF_F //save "includer" OFF_F value before overwriting it
|
||||
#pragma push_macro("OFF_F")
|
||||
#define _OFF_F_OLD
|
||||
#undef OFF_F
|
||||
#endif
|
||||
|
||||
#define OFF_F 0
|
||||
#include "SpMMUtilsGeneric.h"
|
||||
#undef OFF_F
|
||||
|
||||
#define OFF_F 1
|
||||
#include "SpMMUtilsGeneric.h"
|
||||
#undef OFF_F
|
||||
|
||||
#ifdef _OFF_F_OLD
|
||||
#pragma pop_macro("OFF_F")
|
||||
#undef _OFF_F_OLD
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,155 @@
|
||||
/*
|
||||
* Sp3MM_for_AlgebraicMultiGrid
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef CONFIG_H
|
||||
#define CONFIG_H
|
||||
//CONFIG STRUCT DEPENDECIES
|
||||
//switch among different symb row -> rows implementation variants
|
||||
typedef enum{
|
||||
RBTREE,
|
||||
IDXMAP
|
||||
} ROW_MMSYM_IMPL_MODE;
|
||||
|
||||
//
|
||||
typedef struct{
|
||||
ushort gridRows;
|
||||
ushort gridCols;
|
||||
//TODO FULL CONFIG DOCCED HERE
|
||||
ROW_MMSYM_IMPL_MODE symbMMRowImplID; //how to compute symb mul (if required)
|
||||
uint threadNum; //thread num to use in an OMP parallel region ...
|
||||
void* chunkDistrbFunc; //CHUNKS_DISTR_INTERF func pntr
|
||||
} CONFIG;
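/* illustrative initialization (values are arbitrary placeholders):
 *	CONFIG cfg = { .gridRows = 8, .gridCols = 4,
 *		.symbMMRowImplID = RBTREE,
 *		.threadNum = 8,
 *		.chunkDistrbFunc = NULL };	//to be set via ompChunksDivide.h
 */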
|
||||
///Smart controls
|
||||
typedef size_t idx_t; //spmat indexes
|
||||
typedef unsigned __int128 uint128;
|
||||
|
||||
#include "macros.h"
|
||||
#ifndef SPARSIFY_PRE_PARTITIONING
|
||||
	#define SPARSIFY_PRE_PARTITIONING TRUE //u.b. implementation will sparsify dense acc in a pre-split mem area
|
||||
#endif
|
||||
///AUDIT&CHECKS
|
||||
//debug checks and tmp stuff
|
||||
#ifndef DEBUG
|
||||
#define DEBUG if( TRUE )
|
||||
#endif
|
||||
//long prints
|
||||
#ifndef DEBUGPRINT
|
||||
#define DEBUGPRINT if( FALSE )
|
||||
#endif
|
||||
//heavy impact debug checks
|
||||
#ifndef DEBUGCHECKS
|
||||
#define DEBUGCHECKS if( FALSE )
|
||||
#endif
|
||||
//extra print in the normal output
|
||||
#ifndef AUDIT_INTERNAL_TIMES
|
||||
#define AUDIT_INTERNAL_TIMES if( TRUE )
|
||||
#endif
|
||||
#ifndef VERBOSE
|
||||
#define VERBOSE if( FALSE )
|
||||
#endif
|
||||
//extra checks over the input and correct partials
|
||||
#ifndef CONSISTENCY_CHECKS
|
||||
#define CONSISTENCY_CHECKS if( TRUE )
|
||||
#endif
|
||||
#ifndef SPVECT_IDX_BITWISE //SPVECT_IDX_DENSE_ACC.nnzIdxsFlags will be an array of bitflags
|
||||
#define SPVECT_IDX_BITWISE TRUE
|
||||
#endif
|
||||
#if SPVECT_IDX_BITWISE == TRUE
|
||||
#ifndef LIMB_T
|
||||
#define LIMB_T uint128
|
||||
#endif
|
||||
typedef LIMB_T limb_t;
|
||||
typedef limb_t* nnz_idxs_flags_t;
|
||||
#define LIMB_SIZE_BIT ( sizeof(limb_t) * 8 )
|
||||
#else	//nnz idxs are flags in a byte array
|
||||
typedef uchar* nnz_idxs_flags_t;
|
||||
#define LIMB_SIZE_BIT ( sizeof(uchar) * 8 )
|
||||
#endif
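/* limb addressing sketch (illustrative, assuming SPVECT_IDX_BITWISE == TRUE;
 * `flags` is a hypothetical nnz_idxs_flags_t already allocated to vLen bits):
 * bit j lives in limb j/LIMB_SIZE_BIT at offset j%LIMB_SIZE_BIT.
 *	limb_t mask = ((limb_t) 1) << (j % LIMB_SIZE_BIT);
 *	int wasSet  = (flags[j / LIMB_SIZE_BIT] & mask) != 0;	//test...
 *	flags[j / LIMB_SIZE_BIT] |= mask;	//...and set (no atomicity implied)
 */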
|
||||
|
||||
///AUDIT extra configuration
|
||||
//#define ROWLENS
|
||||
#ifdef ROWLENS
|
||||
/*#pragma message("ROW_LENS ARRAY ENABLED")*/
|
||||
#endif
|
||||
|
||||
/*
|
||||
* idxsMapAcc based symb row*rows, outIdxs and colParts carried with an aux rbtree
|
||||
 * otherwise carried through an append array
|
||||
* (hyp same computational cost... n + nlog n)
|
||||
*/
|
||||
#ifndef IDX_RMUL_SYMB_RBTREE
|
||||
#define IDX_RMUL_SYMB_RBTREE FALSE
|
||||
#endif
|
||||
#ifndef RB_CACHED_INSERT
|
||||
#define RB_CACHED_INSERT TRUE //use cached insert
|
||||
#endif
|
||||
//#define USE_RB_ROOT_CACHE_LMOST //use leftmost leaf cached in rbtree in sym mul
|
||||
///CONSTS
|
||||
#define ELL_MAX_ENTRIES ( 6l << 27 ) //2*6GB of ell (padded) entries maxSupport in a matrix
|
||||
#define LIMIT_ELL_SIZE //enable above threshold
|
||||
#define ELL_AS_FILLER (0 ) //handled with calloc
|
||||
//TODO NOW FILLED WITH LAST NNPADDED COL #define ELL_JA_FILLER (-1)
|
||||
//#define DOUBLE_VECT_DIFF_EARLY_EXIT 1
|
||||
//#define RNDVECTMIN 222222
|
||||
#define VECTOR_STEP_REALLOC 25
|
||||
#define VECTOR_READ_BLOCK 50 //file (raw) vector read block
|
||||
#define RNDVECTORSIZE 100000
|
||||
#define RNDVECTORDUMP TMPDIR "rndVectorDump"
|
||||
#define RNDVECTORDUMPRAW TMPDIR "rndVectorDumpRaw"
|
||||
#define OUTVECTORDUMP TMPDIR "outVectorDump"
|
||||
#define OUTVECTORDUMPRAW TMPDIR "outVectorDumpRaw"
|
||||
//#define FLOAT_DIFF_ABS
|
||||
#ifndef AVG_TIMES_ITERATION
|
||||
#define AVG_TIMES_ITERATION 5
|
||||
#endif
|
||||
//ompChunksDivide.h -> chunksFairFolded()
|
||||
#ifndef FAIR_CHUNKS_FOLDING
|
||||
#define FAIR_CHUNKS_FOLDING 4
|
||||
#endif
|
||||
//SPMV specific
|
||||
//rows partitions for dotProduct SIMD reduction enable
|
||||
#ifndef SIMD_ROWS_REDUCTION
|
||||
#define SIMD_ROWS_REDUCTION TRUE
|
||||
#endif
|
||||
/*#if SIMD_ROWS_REDUCTION == TRUE
|
||||
#pragma message("SIMD_ROWS_REDUCTION enabled")
|
||||
//TODO SOME TRICK TO HAVE 1! PRINT
|
||||
#endif*/
|
||||
extern double Start,End,Elapsed,ElapsedInternal;
|
||||
#define DOUBLE_DIFF_THREASH 7e-4
|
||||
#define DRNG_DEVFILE "/dev/urandom"
|
||||
#define MAXRND 3e-5
|
||||
#ifndef TMPDIR
|
||||
#define TMPDIR "/tmp/"
|
||||
#endif
|
||||
#define TMP_EXTRACTED_MARTIX TMPDIR "extractedMatrix"
|
||||
|
||||
#endif //CONFIG_H
|
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Sp3MM_for_AlgebraicMultiGrid
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
//inline export - single implementation functions
|
||||
void cleanRbNodes(rbRoot* root,rbNode* nodes,idx_t nodesNum);
|
||||
int rbInsertNewKey(rbRoot *root,rbNode *node, idx_t key);
|
||||
//void C_FortranShiftIdxs(spmat* outMat);
|
||||
//void Fortran_C_ShiftIdxs(spmat* m);
|
||||
ACC_DENSE* _initAccVectors(ulong num,ulong size);
|
||||
ACC_DENSE* _initAccVectors_monoalloc(ulong num,ulong size); //TODO PERF WITH NEXT
|
||||
SPMM_ACC* initSpMMAcc(ulong entriesNum, ulong accumulatorsNum);
|
||||
idx_t reductionMaxSeq(idx_t* arr,idx_t arrLen);
|
||||
int _allocAccDense(ACC_DENSE* v,ulong size);
|
||||
int mergeRows(SPACC* rows,spmat* mat);
|
||||
int mergeRowsPartitions(SPACC* rowsParts,spmat* mat,CONFIG* conf);
|
||||
void _freeAccsDenseChecks(ACC_DENSE* vectors,ulong num);
|
||||
void _resetAccVect(ACC_DENSE* acc);
|
||||
void _resetIdxMap(SPVECT_IDX_DENSE_MAP* acc);
|
||||
void assertArrNoRepetitions(idx_t* arrSorted, idx_t arrLen);
|
||||
void freeAccsDense(ACC_DENSE* vectors,ulong num);
|
||||
void freeSpMMAcc(SPMM_ACC* acc);
|
||||
void sparsifyDenseVect(SPMM_ACC* acc,ACC_DENSE* accV,SPACC* accSparse, ulong startColAcc);
|
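/* Hedged lifecycle sketch (illustrative only, not part of the build; the sizes
 * are made up and the calling discipline is inferred from the signatures above,
 * not from the .c implementations). */
static inline int accLifecycleSketch(void){
	ulong rows = 4, cols = 1024;
	ACC_DENSE* accs = _initAccVectors(rows, cols);	//one dense accumulator per row
	if (!accs)	return -1;
	for (ulong r = 0; r < rows; r++)
		_resetAccVect(accs + r);	//clear values, nnz idxs and the presence map
	//...accumulate scalar * sparse-row products into accs[r] here...
	freeAccsDense(accs, rows);
	return 0;
}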
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Sp3MM_for_AlgebraicMultiGrid
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#include "macros.h"
|
||||
|
||||
#ifndef OFF_F
|
||||
#error generic implementation requires OFF_F defined
|
||||
#endif
|
||||
|
||||
////inline exports
//multi implementation functions
|
||||
void CAT(scSparseVectMul_,OFF_F)(double scalar,double* vectVals,ulong* vectIdxs,ulong vectLen, ACC_DENSE* aux);
|
||||
void CAT(scSparseVectMulPart_,OFF_F)(double scalar,double* vectVals,ulong* vectIdxs,ulong vectLen,ulong startIdx,ACC_DENSE* aux);
|
||||
void CAT(_scRowMul_,OFF_F)(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux);
|
||||
void CAT(scSparseRowMul_,OFF_F)(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux);
|
||||
idx_t* CAT(spMMSizeUpperbound_,OFF_F)(spmat* A,spmat* B);
|
||||
idx_t* CAT(spMMSizeUpperboundColParts_,OFF_F)(spmat* A,spmat* B,ushort gridCols,idx_t* bColPartOffsets);
|
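/*
 * Hedged consumer-side sketch of the multi-indexing re-include trick
 * (the included header name below is an assumption for illustration):
 *
 *	#define OFF_F 0			//C, 0-based indexing
 *	#include "sparseUtilsGeneric.h"	//declares e.g. spMMSizeUpperbound_0
 *	#undef  OFF_F
 *	#define OFF_F 1			//Fortran, 1-based indexing
 *	#include "sparseUtilsGeneric.h"	//declares e.g. spMMSizeUpperbound_1
 *	#undef  OFF_F
 */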
@ -0,0 +1,335 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
/*
|
||||
Red Black Trees
|
||||
(C) 1999 Andrea Arcangeli <andrea@suse.de>
|
||||
|
||||
|
||||
  Userspace GNU C porting: removed augmented deps, minimized to the few OPs needed (Andrea Di Iorio)
  linux/include/linux/rbtree.h

  To use rbtrees you'll have to implement your own insert and search cores.
  This avoids callbacks and prevents a dramatic performance drop.
  I know it's not the cleanest way, but in C (not in C++) that is how you get
  performance and genericity...

  See Documentation/core-api/rbtree.rst for documentation and samples.
*/
|
||||
/*
|
||||
* RedBlackTree_linux_userspace
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the RedBlackTree_linux_userspace or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE RedBlackTree_linux_userspace GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* https://bitbucket.org/andysnake96/redblacktree_linux_userspace
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _LINUX_RBTREE_H
|
||||
#define _LINUX_RBTREE_H
|
||||
|
||||
//#include <linux/kernel.h> //TODO LESS_DEPENDENCIES
|
||||
//#include <linux/stddef.h> //TODO LESS_DEPENDENCIES
|
||||
//#include <linux/rcupdate.h> //TODO LESS_DEPENDENCIES
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <macros.h> ///my useful macros :)
#include <macrosLinuxMock.h> ///my useful macros :)
|
||||
|
||||
struct rb_node {
|
||||
unsigned long __rb_parent_color;
|
||||
struct rb_node *rb_right;
|
||||
struct rb_node *rb_left;
|
||||
} __attribute__((aligned(sizeof(long))));
|
||||
/* The alignment might seem pointless, but allegedly CRIS needs it */
|
||||
|
||||
struct rb_root {
|
||||
struct rb_node *rb_node;
|
||||
};
|
||||
/////MINIMIZED: fulfilling rbtree_augmented deps TODO
|
||||
///rb_node colors labels
|
||||
#define RB_RED 0
|
||||
#define RB_BLACK 1
|
||||
#define __rb_parent(pc) ((struct rb_node *)(pc & ~3))
|
||||
|
||||
#define __rb_color(pc) ((pc) & 1)
|
||||
#define __rb_is_black(pc) __rb_color(pc)
|
||||
#define __rb_is_red(pc) (!__rb_color(pc))
|
||||
#define rb_color(rb) __rb_color((rb)->__rb_parent_color)
|
||||
#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color)
|
||||
#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color)
|
||||
|
||||
|
||||
static inline void rb_set_parent_color(struct rb_node *rb,
|
||||
struct rb_node *p, int color)
|
||||
{
|
||||
rb->__rb_parent_color = (unsigned long)p | color;
|
||||
}
|
||||
///rb_set_parent //TODO GENERIC ADDONLY
|
||||
|
||||
static inline void
|
||||
__rb_change_child(struct rb_node *old, struct rb_node *new,
|
||||
struct rb_node *parent, struct rb_root *root)
|
||||
{
|
||||
if (parent) {
|
||||
if (parent->rb_left == old)
|
||||
WRITE_ONCE(parent->rb_left, new);
|
||||
else
|
||||
WRITE_ONCE(parent->rb_right, new);
|
||||
} else
|
||||
WRITE_ONCE(root->rb_node, new);
|
||||
}
|
||||
|
||||
/////////////////////////////
|
||||
|
||||
#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3))
|
||||
|
||||
#define RB_ROOT (struct rb_root) { NULL, }
|
||||
#define rb_entry(ptr, type, member) container_of(ptr, type, member)
|
||||
|
||||
#define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL)
|
||||
|
||||
/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */
|
||||
#define RB_EMPTY_NODE(node) \
|
||||
((node)->__rb_parent_color == (unsigned long)(node))
|
||||
#define RB_CLEAR_NODE(node) \
|
||||
((node)->__rb_parent_color = (unsigned long)(node))
|
||||
|
||||
|
||||
extern void rb_insert_color(struct rb_node *, struct rb_root *);
|
||||
extern void rb_erase(struct rb_node *, struct rb_root *);
|
||||
|
||||
|
||||
/* Find logical next and previous nodes in a tree */
|
||||
extern struct rb_node *rb_next(const struct rb_node *);
|
||||
extern struct rb_node *rb_prev(const struct rb_node *);
|
||||
extern struct rb_node *rb_first(const struct rb_root *);
|
||||
extern struct rb_node *rb_last(const struct rb_root *);
|
||||
|
||||
/* Postorder iteration - always visit the parent after its children */
|
||||
extern struct rb_node *rb_first_postorder(const struct rb_root *);
|
||||
extern struct rb_node *rb_next_postorder(const struct rb_node *);
|
||||
|
||||
/* Fast replacement of a single node without remove/rebalance/add/rebalance */
|
||||
extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
|
||||
struct rb_root *root);
|
||||
extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
|
||||
struct rb_root *root);
|
||||
|
||||
|
||||
static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
|
||||
struct rb_node **rb_link)
|
||||
{
|
||||
node->__rb_parent_color = (unsigned long)parent;
|
||||
node->rb_left = node->rb_right = NULL;
|
||||
|
||||
*rb_link = node;
|
||||
}
|
||||
|
||||
/*static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
|
||||
struct rb_node **rb_link)
|
||||
{
|
||||
node->__rb_parent_color = (unsigned long)parent;
|
||||
node->rb_left = node->rb_right = NULL;
|
||||
|
||||
rcu_assign_pointer(*rb_link, node);
|
||||
}**/ //TODO LESS_DEPENDENCIES
|
||||
|
||||
#define rb_entry_safe(ptr, type, member) \
|
||||
({ typeof(ptr) ____ptr = (ptr); \
|
||||
____ptr ? rb_entry(____ptr, type, member) : NULL; \
|
||||
})
|
||||
|
||||
/**
|
||||
* rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of
|
||||
* given type allowing the backing memory of @pos to be invalidated
|
||||
*
|
||||
* @pos: the 'type *' to use as a loop cursor.
|
||||
* @n: another 'type *' to use as temporary storage
|
||||
* @root: 'rb_root *' of the rbtree.
|
||||
* @field: the name of the rb_node field within 'type'.
|
||||
*
|
||||
* rbtree_postorder_for_each_entry_safe() provides a similar guarantee as
|
||||
* list_for_each_entry_safe() and allows the iteration to continue independent
|
||||
* of changes to @pos by the body of the loop.
|
||||
*
|
||||
* Note, however, that it cannot handle other modifications that re-order the
|
||||
* rbtree it is iterating over. This includes calling rb_erase() on @pos, as
|
||||
* rb_erase() may rebalance the tree, causing us to miss some nodes.
|
||||
*/
|
||||
#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \
|
||||
for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \
|
||||
pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \
|
||||
typeof(*pos), field); 1; }); \
|
||||
pos = n)
|
||||
|
||||
/*
|
||||
* Leftmost-cached rbtrees.
|
||||
*
|
||||
* We do not cache the rightmost node based on footprint
|
||||
* size vs number of potential users that could benefit
|
||||
* from O(1) rb_last(). Just not worth it, users that want
|
||||
* this feature can always implement the logic explicitly.
|
||||
* Furthermore, users that want to cache both pointers may
|
||||
* find it a bit asymmetric, but that's ok.
|
||||
*/
|
||||
struct rb_root_cached {
|
||||
struct rb_root rb_root;
|
||||
struct rb_node *rb_leftmost;
|
||||
};
|
||||
|
||||
#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
|
||||
|
||||
/* Same as rb_first(), but O(1) */
|
||||
#define rb_first_cached(root) (root)->rb_leftmost
|
||||
|
||||
static inline void rb_insert_color_cached(struct rb_node *node,
|
||||
struct rb_root_cached *root,
|
||||
bool leftmost)
|
||||
{
|
||||
if (leftmost)
|
||||
root->rb_leftmost = node;
|
||||
rb_insert_color(node, &root->rb_root);
|
||||
}
|
||||
|
||||
static inline void rb_erase_cached(struct rb_node *node,
|
||||
struct rb_root_cached *root)
|
||||
{
|
||||
if (root->rb_leftmost == node)
|
||||
root->rb_leftmost = rb_next(node);
|
||||
rb_erase(node, &root->rb_root);
|
||||
}
|
||||
|
||||
static inline void rb_replace_node_cached(struct rb_node *victim,
|
||||
struct rb_node *new,
|
||||
struct rb_root_cached *root)
|
||||
{
|
||||
if (root->rb_leftmost == victim)
|
||||
root->rb_leftmost = new;
|
||||
rb_replace_node(victim, new, &root->rb_root);
|
||||
}
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
/***END OF THE ORIGINAL LINUX KERNEL HEADER (MINIMIZED)
 ***FEW AUX FUNCS TO SUPPORT SpMM-Symbolic
 ***/
|
||||
#include <string.h>
|
||||
#include "config.h"
|
||||
typedef struct{
|
||||
idx_t key;
|
||||
struct rb_node rb;
|
||||
|
||||
/* following fields used for testing augmented rbtree functionality
|
||||
u32 val;
|
||||
u32 augmented; ///only for AUGMENTED_TEST
|
||||
*/
|
||||
} rbNode;
|
||||
//static struct rb_root_cached root = RB_ROOT_CACHED;
|
||||
|
||||
typedef struct rb_root_cached rbRoot;
|
||||
|
||||
|
||||
/*
|
||||
* return 1 if @node with the given key @key
|
||||
* has been inserted in rbtree rooted at @root; 0 otherwise
|
||||
*/
|
||||
static inline int rbInsertStdNewKey(rbRoot *root,rbNode *node, idx_t key)
|
||||
{
|
||||
struct rb_node **new = &root->rb_root.rb_node, *parent = NULL;
|
||||
idx_t parentK;
|
||||
|
||||
while (*new) {
|
||||
parent = *new;
|
||||
parentK = rb_entry(parent, rbNode, rb)->key;
|
||||
if (key < parentK) new = &parent->rb_left;
|
||||
else if (key > parentK) new = &parent->rb_right;
|
||||
else return 0; //already in
|
||||
DEBUGCHECKS assert( *new != parent );
|
||||
}
|
||||
//insert the node in the correct position, assigning the key
|
||||
/*DEBUGCHECKS{ //check for double insertion
|
||||
rbNode testNode;
|
||||
memset(&testNode,0,sizeof(testNode));
|
||||
assert( !memcmp(node,&testNode,sizeof(*node)) );
|
||||
for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n))
|
||||
assert( n != *new );
|
||||
}*/
|
||||
|
||||
node->key = key;
|
||||
rb_link_node(&node->rb, parent, new);
|
||||
rb_insert_color(&node->rb, &root->rb_root);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline int rbInsertCachedNewKey(rbRoot *root,rbNode *node, idx_t key)
|
||||
{
|
||||
struct rb_node **new = &root->rb_root.rb_node, *parent = NULL;
|
||||
idx_t parentK;
|
||||
bool leftmost = true;
|
||||
|
||||
while (*new) {
|
||||
parent = *new;
|
||||
parentK = rb_entry(parent, rbNode, rb)->key;
|
||||
if (key < parentK) new = &parent->rb_left;
|
||||
else if (key > parentK) {
|
||||
new = &parent->rb_right;
|
||||
leftmost = false;
|
||||
}
|
||||
else return 0;
|
||||
}
|
||||
|
||||
//insert the node in the correct position, assigning the key
|
||||
/*DEBUGCHECKS{ //check for double insertion
|
||||
rbNode testNode;
|
||||
memset(&testNode,0,sizeof(testNode));
|
||||
assert( !memcmp(node,&testNode,sizeof(*node)) );
|
||||
for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n))
|
||||
assert( n != *new );
|
||||
}*/
|
||||
|
||||
node->key = key;
|
||||
rb_link_node(&node->rb, parent, new);
|
||||
rb_insert_color_cached(&node->rb, root, leftmost);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline int rbInsertNewKey(rbRoot *root,rbNode *node, idx_t key){
|
||||
#if RB_CACHED_INSERT == TRUE
|
||||
return rbInsertCachedNewKey(root,node,key);
|
||||
#else
|
||||
return rbInsertStdNewKey(root,node,key);
|
||||
#endif
|
||||
}
|
||||
|
||||
//iterate @n over @root's nodes in key order; the loop body is supplied at the use site
#define rbNodeOrderedVisit(n,root) \
	for (n = rb_first(&root->rb_root); n; n = rb_next(n))
|
||||
|
||||
inline void cleanRbNodes(rbRoot* root,rbNode* nodes,idx_t nodesNum){
|
||||
memset(nodes,0,nodesNum * sizeof(*nodes));
|
||||
memset(root,0,sizeof(*root));
|
||||
}
|
||||
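/* Hedged usage sketch (illustrative, not part of the build): insert keys from
 * a preallocated node pool, skip duplicates, then visit them in key order. */
static inline void rbUsageSketch(void){
	rbRoot root = RB_ROOT_CACHED;		//empty leftmost-cached tree
	rbNode pool[8];
	idx_t keys[] = {5, 1, 5, 3}, used = 0;	//note the duplicate 5
	memset(pool, 0, sizeof(pool));
	for (idx_t i = 0; i < 4; i++)
		used += rbInsertNewKey(&root, pool + used, keys[i]); //1 only on new keys
	//used == 3 here: the duplicate key was rejected
	for (struct rb_node* n = rb_first(&root.rb_root); n; n = rb_next(n))
		printf("%lu ", (ulong) rb_entry(n, rbNode, rb)->key);
	cleanRbNodes(&root, pool, used);	//reset nodes and root for reuse
}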
#endif /* _LINUX_RBTREE_H */
|
@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Sp3MM_for_AlgebraicMultiGrid
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef MACROS
|
||||
#define MACROS
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
///aux macro-functions
|
||||
#define ABS(a) ((a) > 0 ? (a) : -(a))
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
#define AVG(a,b) ((a)/2 + (b)/2 + ((a)%2+(b)%2)/2)
|
||||
#define SWAP(a,b) do{ (a)^=(b); (b)^=(a); (a)^=(b); }while(0) //integer XOR swap; do-while makes it a single statement
|
||||
#define IN_RANGE(i,start,end) ( (start) <= (i) && (i) <= (end) )
|
||||
|
||||
//ceil(x/y) with integers
|
||||
#define INT_DIV_CEIL(x,y) ( ( (x) - 1) / (y) + 1 )
|
||||
//2D ROW MAJOR indexing wrap compute
|
||||
#define IDX2D(i,j,nCols) ((j) + (i)*(nCols))
|
||||
|
||||
///distribute remainder @rem among groups, giving an extra +1 to the first @rem groups
|
||||
#define UNIF_REMINDER_DISTRI(i,div,rem) \
|
||||
( (div) + ( (i) < (rem) ? 1 : 0 ) )
|
||||
#define UNIF_REMINDER_DISTRI_STARTIDX(i,div,rem) \
|
||||
( (i) * (div) + MIN( (i),(rem) ) )
|
||||
#define UNIF_REMINDER_DISTRI_ENDIDX(i,div,rem) \
	( ((i)+1) * (div) + MIN( (i)+1,(rem) ) ) //exclusive end: matches the next share's start idx
|
||||
//shorter name alias
|
||||
#define unifRemShare(i,div,rem) UNIF_REMINDER_DISTRI(i,div,rem)
|
||||
#define unifRemShareStart(i,div,rem) UNIF_REMINDER_DISTRI_STARTIDX(i,div,rem)
|
||||
#define unifRemShareEnd(i,div,rem) UNIF_REMINDER_DISTRI_ENDIDX( (i) , (div) , (rem) )
|
||||
#define unifRemShareBlock(i,div,rem) unifRemShareStart(i,div,rem), unifRemShare(i,div,rem)
|
||||
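/* Hedged worked example (illustrative, not part of the build): splitting
 * r=10 elements among t=3 workers gives div=3, rem=1 -> shares 4,3,3
 * over the index ranges [0,4) [4,7) [7,10). */
static inline void unifRemShareSketch(void){
	unsigned long r = 10, t = 3, d = r / t, rem = r % t;
	for (unsigned long i = 0; i < t; i++)
		printf("worker %lu: [%lu,%lu) len %lu\n", i,
			unifRemShareStart(i, d, rem),
			unifRemShareEnd(i, d, rem),
			unifRemShare(i, d, rem));
}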
|
||||
#define STATIC_ARR_ELEMENTS_N(arr) (sizeof( (arr) ) / (sizeof(*(arr))))
|
||||
////STRING UTILS
|
||||
#define _STR(s) #s
|
||||
#define STR(s) _STR(s)
|
||||
///CONCATENATE
|
||||
//Concatenate preprocessor tokens A and B WITHOUT expanding macro definitions
|
||||
#define _CAT(a,b) a ## b
|
||||
#define _CAT3(a,b,c) a ## b ## c
|
||||
#define _CAT4(a,b,c,d) a ## b ## c ## d
|
||||
//Concatenate preprocessor tokens A and B EXPANDING macro definitions
|
||||
#define CAT(a,b) _CAT(a,b)
|
||||
#define CAT3(a,b,c) _CAT3(a,b,c)
|
||||
#define CAT4(a,b,c,d) _CAT4(a,b,c,d)
|
||||
|
||||
#define _UNDEF _
|
||||
////PRINTS
|
||||
#define CHIGHLIGHT "\33[1m\33[92m"
|
||||
#define CCC CHIGHLIGHT
|
||||
#define CHIGHLIGHTERR "\33[31m\33[1m\33[44m"
|
||||
#define CCCERR CHIGHLIGHTERR
|
||||
#define CEND "\33[0m"
|
||||
#define hprintsf(str,...) printf( CHIGHLIGHT str CEND,__VA_ARGS__ )
|
||||
#define hprintf(str) printf( CHIGHLIGHT str CEND)
|
||||
#define ERRPRINTS(str,...) fprintf( stderr, CHIGHLIGHTERR str CEND,__VA_ARGS__ )
|
||||
#define ERRPRINT(str) fprintf( stderr, CHIGHLIGHTERR str CEND )
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
///aux types
|
||||
typedef unsigned char uchar;
|
||||
typedef unsigned short ushort;
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned long ulong;
|
||||
typedef char bool;
|
||||
#define TRUE (!0)
|
||||
#define FALSE 0
|
||||
#define true TRUE
|
||||
#define false FALSE
|
||||
#define T TRUE
|
||||
#define F FALSE
|
||||
//"decimal" type with custom precision, selected at build time via the _DECIMAL_TRGT_PREC macro
|
||||
#ifndef _DECIMAL_TRGT_PREC
|
||||
//dflt floating point precision & formatting chars
|
||||
#define _DECIMAL_TRGT_PREC double
|
||||
#define _DECIMAL_TRGT_PREC_PR "%lf"
|
||||
#else
|
||||
//TODO SELECT WITH RESPECT TO THE EXPORTED TARGET DECIMAL TYPE
|
||||
#define _DECIMAL_TRGT_PREC_PR "%f"
|
||||
#endif
|
||||
typedef _DECIMAL_TRGT_PREC decimal;
|
||||
|
||||
|
||||
///EXTRA INCLUDE --- cuda
|
||||
///assertions are disabled at compile time by defining the NDEBUG preprocessor macro before including assert.h
|
||||
//#ifdef ASSERT #include <assert.h> #endif
|
||||
|
||||
#endif //MACROS
|
@ -0,0 +1,140 @@
|
||||
/*
|
||||
* RedBlackTree_linux_userspace
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the RedBlackTree_linux_userspace or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE RedBlackTree_linux_userspace GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef MACROSLINUXMOCK
|
||||
#define MACROSLINUXMOCK
|
||||
|
||||
////////////////////////// LINUX KERNEL RBTree userspace porting //////////////////////////////
|
||||
typedef unsigned int u32;
|
||||
typedef unsigned int cycles_t;
|
||||
|
||||
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
|
||||
|
||||
/**
|
||||
* container_of - cast a member of a structure out to the containing structure
|
||||
* @ptr: the pointer to the member.
|
||||
* @type: the type of the container struct this is embedded in.
|
||||
* @member: the name of the member within the struct.
|
||||
* TODO LESS_DEPENDENCIES
|
||||
removed:
|
||||
BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \
|
||||
!__same_type(*(ptr), void), \
|
||||
"pointer type mismatch in container_of()"); \
|
||||
|
||||
*/
|
||||
#define container_of(ptr, type, member) ({ \
|
||||
void *__mptr = (void *)(ptr); \
|
||||
((type *)(__mptr - offsetof(type, member))); })
|
||||
|
||||
#define NOOP(x) x
|
||||
//#define NOOP do {} while(0)
|
||||
#undef unlikely
|
||||
#define unlikely(x) NOOP(x)
|
||||
/*
|
||||
* Yes, this permits 64-bit accesses on 32-bit architectures. These will
|
||||
* actually be atomic in some cases (namely Armv7 + LPAE), but for others we
|
||||
* rely on the access being split into 2x32-bit accesses for a 32-bit quantity
|
||||
* (e.g. a virtual address) and a strong prevailing wind.
|
||||
*/
|
||||
#define compiletime_assert_rwonce_type(t) \
|
||||
compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
|
||||
"Unsupported access size for {READ,WRITE}_ONCE().")
|
||||
///TODO OVERWRITTEN FOR PORTING
|
||||
#undef compiletime_assert_rwonce_type
|
||||
#define compiletime_assert_rwonce_type(t) NOOP(t)
|
||||
/*
|
||||
* Use __READ_ONCE() instead of READ_ONCE() if you do not require any
|
||||
* atomicity. Note that this may result in tears!
|
||||
*/
|
||||
#ifndef __READ_ONCE
|
||||
#define __READ_ONCE(x) (*(const volatile __unqual_scalar_typeof(x) *)&(x))
|
||||
#endif
|
||||
|
||||
#define READ_ONCE(x) \
|
||||
({ \
|
||||
compiletime_assert_rwonce_type(x); \
|
||||
__READ_ONCE(x); \
|
||||
})
|
||||
|
||||
#define __WRITE_ONCE(x, val) \
|
||||
do { \
|
||||
*(volatile typeof(x) *)&(x) = (val); \
|
||||
} while (0)
|
||||
|
||||
/*//TODO ORIGNAL VERSION
|
||||
#define WRITE_ONCE(x, val) \
|
||||
do { \
|
||||
compiletime_assert_rwonce_type(x); \
|
||||
__WRITE_ONCE(x, val); \
|
||||
} while (0)
|
||||
*/
|
||||
#define WRITE_ONCE(x,val) x = val
|
||||
|
||||
/*
|
||||
* Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need to load a
|
||||
* word from memory atomically but without telling KASAN/KCSAN. This is
|
||||
* usually used by unwinding code when walking the stack of a running process.
|
||||
*/
|
||||
#define READ_ONCE_NOCHECK(x) \
|
||||
({ \
|
||||
compiletime_assert(sizeof(x) == sizeof(unsigned long), \
|
||||
"Unsupported access size for READ_ONCE_NOCHECK()."); \
|
||||
(typeof(x))__read_once_word_nocheck(&(x)); \
|
||||
})
|
||||
|
||||
#include <x86intrin.h> //rdtsc
|
||||
static inline cycles_t get_cycles(void)
|
||||
{
|
||||
|
||||
/**#ifndef CONFIG_X86_TSC
|
||||
if (!boot_cpu_has(X86_FEATURE_TSC))
|
||||
return 0;
|
||||
#endif */ ///TODO LESS_DEPENDENCIES
|
||||
|
||||
return __rdtsc();
|
||||
}
|
||||
|
||||
/**#define WARN_ON_ONCE(condition) ({ \
|
||||
static bool __section(".data.once") __warned; \
|
||||
int __ret_warn_once = !!(condition); \
|
||||
\
|
||||
if (unlikely(__ret_warn_once && !__warned)) { \
|
||||
__warned = true; \
|
||||
WARN_ON(1); \
|
||||
} \
|
||||
unlikely(__ret_warn_once); \
|
||||
}) */ //TODO LESS_DEPENDENCIES
|
||||
#include <assert.h>
|
||||
#define WARN_ON_ONCE(condition) assert( !(condition) )
|
||||
|
||||
#define div_u64(a,b) ( a / b )
|
||||
#define kfree(x) free(x)
|
||||
|
||||
#endif //MACROSLINUXMOCK
|
@ -0,0 +1,133 @@
|
||||
/*
|
||||
* Matrix Market I/O library for ANSI C
|
||||
*
|
||||
* See http://math.nist.gov/MatrixMarket for details.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MM_IO_H
|
||||
#define MM_IO_H
|
||||
|
||||
#define MM_MAX_LINE_LENGTH 1025
|
||||
#define MatrixMarketBanner "%%MatrixMarket"
|
||||
#define MM_MAX_TOKEN_LENGTH 64
|
||||
|
||||
typedef char MM_typecode[4];
|
||||
|
||||
char *mm_typecode_to_str(MM_typecode matcode);
|
||||
|
||||
int mm_read_banner(FILE *f, MM_typecode *matcode);
|
||||
int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz);
|
||||
int mm_read_mtx_array_size(FILE *f, int *M, int *N);
|
||||
|
||||
int mm_write_banner(FILE *f, MM_typecode matcode);
|
||||
int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz);
|
||||
int mm_write_mtx_array_size(FILE *f, int M, int N);
|
||||
|
||||
|
||||
/********************* MM_typecode query functions ***************************/
|
||||
|
||||
#define mm_is_matrix(typecode) ((typecode)[0]=='M')
|
||||
|
||||
#define mm_is_sparse(typecode) ((typecode)[1]=='C')
|
||||
#define mm_is_coordinate(typecode)((typecode)[1]=='C')
|
||||
#define mm_is_dense(typecode) ((typecode)[1]=='A')
|
||||
#define mm_is_array(typecode) ((typecode)[1]=='A')
|
||||
|
||||
#define mm_is_complex(typecode) ((typecode)[2]=='C')
|
||||
#define mm_is_real(typecode) ((typecode)[2]=='R')
|
||||
#define mm_is_pattern(typecode) ((typecode)[2]=='P')
|
||||
#define mm_is_integer(typecode) ((typecode)[2]=='I')
|
||||
|
||||
#define mm_is_symmetric(typecode)((typecode)[3]=='S')
|
||||
#define mm_is_general(typecode) ((typecode)[3]=='G')
|
||||
#define mm_is_skew(typecode) ((typecode)[3]=='K')
|
||||
#define mm_is_hermitian(typecode)((typecode)[3]=='H')
|
||||
|
||||
int mm_is_valid(MM_typecode matcode); /* too complex for a macro */
|
||||
|
||||
|
||||
/********************* MM_typecode modify functions ***************************/
|
||||
|
||||
#define mm_set_matrix(typecode) ((*typecode)[0]='M')
|
||||
#define mm_set_coordinate(typecode) ((*typecode)[1]='C')
|
||||
#define mm_set_array(typecode) ((*typecode)[1]='A')
|
||||
#define mm_set_dense(typecode) mm_set_array(typecode)
|
||||
#define mm_set_sparse(typecode) mm_set_coordinate(typecode)
|
||||
|
||||
#define mm_set_complex(typecode)((*typecode)[2]='C')
|
||||
#define mm_set_real(typecode) ((*typecode)[2]='R')
|
||||
#define mm_set_pattern(typecode)((*typecode)[2]='P')
|
||||
#define mm_set_integer(typecode)((*typecode)[2]='I')
|
||||
|
||||
|
||||
#define mm_set_symmetric(typecode)((*typecode)[3]='S')
|
||||
#define mm_set_general(typecode)((*typecode)[3]='G')
|
||||
#define mm_set_skew(typecode) ((*typecode)[3]='K')
|
||||
#define mm_set_hermitian(typecode)((*typecode)[3]='H')
|
||||
|
||||
#define mm_clear_typecode(typecode) ((*typecode)[0]=(*typecode)[1]= \
|
||||
(*typecode)[2]=' ',(*typecode)[3]='G')
|
||||
|
||||
#define mm_initialize_typecode(typecode) mm_clear_typecode(typecode)
|
||||
|
||||
|
||||
/********************* Matrix Market error codes ***************************/
|
||||
|
||||
|
||||
#define MM_COULD_NOT_READ_FILE 11
|
||||
#define MM_PREMATURE_EOF 12
|
||||
#define MM_NOT_MTX 13
|
||||
#define MM_NO_HEADER 14
|
||||
#define MM_UNSUPPORTED_TYPE 15
|
||||
#define MM_LINE_TOO_LONG 16
|
||||
#define MM_COULD_NOT_WRITE_FILE 17
|
||||
|
||||
|
||||
/******************** Matrix Market internal definitions ********************

   MM_matrix_typecode: 4-character sequence

                      object      sparse/     data        storage
                                  dense       type        scheme

   string position:    [0]        [1]         [2]         [3]

   Matrix typecode:    M(atrix)   C(oord)     R(eal)      G(eneral)
                                  A(array)    C(omplex)   H(ermitian)
                                              P(attern)   S(ymmetric)
                                              I(nteger)   K(skew)

 ***********************************************************************/
|
||||
|
||||
#define MM_MTX_STR "matrix"
|
||||
#define MM_ARRAY_STR "array"
|
||||
#define MM_DENSE_STR "array"
|
||||
#define MM_COORDINATE_STR "coordinate"
|
||||
#define MM_SPARSE_STR "coordinate"
|
||||
#define MM_COMPLEX_STR "complex"
|
||||
#define MM_REAL_STR "real"
|
||||
#define MM_INT_STR "integer"
|
||||
#define MM_GENERAL_STR "general"
|
||||
#define MM_SYMM_STR "symmetric"
|
||||
#define MM_HERM_STR "hermitian"
|
||||
#define MM_SKEW_STR "skew-symmetric"
|
||||
#define MM_PATTERN_STR "pattern"
|
||||
|
||||
|
||||
/* high level routines */
|
||||
|
||||
int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
|
||||
double val[], MM_typecode matcode);
|
||||
int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
|
||||
double val[], MM_typecode matcode);
|
||||
int mm_read_mtx_crd_entry(FILE *f, int *I, int *J, double *real, double *img,
|
||||
MM_typecode matcode);
|
||||
|
||||
int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
|
||||
double **val_, int **I_, int **J_);
|
||||
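/* Hedged usage sketch (illustrative, not part of the build; assumes the
 * consumer already included <stdio.h>, as the FILE prototypes above do):
 * read a banner, vet the typecode, then read the coordinate sizes. */
static inline int mmHeaderSketch(const char* path){
	MM_typecode tc; int M, N, nz, err = 0;
	FILE* f = fopen(path, "r");
	if (!f)	return MM_COULD_NOT_READ_FILE;
	if      (mm_read_banner(f, &tc))			err = MM_NO_HEADER;
	else if (!mm_is_valid(tc) || !mm_is_sparse(tc))		err = MM_UNSUPPORTED_TYPE;
	else if (mm_read_mtx_crd_size(f, &M, &N, &nz))		err = MM_PREMATURE_EOF;
	else    printf("%d x %d, %d nnz, type: %s\n", M, N, nz, mm_typecode_to_str(tc));
	fclose(f);
	return err;
}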
|
||||
|
||||
|
||||
#endif
|
@ -0,0 +1,105 @@
|
||||
/*
|
||||
* Sp3MM_for_AlgebraicMultiGrid
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef OMPCHUNKSDIVIDE
|
||||
#define OMPCHUNKSDIVIDE
|
||||
|
||||
/*
 * chunk distribution for runtime schedule set to dynamic, TODO guided
 * generic interface with elements, srcMatrix, configuration
 * TODO the srcMatrix is left in for advanced chunk distributions...
 * the configuration is expected to have a valid threadNum set
 */
|
||||
#include "config.h"
|
||||
//distribution of @rows|blocks of @matrix, exploiting @config
|
||||
typedef void (CHUNKS_DISTR ) (ulong,spmat*,CONFIG*);
|
||||
typedef void (*CHUNKS_DISTR_INTERF ) (ulong,spmat*,CONFIG*);
|
||||
|
||||
//NOOP chunks division for manual configuration via export OMP_SCHEDULE
|
||||
inline void chunksNOOP(ulong r,spmat* mat,CONFIG* cfg){ return; }
|
||||
#include "ompGetICV.h"
|
||||
//fair division of @r elements from matrix @mat with threads num in @cfg
|
||||
inline void chunksFair(ulong r,spmat* mat,CONFIG* cfg){
|
||||
assert(cfg->threadNum > 0); //configured target thread num
|
||||
omp_sched_t k,kind; int chunk_size,chunk_size_new=0,monotonic;
|
||||
omp_get_schedule(&kind,&chunk_size);
|
||||
k = kind;
|
||||
#if _OPENMP >= 201811 //OMP_SCHED_SCHEDULE modifier from 5.0
|
||||
monotonic = omp_sched_monotonic & kind;
|
||||
if(monotonic) k = kind-omp_sched_monotonic;
|
||||
#endif
|
||||
(void) monotonic; //else no unused warning
|
||||
switch(k){
|
||||
case omp_sched_static :
|
||||
DEBUG printf("static it's already fair\n");
|
||||
return;
|
||||
case omp_sched_dynamic:
|
||||
chunk_size_new = MAX(1,r/cfg->threadNum);
|
||||
break;
|
||||
//case omp_sched_guided :
|
||||
//case omp_sched_auto :
|
||||
//default:
|
||||
}
|
||||
if(chunk_size == chunk_size_new) return;
|
||||
omp_set_schedule(kind,chunk_size_new);
|
||||
VERBOSE printf("chunksFair:\tchunk adapted to %d\n",chunk_size_new);
|
||||
DEBUG ompGetRuntimeSchedule(NULL);
|
||||
}
|
||||
//fair division of @r elements from matrix @mat among the threads in @cfg,
//subdividing each fair share by a factor of @FAIR_CHUNKS_FOLDING
|
||||
inline void chunksFairFolded(ulong r,spmat* mat,CONFIG* cfg){
|
||||
assert(cfg->threadNum > 0); //configured target thread num
|
||||
omp_sched_t k,kind; int chunk_size,chunk_size_new=0,monotonic;
|
||||
omp_get_schedule(&kind,&chunk_size);
|
||||
k = kind;
|
||||
#if _OPENMP >= 201811 //OMP_SCHED_SCHEDULE modifier from 5.0
|
||||
monotonic = omp_sched_monotonic & kind;
|
||||
if(monotonic) k = kind-omp_sched_monotonic;
|
||||
#endif
|
||||
(void) monotonic; //else no unused warning
|
||||
switch(k){
|
||||
case omp_sched_static :
|
||||
DEBUG printf("static it's already fair\n");
|
||||
return;
|
||||
case omp_sched_dynamic:
|
||||
chunk_size_new = MAX(1,r/(cfg->threadNum*FAIR_CHUNKS_FOLDING));
|
||||
break;
|
||||
//case omp_sched_guided :
|
||||
//case omp_sched_auto :
|
||||
//default:
|
||||
}
|
||||
if(chunk_size == chunk_size_new) return;
|
||||
omp_set_schedule(kind,chunk_size_new);
|
||||
DEBUG{ //check with ICV get
|
||||
printf("chunksFairFolded:\tchunk adapted to %d\n",chunk_size_new);
|
||||
ompGetRuntimeSchedule(NULL);
|
||||
}
|
||||
}
|
||||
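/* Hedged usage sketch (illustrative, not part of the build; assumes the
 * inline functions above have extern definitions in a .c, as the "inline
 * export" pattern of this repo suggests): pick a distribution routine,
 * adapt the runtime chunk size to the matrix, then run the loop. */
static inline void chunksDistrSketch(spmat* mat, CONFIG* cfg){
	CHUNKS_DISTR_INTERF distr = &chunksFairFolded;	//or &chunksFair, &chunksNOOP
	distr(mat->M, mat, cfg);	//adapt the chunk size to @mat's row count
	#pragma omp parallel for schedule(runtime)
	for (idx_t r = 0; r < mat->M; r++){
		//...per-row work on mat...
	}
}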
#endif
|
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Sp3MM_for_AlgebraicMultiGrid
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef OMPGETICV_H
|
||||
#define OMPGETICV_H
|
||||
//only header definitions
|
||||
/*
 * log the sched configuration on stdout
 * returns kind,monotonic,chunkSize via arg0 if not NULL
 */
|
||||
void ompGetRuntimeSchedule(int* );
|
||||
void ompGetAllICV(); //only if not OMP_GET_ICV_MAIN
|
||||
float ompVersionMacroMap(); //version number as float using API dates mappings
|
||||
#endif
|
@ -0,0 +1,193 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2021 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2021 Cisco Systems, Inc. All rights reserved
|
||||
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009-2012 Oak Rigde National Laboratory. All rights reserved.
|
||||
* Copyright (c) 2011-2020 Sandia National Laboratories. All rights reserved.
|
||||
* Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011-2013 INRIA. All rights reserved.
|
||||
* Copyright (c) 2015 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2015-2021 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2017-2019 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2018 FUJITSU LIMITED. All rights reserved.
|
||||
* Copyright (c) 2021-2022 Google, LLC. All rights reserved.
|
||||
* Copyright (c) 2021-2022 Amazon.com, Inc. or its affiliates. All Rights
|
||||
* reserved.
|
||||
* Copyright (c) 2021 Bull S.A.S. All rights reserved.
|
||||
* Copyright (c) 2018 Triad National Security, LLC. All rights
|
||||
* Copyright (c) 2018-2021 Triad National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/*
|
||||
* Error classes and codes
|
||||
* Do not change the values of these without also modifying mpif.h.in.
|
||||
*/
|
||||
#define MPI_SUCCESS 0
|
||||
#define MPI_ERR_BUFFER 1
|
||||
#define MPI_ERR_COUNT 2
|
||||
#define MPI_ERR_TYPE 3
|
||||
#define MPI_ERR_TAG 4
|
||||
#define MPI_ERR_COMM 5
|
||||
#define MPI_ERR_RANK 6
|
||||
#define MPI_ERR_REQUEST 7
|
||||
#define MPI_ERR_ROOT 8
|
||||
#define MPI_ERR_GROUP 9
|
||||
#define MPI_ERR_OP 10
|
||||
#define MPI_ERR_TOPOLOGY 11
|
||||
#define MPI_ERR_DIMS 12
|
||||
#define MPI_ERR_ARG 13
|
||||
#define MPI_ERR_UNKNOWN 14
|
||||
#define MPI_ERR_TRUNCATE 15
|
||||
#define MPI_ERR_OTHER 16
|
||||
#define MPI_ERR_INTERN 17
|
||||
#define MPI_ERR_IN_STATUS 18
|
||||
#define MPI_ERR_PENDING 19
|
||||
#define MPI_ERR_ACCESS 20
|
||||
#define MPI_ERR_AMODE 21
|
||||
#define MPI_ERR_ASSERT 22
|
||||
#define MPI_ERR_BAD_FILE 23
|
||||
#define MPI_ERR_BASE 24
|
||||
#define MPI_ERR_CONVERSION 25
|
||||
#define MPI_ERR_DISP 26
|
||||
#define MPI_ERR_DUP_DATAREP 27
|
||||
#define MPI_ERR_FILE_EXISTS 28
|
||||
#define MPI_ERR_FILE_IN_USE 29
|
||||
#define MPI_ERR_FILE 30
|
||||
#define MPI_ERR_INFO_KEY 31
|
||||
#define MPI_ERR_INFO_NOKEY 32
|
||||
#define MPI_ERR_INFO_VALUE 33
|
||||
#define MPI_ERR_INFO 34
|
||||
#define MPI_ERR_IO 35
|
||||
#define MPI_ERR_KEYVAL 36
|
||||
#define MPI_ERR_LOCKTYPE 37
|
||||
#define MPI_ERR_NAME 38
|
||||
#define MPI_ERR_NO_MEM 39
|
||||
#define MPI_ERR_NOT_SAME 40
|
||||
#define MPI_ERR_NO_SPACE 41
|
||||
#define MPI_ERR_NO_SUCH_FILE 42
|
||||
#define MPI_ERR_PORT 43
|
||||
#define MPI_ERR_QUOTA 44
|
||||
#define MPI_ERR_READ_ONLY 45
|
||||
#define MPI_ERR_RMA_CONFLICT 46
|
||||
#define MPI_ERR_RMA_SYNC 47
|
||||
#define MPI_ERR_SERVICE 48
|
||||
#define MPI_ERR_SIZE 49
|
||||
#define MPI_ERR_SPAWN 50
|
||||
#define MPI_ERR_UNSUPPORTED_DATAREP 51
|
||||
#define MPI_ERR_UNSUPPORTED_OPERATION 52
|
||||
#define MPI_ERR_WIN 53
|
||||
|
||||
#define MPI_T_ERR_MEMORY 54
|
||||
#define MPI_T_ERR_NOT_INITIALIZED 55
|
||||
#define MPI_T_ERR_CANNOT_INIT 56
|
||||
#define MPI_T_ERR_INVALID_INDEX 57
|
||||
#define MPI_T_ERR_INVALID_ITEM 58
|
||||
#define MPI_T_ERR_INVALID_HANDLE 59
|
||||
#define MPI_T_ERR_OUT_OF_HANDLES 60
|
||||
#define MPI_T_ERR_OUT_OF_SESSIONS 61
|
||||
#define MPI_T_ERR_INVALID_SESSION 62
|
||||
#define MPI_T_ERR_CVAR_SET_NOT_NOW 63
|
||||
#define MPI_T_ERR_CVAR_SET_NEVER 64
|
||||
#define MPI_T_ERR_PVAR_NO_STARTSTOP 65
|
||||
#define MPI_T_ERR_PVAR_NO_WRITE 66
|
||||
#define MPI_T_ERR_PVAR_NO_ATOMIC 67
|
||||
#define MPI_ERR_RMA_RANGE 68
|
||||
#define MPI_ERR_RMA_ATTACH 69
|
||||
#define MPI_ERR_RMA_FLAVOR 70
|
||||
#define MPI_ERR_RMA_SHARED 71
|
||||
#define MPI_T_ERR_INVALID 72
|
||||
#define MPI_T_ERR_INVALID_NAME 73
|
||||
#define MPI_ERR_PROC_ABORTED 74
|
||||
|
||||
/* not #if conditional on OPAL_ENABLE_FT_MPI for ABI */
|
||||
#define MPI_ERR_PROC_FAILED 75
|
||||
#define MPI_ERR_PROC_FAILED_PENDING 76
|
||||
#define MPI_ERR_REVOKED 77
|
||||
|
||||
/* Per MPI-3 p349 47, MPI_ERR_LASTCODE must be >= the last predefined
|
||||
MPI_ERR_<foo> code. Set the last code to allow some room for adding
|
||||
error codes without breaking ABI. */
|
||||
#define MPI_ERR_LASTCODE 92
|
||||
|
||||
/*
|
||||
* Comparison results. Don't change the order of these, the group
|
||||
* comparison functions rely on it.
|
||||
* Do not change the order of these without also modifying mpif.h.in.
|
||||
*/
|
||||
enum {
|
||||
MPI_IDENT,
|
||||
MPI_CONGRUENT,
|
||||
MPI_SIMILAR,
|
||||
MPI_UNEQUAL
|
||||
};
|
||||
|
||||
/*
|
||||
* MPI_Init_thread constants
|
||||
* Do not change the order of these without also modifying mpif.h.in.
|
||||
*/
|
||||
enum {
|
||||
MPI_THREAD_SINGLE,
|
||||
MPI_THREAD_FUNNELED,
|
||||
MPI_THREAD_SERIALIZED,
|
||||
MPI_THREAD_MULTIPLE
|
||||
};
|
||||
|
||||
/*
|
||||
* Datatype combiners.
|
||||
* Do not change the order of these without also modifying mpif.h.in.
|
||||
* (see also mpif-common.h.fin).
|
||||
*/
|
||||
enum {
|
||||
MPI_COMBINER_NAMED,
|
||||
MPI_COMBINER_DUP,
|
||||
MPI_COMBINER_CONTIGUOUS,
|
||||
MPI_COMBINER_VECTOR,
|
||||
#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
|
||||
MPI_COMBINER_HVECTOR_INTEGER,
|
||||
#else
|
||||
OMPI_WAS_MPI_COMBINER_HVECTOR_INTEGER, /* preserve ABI compatibility */
|
||||
#endif
|
||||
MPI_COMBINER_HVECTOR,
|
||||
MPI_COMBINER_INDEXED,
|
||||
#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
|
||||
MPI_COMBINER_HINDEXED_INTEGER,
|
||||
#else
|
||||
OMPI_WAS_MPI_COMBINER_HINDEXED_INTEGER, /* preserve ABI compatibility */
|
||||
#endif
|
||||
MPI_COMBINER_HINDEXED,
|
||||
MPI_COMBINER_INDEXED_BLOCK,
|
||||
#if (!OMPI_OMIT_MPI1_COMPAT_DECLS)
|
||||
MPI_COMBINER_STRUCT_INTEGER,
|
||||
#else
|
||||
OMPI_WAS_MPI_COMBINER_STRUCT_INTEGER, /* preserve ABI compatibility */
|
||||
#endif
|
||||
MPI_COMBINER_STRUCT,
|
||||
MPI_COMBINER_SUBARRAY,
|
||||
MPI_COMBINER_DARRAY,
|
||||
MPI_COMBINER_F90_REAL,
|
||||
MPI_COMBINER_F90_COMPLEX,
|
||||
MPI_COMBINER_F90_INTEGER,
|
||||
MPI_COMBINER_RESIZED,
|
||||
MPI_COMBINER_HINDEXED_BLOCK
|
||||
};
|
||||
|
||||
////MOCK ERRHANDLERS
|
||||
#define OMPI_ERRHANDLER_INVOKE(comm,err,fname) err
|
||||
#define OMPI_ERRHANDLER_NOHANDLE_INVOKE(err,fname) err
|
||||
|
@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Sp3MM_for_AlgebraicMultiGrid
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef PARSER
|
||||
#define PARSER
|
||||
|
||||
#include "mmio.h"
|
||||
#include "sparseMatrix.h"
|
||||
|
||||
typedef struct{
|
||||
ulong row;
|
||||
ulong col;
|
||||
double val;
|
||||
} entry; //MatrixMarket COO entry
|
||||
|
||||
typedef struct{
|
||||
MM_typecode mcode;
|
||||
entry* entries;
|
||||
ulong* rowLens;
|
||||
ulong M,N,NZ; //spmat sizes
|
||||
} MatrixMarket;
|
||||
|
||||
////COO PARSE
|
||||
//parse and check MatrixMarket mat in @matPath file
|
||||
MatrixMarket* MMRead(char* matPath);
|
||||
void freeMatrixMarket(MatrixMarket* mm);
|
||||
//basic check of the sparse matrix's compliance with the app; returns a POSIX-style bool
|
||||
int MMCheck(MM_typecode typecode);
|
||||
/*
 * parse MatrixMarket matrix entries in @fp, of type @mcode,
 * into a COOrdinate list of entries
 * -> expands symmetric matrices into a normal matrix with both parts,
 *    so NZ will be doubled in place
 * returns the allocated and filled COO entries; the NNZ count is written into @NZ
 * NO SORT CHECKING HERE
 */
|
||||
entry* MMtoCOO(ulong* NZ, FILE *fp, MM_typecode mcode,ulong* rowLens);
|
||||
|
||||
////COO -> ANYTHING ELSE CONVERSION
|
||||
/*
|
||||
* write COO entries in @entries inside sparse matrix @mat in ELL format
|
||||
* EXPECTED: CSR arrays allocated, @entries col sorted in (not madatory consecut) rows
|
||||
* [simmetrical parts explicitly rappresented --> not important here]
|
||||
*/
|
||||
int COOtoCSR(entry* entries, spmat* mat,ulong* rowLens);
|
||||
/*
 * write COO entries in @entries inside sparse matrix @mat in ELL format
 * EXPECTED: @entries col sorted within (not necessarily consecutive) rows
 * ELL internal arrays are allocated in this function, not freed in case of error
 */
|
||||
int COOtoELL(entry* entries, spmat* mat, ulong* rowLens);
|
||||
////wrapper MM -> specialized target
|
||||
/*
 * Parse MatrixMarket matrix stored in file at @matPath
 * IMPLEMENTED WRAPPING: MMtoCOO -> COOtoCSR
 * Returns: allocated spmat sparse matrix with all fields allocated;
 * symmetric matrices are expanded into a full matrix
 */
|
||||
spmat* MMtoCSR(char* matPath);
|
||||
spmat* MMtoELL(char* matPath);
|
||||
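/* Hedged end-to-end sketch (illustrative, not part of the build; the matrix
 * path is made up): parse a MatrixMarket file into CSR, report sizes, free it. */
static inline int parseSketch(void){
	spmat* m = MMtoCSR("matrices/test.mtx");
	if (!m)	return EXIT_FAILURE;
	printf("parsed %lu x %lu CSR with %lu nnz\n",
		(ulong) m->M, (ulong) m->N, (ulong) m->NZ);
	freeSpmat(m);	//from sparseMatrix.h, included above
	return EXIT_SUCCESS;
}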
|
||||
|
||||
#endif
|
@ -0,0 +1,216 @@
|
||||
/*
|
||||
* Sp3MM_for_AlgebraicMultiGrid
|
||||
* (C) Copyright 2021-2022
|
||||
* Andrea Di Iorio
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions, and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
|
||||
* not be used to endorse or promote products derived from this
|
||||
* software without specific written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
//sparse matrix def & aux
|
||||
#ifndef SPARSEMATRIX
|
||||
#define SPARSEMATRIX
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "macros.h"
|
||||
#include "config.h"
|
||||
|
||||
typedef struct {
|
||||
idx_t NZ,M,N;
|
||||
double *AS;
|
||||
idx_t* JA;
|
||||
//CSR SPECIFIC
|
||||
idx_t* IRP;
|
||||
#ifdef ROWLENS
|
||||
idx_t* RL; //row lengths
|
||||
#endif
|
||||
//CUDA SPECIFIC
|
||||
idx_t MAX_ROW_NZ;
|
||||
|
||||
} spmat; //describe a sparse matrix
|
||||
|
||||
//smart index keeping in a dense map
|
||||
typedef struct{
|
||||
idx_t len; //num of nnz idx accumulated
|
||||
/* nnz index presence packing, with implicit space enough for all possible indexes */
|
||||
nnz_idxs_flags_t idxsMap;
|
||||
uint idxsMapN; //either num of limbs or len of char flag array
|
||||
} SPVECT_IDX_DENSE_MAP;
|
||||
int initSpVectIdxDenseAcc(idx_t idxMax, SPVECT_IDX_DENSE_MAP* );
|
||||
inline void _resetIdxMap(SPVECT_IDX_DENSE_MAP* acc){
|
||||
acc->len = 0;
|
||||
memset(acc->idxsMap,0,sizeof(*acc->idxsMap)*acc->idxsMapN);
|
||||
}
|
||||
inline void _freeIdxMap(SPVECT_IDX_DENSE_MAP* acc){
|
||||
free(acc->idxsMap);
|
||||
free(acc);
|
||||
}
|
||||
//aux struct for sparse vector-scalar product accumulation
|
||||
typedef struct{
|
||||
double* v;			//aux dense vector accumulating a sparse row
idx_t* nnzIdx;			//indexes of v's nnz values (contiguous)
idx_t vLen;			//size of the aux dense vector //TODO USELESS?
SPVECT_IDX_DENSE_MAP nnzIdxMap;	//nnz index presence map
|
||||
} ACC_DENSE;
|
||||
int allocAccDense(ACC_DENSE* v, ulong size);
|
||||
|
||||
/*
 **** NNZ IDXS PRESENCE FLAGS ACCESS INTERFACE ****
 * spVect_idx_in(idx,SPVECT_IDX_DENSE_MAP*)
 * -> returns 0 if @idx wasn't already set, setting it in that case; 1 otherwise
 */
|
||||
inline int spVect_idx_in(idx_t idx, SPVECT_IDX_DENSE_MAP* idxsMapAcc){
|
||||
#if SPVECT_IDX_BITWISE == TRUE
|
||||
uint limbID = idx / LIMB_SIZE_BIT; //idx's limb id
|
||||
uint limbIdxID = idx % LIMB_SIZE_BIT; //idx's pos in limb
|
||||
DEBUGCHECKS assert( limbID < idxsMapAcc->idxsMapN );
|
||||
limb_t idxPos = ((limb_t) 1) << limbIdxID;
|
||||
if (!(idxsMapAcc->idxsMap[limbID] & idxPos) ){
|
||||
idxsMapAcc->idxsMap[limbID] |= idxPos;
|
||||
idxsMapAcc->len++;
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
//assert( idx < idxsMapAcc->idxsMapN ); //TODO
|
||||
if (!( idxsMapAcc->idxsMap[idx] )){ //TODO an atomic compare&swap primitive could be used here, merging these ops at the HW level?
|
||||
idxsMapAcc->idxsMap[idx] = 1;
|
||||
idxsMapAcc->len++;
|
||||
return 0;
|
||||
}
|
||||
#endif //SPVECT_IDX_BITWISE == TRUE
|
||||
return 1;
|
||||
}
|
||||
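/* Hedged usage sketch (illustrative, not part of the build; assumes
 * initSpVectIdxDenseAcc returns 0 on success): count the distinct column
 * indexes in @cols, all smaller than @maxCol. */
static inline idx_t distinctIdxsSketch(idx_t* cols, idx_t n, idx_t maxCol){
	SPVECT_IDX_DENSE_MAP map;
	idx_t distinct;
	if (initSpVectIdxDenseAcc(maxCol, &map))	return 0;
	for (idx_t i = 0; i < n; i++)
		spVect_idx_in(cols[i], &map);	//bumps map.len once per new idx
	distinct = map.len;
	free(map.idxsMap);	//map itself lives on the stack: free only the flags
	return distinct;
}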
inline void _resetAccVect(ACC_DENSE* acc){
|
||||
memset(acc->v, 0, acc->vLen * sizeof(*(acc->v)));
|
||||
memset(acc->nnzIdx, 0, acc->vLen * sizeof(*(acc->nnzIdx)));
|
||||
_resetIdxMap(&acc->nnzIdxMap);
|
||||
}
|
||||
////Sparse vector accumulator -- corresponding to a matrix portion
|
||||
typedef struct{
|
||||
//idx_t r; //row index in the corresponding matrix
|
||||
//idx_t c; //col index in the corresponding matrix
|
||||
idx_t len; //rowLen
|
||||
double* AS; //row nnz values
|
||||
idx_t* JA; //row nnz colIndexes
|
||||
} SPACC;
|
||||
|
||||
|
||||
/*
|
||||
* ARRAY BISECTION - RECURSIVE VERSION
|
||||
* TODO ASSERT LEN>0 ommitted
|
||||
*/
|
||||
inline int BISECT_ARRAY(idx_t target, idx_t* arr, idx_t len){
|
||||
//if (len == 0) return FALSE;
|
||||
if (len <= 1) return *arr == target;
|
||||
idx_t middleIdx = (len-1) / 2; //len=5-->2, len=4-->1
|
||||
idx_t middle = arr[ middleIdx ];
|
||||
if (target == middle) return TRUE;
|
||||
else if (target < middle) return BISECT_ARRAY(target,arr,middleIdx);
|
||||
else return BISECT_ARRAY(target,arr+middleIdx+1,middleIdx + (len-1)%2);
|
||||
}
|
||||
|
||||
/*
|
||||
* return !0 if col @j idx is in row @i of sparse mat @smat
|
||||
* bisection used --> O(log_2(ROWLENGHT))
|
||||
*/
|
||||
inline int IS_NNZ(spmat* smat,idx_t i,idx_t j){
|
||||
idx_t rStart = smat->IRP[i];
|
||||
idx_t rLen = smat->IRP[i+1] - rStart;
|
||||
if (!rLen) return FALSE;
|
||||
return BISECT_ARRAY(j,smat->JA + rStart,rLen);
|
||||
}
|
||||
inline int IS_NNZ_linear(spmat* smat,idx_t i,idx_t j){ //linear -> O(ROWLENGHT)
|
||||
int out = 0;
|
||||
for (idx_t x=smat->IRP[i]; x<smat->IRP[i+1] && !out; x++){
|
||||
out = (j == smat->JA[x]);
|
||||
}
|
||||
return out;
|
||||
}
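/*
 * Illustrative note (not in the original source): on the CSR example above,
 * IS_NNZ(mat,2,3) takes row 2's column slice JA[2..3] = {1,3} and bisects it
 * for target 3 -> TRUE; IS_NNZ(mat,1,0) sees an empty row (rLen==0) -> FALSE.
 */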
////aux functions
//free sparse matrix
inline void freeSpmatInternal(spmat* mat){
	if(!mat)	return;
	free(mat->AS);
	free(mat->JA);
	free(mat->IRP);
#ifdef ROWLENS
	free(mat->RL);
#endif
}

inline void freeSpmat(spmat* mat){
	if (!mat)	return;
	freeSpmatInternal(mat);
	free(mat);
}

static inline void zeroSpmat(spmat* m) {
	memset(m->AS,  0, sizeof(*(m->AS))  * m->NZ);
	memset(m->JA,  0, sizeof(*(m->JA))  * m->NZ);
	memset(m->IRP, 0, sizeof(*(m->IRP)) * (m->M + 1));
}

//free the aux struct's non-NULL pointers
inline void freeSpAcc(SPACC* r){
	free(r->AS);
	free(r->JA);
}
////alloc&init functions
//alloc&init internal structures only dependent on dimensions @rows,@cols
inline int allocSpMatrixInternal(idx_t rows, idx_t cols, spmat* mat){
	mat -> M = rows;
	mat -> N = cols;
	if (!(mat->IRP=calloc(mat->M+1,sizeof(*(mat->IRP))))){ //calloc only for 0th
		ERRPRINT("IRP calloc err\n");
		freeSpmatInternal(mat);
		return EXIT_FAILURE;
	}
#ifdef ROWLENS
	if (!(mat->RL = malloc(mat->M*sizeof(*(mat->RL))))){
		ERRPRINT("RL malloc err\n");
		freeSpmatInternal(mat);
		return EXIT_FAILURE;
	}
#endif
	return EXIT_SUCCESS;
}

//alloc a sparse matrix of @rows rows and @cols cols
inline spmat* allocSpMatrix(idx_t rows, idx_t cols){

	spmat* mat;
	if (!(mat = calloc(1,sizeof(*mat)))) {
		ERRPRINT("mat calloc failed\n");
		return NULL;
	}
	if (allocSpMatrixInternal(rows,cols,mat)){
		free(mat);
		return NULL;
	}
	return mat;
}

#endif
@ -0,0 +1,138 @@
/*
 * Sp3MM_for_AlgebraicMultiGrid
 * (C) Copyright 2021-2022
 * Andrea Di Iorio
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions, and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
 *    not be used to endorse or promote products derived from this
 *    software without specific written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include "sparseMatrix.h"
#include "config.h"

#ifndef OFF_F
	#error OFF_F required
#endif
//////////////////////////////// CSR SPECIFIC /////////////////////////////////
///SPARSE MATRIX PARTITIONING
/*
 * partition CSR sparse matrix @A in @gridCols columns partitions,
 * returning an offsets matrix out[i][j] = start of jth colPartition of row i
 * subdivide @A columns in uniform cols ranges in the output
 */
idx_t* CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols);
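/*
 * Illustrative note (not in the original source), assuming gridCols+1 offsets
 * are stored per row: for a CSR matrix with N=8 and gridCols=2 the uniform
 * column ranges are [0,4) and [4,8), and column slice j of row i spans
 * JA[ out[i][j] .. out[i][j+1] ), mirroring how IRP delimits whole rows.
 */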

/*
 * partition CSR sparse matrix @A in @gridCols columns partitions as
 * independent, separately allocated sparse matrixes, and return them
 * subdivide @A columns in uniform cols ranges in the output
 */
spmat* CAT(colsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols);
//same as above but with (aux) use of the offsets partitioning (also returned if @colPartsOffsets!=NULL)
spmat* CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)(spmat* A,uint gridCols,idx_t** colPartsOffsets);

//same as checkOverallocPercent but with 2D partitioning - CSR col partitioning
void CAT(checkOverallocRowPartsPercent_,OFF_F)(ulong* forecastedSizes,spmat* AB,
	idx_t gridCols,idx_t* bColOffsets);
///////////////////////////////////////////////////////////////////////////////

///Single implementations headers
#ifndef SPARSEUTILS_H_COMMON_IDX_IMPLS
#define SPARSEUTILS_H_COMMON_IDX_IMPLS

//shift every index in the sparse data to use the matrix in a Fortran app
inline void C_FortranShiftIdxs(spmat* m){
	for(ulong r=0; r<m->M+1; m -> IRP[r]++,r++);
	for(ulong i=0; i<m->NZ;  m -> JA[i]++, i++);
}
//shift every index in the sparse data to use the matrix in a C app
inline void Fortran_C_ShiftIdxs(spmat* m){	//TODO DBG ONLY and completeness
	for(ulong r=0; r<m->M+1; m -> IRP[r]--,r++);
	for(ulong i=0; i<m->NZ;  m -> JA[i]--, i++);
}
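/*
 * Illustrative note (not in the original source): on the CSR example above,
 * C_FortranShiftIdxs turns IRP = {0,2,2,4} into {1,3,3,5} and JA = {0,2,1,3}
 * into {1,3,2,4}, i.e. 0-based C indexing becomes the 1-based indexing
 * expected on the Fortran side; Fortran_C_ShiftIdxs undoes the shift.
 */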

/*
 * check SpMM resulting matrix @AB = A * B nnz distribution in rows
 * against the preallocated, forecasted sizes in @forecastedSizes
 * in @forecastedSizes there's, for each row, the forecasted size,
 * and in the last entry the cumulative size of the whole matrix
 */
void checkOverallocPercent(ulong* forecastedSizes,spmat* AB);
/*
 * check if sparse matrixes A<->B differ up to
 * DOUBLE_DIFF_THREASH per element
 */
int spmatDiff(spmat* A, spmat* B);
////dyn alloc of spMM output matrix
/*
///size prediction of AB = @A * @B
inline ulong SpMMPreAlloc(spmat* A,spmat* B){
	//TODO BETTER PREALLOC HEURISTICS HERE
	return MAX(A->NZ,B->NZ);
}
//init a sparse matrix AB=@A * @B with an initial allocated space given by a heuristic
inline spmat* initSpMatrixSpMM(spmat* A, spmat* B){
	spmat* out;
	if (!(out = allocSpMatrix(A->M,B->N)))	return NULL;
	out -> NZ = SpMMPreAlloc(A,B);
	if (!(out->AS = malloc(out->NZ*sizeof(*(out->AS))))){
		ERRPRINT("initSpMatrix: out->AS malloc errd\n");
		free(out);
		return NULL;
	}
	if (!(out->JA = malloc(out->NZ*sizeof(*(out->JA))))){
		ERRPRINT("initSpMatrix: out->JA malloc errd\n");
		freeSpmat(out);
		return NULL;
	}
	return out;
}

#define REALLOC_FACTOR	1.5
//realloc sparse matrix NZ arrays
inline int reallocSpMatrix(spmat* mat,ulong newSize){
	mat->NZ *= newSize;
	void* tmp;
	if (!(tmp = realloc(mat->AS,mat->NZ * sizeof(*(mat->AS))))){
		ERRPRINT("reallocSpMatrix: realloc AS errd\n");
		return EXIT_FAILURE;
	}
	mat->AS = tmp;
	if (!(tmp = realloc(mat->JA,mat->NZ * sizeof(*(mat->JA))))){
		ERRPRINT("reallocSpMatrix: realloc JA errd\n");
		return EXIT_FAILURE;
	}
	mat->JA = tmp;
	return EXIT_SUCCESS;
}
*/
////MISC
//print useful information about the 3SPMM about to be computed
void print3SPMMCore(spmat* R,spmat* AC,spmat* P,CONFIG* conf);
void printSparseMatrix(spmat* sparseMat,char justNZMarkers);
/*convert @sparseMat sparse matrix into a dense matrix, returned*/
double* CSRToDense(spmat* sparseMat);

void freeAccsDense(ACC_DENSE* vectors,ulong num);

#endif //SPARSEUTILS_H_COMMON_IDX_IMPLS
@ -0,0 +1,52 @@
/*
 * Sp3MM_for_AlgebraicMultiGrid
 * (C) Copyright 2021-2022
 * Andrea Di Iorio
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions, and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
 *    not be used to endorse or promote products derived from this
 *    software without specific written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#ifndef SPARSEUTILSMULTI_H
#define SPARSEUTILSMULTI_H

#ifdef OFF_F //save "includer" OFF_F value before overwriting it
	#pragma push_macro("OFF_F")
	#define _OFF_F_OLD
	#undef  OFF_F
#endif

#define OFF_F 0
#include "sparseUtilsGeneric.h"
#undef OFF_F

#define OFF_F 1
#include "sparseUtilsGeneric.h"
#undef OFF_F
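/*
 * Illustrative note (not in the original source): each re-include stamps out a
 * full copy of the generic declarations with OFF_F token-pasted via CAT, e.g.
 * colsPartitioningUnifRanges_0 (0-based, C indexing) and
 * colsPartitioningUnifRanges_1 (1-based, Fortran indexing) from the single
 * generic declaration in sparseUtilsGeneric.h.
 */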

#ifdef _OFF_F_OLD
	#pragma pop_macro("OFF_F")
	#undef _OFF_F_OLD
#endif

#endif
@ -0,0 +1,181 @@
/*
 * Sp3MM_for_AlgebraicMultiGrid
 * (C) Copyright 2021-2022
 * Andrea Di Iorio
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions, and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
 *    not be used to endorse or promote products derived from this
 *    software without specific written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#ifndef UTILS
#define UTILS

//#include <stddef.h>
#include "macros.h"

#include "linuxK_rbtree_minimalized.h"
extern int urndFd;	//file descriptor of DRNG_DEVFILE, opened O_RDONLY
int init_urndfd();	// wrap init urndFd

///IO
//UNBUFFERED IO
/*
 * urndFd usage template to populate a random timeout
	if(_read_wrap(urndFd,(char*)&timeout,sizeof(timeout))<0){
		fprintf(stderr,"rnd read for thread's timeout failed");
		ret=EXIT_FAILURE;
		goto end;
	}
 */
//wrap read cycle over @fd
int read_wrap(int fd,void* dst,size_t count);
int fread_wrap(FILE* fp,void* dst,size_t count);
//dual of read_wrap
int write_wrap(int fd,void* src,size_t count);
//create or open the file at @outFpath for writing
int createNewFile(char* const outFpath);
///STRUCTURED DATA IO
#define DOUBLE_STR_FORMAT "%25le\n"
//write double vector @v as a row sequence of doubles at @fpath
//e.g. read with od -tf8 -w8 fpath :	OCTALOFFSET: DOUBLE FULL DIGITS
int writeDoubleVector(char* fpath,double* v,ulong size);
/*
 * read a vector of doubles [as Str] of arbitrary size from @fpath, true length in *size
 * if *size points to a nonzero value, the initial allocation will be of *size
 * eventual successive reallocations are done multiplying *size by VECTOR_STEP_REALLOC
 */
double* readDoubleVector(char* fpath,ulong* size);
double* readDoubleVectorStr(char* fpath,ulong* size);

///STRUCTURED DATA IO -- BUFFERED: FSCANF - FPRINTF
//dual of readDoubleVectorStr
int writeDoubleVectorAsStr(char* fpath,double* v,ulong size);
int MPI_Dims_create(int nnodes, int ndims, int dims[]); //commons/ompi_dims_create/ompi_dims_create.c

#include "config.h"
///config from ENV
#define GRID_ROWS "GRID_ROWS"
#define GRID_COLS "GRID_COLS"
//parse configuration from env
int getConfig(CONFIG* conf);

//append-only list implemented with a reallocated array
typedef struct{
	ulong* a;
	ulong  size;
	ulong  lastIdx;
} APPENDARRAY;
//append @val to @list, reallocating if the end is reached
//TODO inline int appendArr(ulong val,APPENDARRAY* list);

void sortuint(uint* arr, uint len);	//sort uint array @arr of @len elements
void sort_idx_t(idx_t* arr, idx_t len);
void sortulong(ulong* arr, ulong len);
void sortRbNode(rbNode* arr,idx_t len);

///ranges functions
/*
 * considering @rangesN uniform ranges in [0,rangesEnd)
 * return the rangeId that element @i falls in
 */
inline ushort matchingUnifRangeIdxLinear(idx_t i,idx_t rangesEnd,ushort rangesN){
	double rangeW = rangesEnd / (double) rangesN;
	//range j ends (exclusive) at (j+1)*rangeW; scan the ends in order
	for(ushort j=0; j < rangesN; j++){
		if( i < rangeW * (j+1) )	return j;
	}
	assert( FALSE );	//i should be in a range!
	return EXIT_FAILURE;
}
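/*
 * Illustrative note (not in the original source): with rangesEnd=10 and
 * rangesN=4, rangeW=2.5 and the ranges are [0,2.5) [2.5,5) [5,7.5) [7.5,10);
 * i=6 fails 6<2.5 and 6<5, passes 6<7.5, so the function returns range 2.
 */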
/*
 * find which range @idx matches among
 * a uniform division of @size elements in @rangesN ranges with carried remainder
 * return the 0-based idx of the matched range
 */
inline ushort matchingUnifRangeIdx(idx_t idx, idx_t size, ushort rangesN){
	idx_t rangeW = size/rangesN, rangeRem = size%rangesN;
	idx_t searchStart,searchEnd;
	//binary search among the ranges' [start,end] boundaries of range r
	for(ushort r=rangesN/2-1, hMoveWidth=rangesN/2; hMoveWidth>0; hMoveWidth/=2){
		searchStart = unifRemShareStart(r,rangeW,rangeRem);
		searchEnd   = unifRemShareEnd(r,rangeW,rangeRem);
		if	(IN_RANGE(idx,searchStart,searchEnd))	return r;
		else if (idx < searchStart)	r = AVG(0,r);
		else				r = AVG(r,rangesN-1);
	}
	assert(FALSE);	//idx should be in a range!
	return EXIT_FAILURE;
}

///reductionUtils
inline idx_t reductionSumSeq(idx_t* arr,idx_t arrLen){
	idx_t i,out;
	for(i=0,out=0; i<arrLen; out += arr[i++] );
	return out;
}
inline idx_t reductionMaxSeq(idx_t* arr,idx_t arrLen){
	idx_t out = 0;
	for(idx_t i=0; i<arrLen; i++)	out = out<arr[i] ? arr[i] : out;
	return out;
}
inline idx_t reductionSumOmp(idx_t* arr,idx_t arrLen){
	idx_t out = 0;	//reduction var must start from the identity value
	#pragma omp parallel for reduction(+:out)
	for(idx_t i=0; i<arrLen; i++){
		out += arr[i];
	}
	return out;
}
inline idx_t reductionMaxOmp(idx_t* arr,idx_t arrLen){
	idx_t out = 0;	//reduction var must start from the identity value
	#pragma omp parallel for reduction(max:out)
	for(idx_t i=0; i<arrLen; i++){	out = out<arr[i] ? arr[i] : out; }
	return out;
}

/*
 * return 0 if vectors @a and @b have elements that differ at most by DOUBLE_DIFF_THREASH
 * if @diffMax!=NULL, save there the max difference value
 * between the 2 entries of the input vectors, signed as a[i] - b[i] (as well as dump prints)
 * CONVENTION:	@a = true result, @b = vector to check against the true result
 */
int doubleVectorsDiff(double* a, double* b, ulong n,double* diffMax);
//fill the random vector @v of @size doubles
int fillRndVector(ulong size, double* v);
//read vector as a sequence of space separated doubles from file at @fpath
#define VECTOR_STEP_MALLOC 100

/*
 * decompress the file at @path into @tmpFsDecompressPath;
 * the decompression command is obtained by first looking at the extension,
 * then matching it against a list of available decompression cmds
 * that can be made into a shell cmd by adding @path > @tmpFsDecompressPath
 * e.g. decompress:	xz -d -c @path > @tmpFsDecompressPath
 * Returns: -1 if decompression wasn't possible, otherwise the decompress command exit status
 */
int extractInTmpFS(char* path, char* tmpFsDecompressPath);
//compute E[@values] in @out[0] and VAR[@values] in @out[1] of @numVals values
void statsAvgVar(double* values,uint numVals, double* out);
void printMatrix(double* mat,ulong m,ulong n,char justNZMarkers);
void printVector(double* v,ulong size);

void assertArrNoRepetitions(idx_t* arrSorted, idx_t arrLen);
#endif
@ -0,0 +1,414 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
  Red Black Trees
  (C) 1999  Andrea Arcangeli <andrea@suse.de>
  (C) 2002  David Woodhouse <dwmw2@infradead.org>
  (C) 2012  Michel Lespinasse <walken@google.com>


  linux/lib/rbtree.c
*/
/*
 * RedBlackTree_linux_userspace
 * (C) Copyright 2021-2022
 * Andrea Di Iorio
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions, and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the RedBlackTree_linux_userspace or the names of its contributors may
 *    not be used to endorse or promote products derived from this
 *    software without specific written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE RedBlackTree_linux_userspace GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

///#include <linux/rbtree_augmented.h>	//TODO LESS_DEPENDENCIES
#include "linuxK_rbtree_minimalized.h"	///fulfill embedded deps needed //TODO LESS_DEPENDENCIES
//#include <linux/export.h>

/* MINIMALIZED VERSION WITHOUT THE IMPORT OF rbtree_augmented
 * red-black trees properties:  https://en.wikipedia.org/wiki/Rbtree
 *
 *  1) A node is either red or black
 *  2) The root is black
 *  3) All leaves (NULL) are black
 *  4) Both children of every red node are black
 *  5) Every simple path from root to leaves contains the same number
 *     of black nodes.
 *
 *  4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two
 *  consecutive red nodes in a path and every red node is therefore followed by
 *  a black. So if B is the number of black nodes on every simple path (as per
 *  5), then the longest possible path due to 4 is 2B.
 *
 *  We shall indicate color with case, where black nodes are uppercase and red
 *  nodes will be lowercase. Unknown color nodes shall be drawn as red within
 *  parentheses and have some accompanying text comment.
 */

/*
 * Notes on lockless lookups:
 *
 * All stores to the tree structure (rb_left and rb_right) must be done using
 * WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the
 * tree structure as seen in program order.
 *
 * These two requirements will allow lockless iteration of the tree -- not
 * correct iteration mind you, tree rotations are not atomic so a lookup might
 * miss entire subtrees.
 *
 * But they do guarantee that any such traversal will only see valid elements
 * and that it will indeed complete -- does not get stuck in a loop.
 *
 * It also guarantees that if the lookup returns an element it is the 'correct'
 * one. But not returning an element does _NOT_ mean it's not present.
 *
 * NOTE:
 *
 * Stores to __rb_parent_color are not important for simple lookups so those
 * are left undone as of now. Nor did I check for loops involving parent
 * pointers.
 */

///rb_set_black	//TODO ADDONLY_GENERIC

static inline struct rb_node *rb_red_parent(struct rb_node *red)
{
	return (struct rb_node *)red->__rb_parent_color;
}

/*
 * Helper function for rotations:
 * - old's parent and color get assigned to new
 * - old gets assigned new as a parent and 'color' as a color.
 */
static inline void
__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new,
			struct rb_root *root, int color)
{
	struct rb_node *parent = rb_parent(old);
	new->__rb_parent_color = old->__rb_parent_color;
	rb_set_parent_color(old, new, color);
	__rb_change_child(old, new, parent, root);
}

///TODO REMOVE AUGMENT
static __always_inline void
__rb_insert(struct rb_node *node, struct rb_root *root)
{
	struct rb_node *parent = rb_red_parent(node), *gparent, *tmp;

	while (true) {
		/*
		 * Loop invariant: node is red.
		 */
		if (unlikely(!parent)) {
			/*
			 * The inserted node is root. Either this is the
			 * first node, or we recursed at Case 1 below and
			 * are no longer violating 4).
			 */
			rb_set_parent_color(node, NULL, RB_BLACK);
			break;
		}

		/*
		 * If there is a black parent, we are done.
		 * Otherwise, take some corrective action as,
		 * per 4), we don't want a red root or two
		 * consecutive red nodes.
		 */
		if(rb_is_black(parent))
			break;

		gparent = rb_red_parent(parent);

		tmp = gparent->rb_right;
		if (parent != tmp) {	/* parent == gparent->rb_left */
			if (tmp && rb_is_red(tmp)) {
				/*
				 * Case 1 - node's uncle is red (color flips).
				 *
				 *       G            g
				 *      / \          / \
				 *     p   u  -->   P   U
				 *    /            /
				 *   n            n
				 *
				 * However, since g's parent might be red, and
				 * 4) does not allow this, we need to recurse
				 * at g.
				 */
				rb_set_parent_color(tmp, gparent, RB_BLACK);
				rb_set_parent_color(parent, gparent, RB_BLACK);
				node = gparent;
				parent = rb_parent(node);
				rb_set_parent_color(node, parent, RB_RED);
				continue;
			}

			tmp = parent->rb_right;
			if (node == tmp) {
				/*
				 * Case 2 - node's uncle is black and node is
				 * the parent's right child (left rotate at parent).
				 *
				 *      G             G
				 *     / \           / \
				 *    p   U  -->    n   U
				 *     \           /
				 *      n         p
				 *
				 * This still leaves us in violation of 4), the
				 * continuation into Case 3 will fix that.
				 */
				tmp = node->rb_left;
				WRITE_ONCE(parent->rb_right, tmp);
				WRITE_ONCE(node->rb_left, parent);
				if (tmp)
					rb_set_parent_color(tmp, parent,
							    RB_BLACK);
				rb_set_parent_color(parent, node, RB_RED);
				///augment_rotate(parent, node);	//TODO DEL AUGMENTED DEP
				parent = node;
				tmp = node->rb_right;
			}

			/*
			 * Case 3 - node's uncle is black and node is
			 * the parent's left child (right rotate at gparent).
			 *
			 *        G           P
			 *       / \         / \
			 *      p   U  -->  n   g
			 *     /                 \
			 *    n                   U
			 */
			WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */
			WRITE_ONCE(parent->rb_right, gparent);
			if (tmp)
				rb_set_parent_color(tmp, gparent, RB_BLACK);
			__rb_rotate_set_parents(gparent, parent, root, RB_RED);
			///augment_rotate(gparent, parent);	//TODO DEL_AUGMENTED_DEP
			break;
		} else {
			tmp = gparent->rb_left;
			if (tmp && rb_is_red(tmp)) {
				/* Case 1 - color flips */
				rb_set_parent_color(tmp, gparent, RB_BLACK);
				rb_set_parent_color(parent, gparent, RB_BLACK);
				node = gparent;
				parent = rb_parent(node);
				rb_set_parent_color(node, parent, RB_RED);
				continue;
			}

			tmp = parent->rb_left;
			if (node == tmp) {
				/* Case 2 - right rotate at parent */
				tmp = node->rb_right;
				WRITE_ONCE(parent->rb_left, tmp);
				WRITE_ONCE(node->rb_right, parent);
				if (tmp)
					rb_set_parent_color(tmp, parent,
							    RB_BLACK);
				rb_set_parent_color(parent, node, RB_RED);
				///augment_rotate(parent, node);	//TODO DEL_AUGMENTED_DEP
				parent = node;
				tmp = node->rb_left;
			}

			/* Case 3 - left rotate at gparent */
			WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */
			WRITE_ONCE(parent->rb_left, gparent);
			if (tmp)
				rb_set_parent_color(tmp, gparent, RB_BLACK);
			__rb_rotate_set_parents(gparent, parent, root, RB_RED);
			///augment_rotate(gparent, parent);	//TODO DEL_AUGMENTED_DEP
			break;
		}
	}
}

/*
 * Inline version for rb_erase() use - we want to be able to inline
 * and eliminate the dummy_rotate callback there
 */
///____rb_erase_color	//TODO ADDONLY_GENERIC

/* Non-inline version for rb_erase_augmented() use */
///__rb_erase_color	//TODO ADDONLY_GENERIC

/* TODO LESS_DEPENDENCIES MINIMALIZED
 * Non-augmented rbtree manipulation functions.
 *
 * We use dummy augmented callbacks here, and have the compiler optimize them
 * out of the rb_insert_color() and rb_erase() function definitions.

static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {}
static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {}
static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {}

static const struct rb_augment_callbacks dummy_callbacks = {
	.propagate = dummy_propagate,
	.copy = dummy_copy,
	.rotate = dummy_rotate
};*/
void rb_insert_color(struct rb_node *node, struct rb_root *root)
{
	__rb_insert(node, root);
}

///rb_erase	//TODO ADDONLY_GENERIC

/** TODO LESS_DEPENDENCIES - AUGMENTED REMOVE
 * Augmented rbtree manipulation functions.
 *
 * This instantiates the same __always_inline functions as in the non-augmented
 * case, but this time with user-defined callbacks.
 */
///__rb_insert_augmented	todo LESS_DEPENDENCIES - AUGMENTED REMOVE

////// Traversing
/*
 * This function returns the first node (in sort order) of the tree.
 */
struct rb_node *rb_first(const struct rb_root *root)
{
	struct rb_node	*n;

	n = root->rb_node;
	if (!n)
		return NULL;
	while (n->rb_left)
		n = n->rb_left;
	return n;
}

struct rb_node *rb_last(const struct rb_root *root)
{
	struct rb_node	*n;

	n = root->rb_node;
	if (!n)
		return NULL;
	while (n->rb_right)
		n = n->rb_right;
	return n;
}

struct rb_node *rb_next(const struct rb_node *node)
{
	struct rb_node *parent;

	if (RB_EMPTY_NODE(node))
		return NULL;

	/*
	 * If we have a right-hand child, go down and then left as far
	 * as we can.
	 */
	if (node->rb_right) {
		node = node->rb_right;
		while (node->rb_left)
			node = node->rb_left;
		return (struct rb_node *)node;
	}

	/*
	 * No right-hand children. Everything down and left is smaller than us,
	 * so any 'next' node must be in the general direction of our parent.
	 * Go up the tree; any time the ancestor is a right-hand child of its
	 * parent, keep going up. First time it's a left-hand child of its
	 * parent, said parent is our 'next' node.
	 */
	while ((parent = rb_parent(node)) && node == parent->rb_right)
		node = parent;

	return parent;
}

struct rb_node *rb_prev(const struct rb_node *node)
{
	struct rb_node *parent;

	if (RB_EMPTY_NODE(node))
		return NULL;

	/*
	 * If we have a left-hand child, go down and then right as far
	 * as we can.
	 */
	if (node->rb_left) {
		node = node->rb_left;
		while (node->rb_right)
			node = node->rb_right;
		return (struct rb_node *)node;
	}

	/*
	 * No left-hand children. Go up till we find an ancestor which
	 * is a right-hand child of its parent.
	 */
	while ((parent = rb_parent(node)) && node == parent->rb_left)
		node = parent;

	return parent;
}

///rb_replace_node	//TODO ADDONLY_GENERIC
///rb_replace_node_rcu	//TODO LESS_DEPENDENCIES

static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
{
	for (;;) {
		if (node->rb_left)
			node = node->rb_left;
		else if (node->rb_right)
			node = node->rb_right;
		else
			return (struct rb_node *)node;
	}
}

struct rb_node *rb_next_postorder(const struct rb_node *node)
{
	const struct rb_node *parent;
	if (!node)
		return NULL;
	parent = rb_parent(node);

	/* If we're sitting on node, we've already seen our children */
	if (parent && node == parent->rb_left && parent->rb_right) {
		/* If we are the parent's left node, go to the parent's right
		 * node then all the way down to the left */
		return rb_left_deepest_node(parent->rb_right);
	} else
		/* Otherwise we are the parent's right node, and the parent
		 * should be next */
		return (struct rb_node *)parent;
}

struct rb_node *rb_first_postorder(const struct rb_root *root)
{
	if (!root->rb_node)
		return NULL;

	return rb_left_deepest_node(root->rb_node);
}
@ -0,0 +1,511 @@
/*
 *   Matrix Market I/O library for ANSI C
 *
 *   See http://math.nist.gov/MatrixMarket for details.
 *
 *
 */


#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#include "mmio.h"

int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
                double **val_, int **I_, int **J_)
{
    FILE *f;
    MM_typecode matcode;
    int M, N, nz;
    int i;
    double *val;
    int *I, *J;

    if ((f = fopen(fname, "r")) == NULL)
        return -1;


    if (mm_read_banner(f, &matcode) != 0)
    {
        printf("mm_read_unsymmetric_sparse: Could not process Matrix Market banner ");
        printf(" in file [%s]\n", fname);
        return -1;
    }



    if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) &&
            mm_is_sparse(matcode)))
    {
        fprintf(stderr, "Sorry, this application does not support ");
        fprintf(stderr, "Matrix Market type: [%s]\n",
                mm_typecode_to_str(matcode));
        return -1;
    }

    /* find out size of sparse matrix: M, N, nz .... */

    if (mm_read_mtx_crd_size(f, &M, &N, &nz) != 0)
    {
        fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n");
        return -1;
    }

    *M_ = M;
    *N_ = N;
    *nz_ = nz;

    /* reserve memory for matrices */

    I = (int *) malloc(nz * sizeof(int));
    J = (int *) malloc(nz * sizeof(int));
    val = (double *) malloc(nz * sizeof(double));

    *val_ = val;
    *I_ = I;
    *J_ = J;

    /* NOTE: when reading in doubles, ANSI C requires the use of the "l"  */
    /*   specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
    /*  (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15)            */

    for (i=0; i<nz; i++)
    {
        fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i]);
        I[i]--;  /* adjust from 1-based to 0-based */
        J[i]--;
    }
    fclose(f);

    return 0;
}

int mm_is_valid(MM_typecode matcode)
{
    if (!mm_is_matrix(matcode)) return 0;
    if (mm_is_dense(matcode) && mm_is_pattern(matcode)) return 0;
    if (mm_is_real(matcode) && mm_is_hermitian(matcode)) return 0;
    if (mm_is_pattern(matcode) && (mm_is_hermitian(matcode) ||
                mm_is_skew(matcode))) return 0;
    return 1;
}

int mm_read_banner(FILE *f, MM_typecode *matcode)
{
    char line[MM_MAX_LINE_LENGTH];
    char banner[MM_MAX_TOKEN_LENGTH];
    char mtx[MM_MAX_TOKEN_LENGTH];
    char crd[MM_MAX_TOKEN_LENGTH];
    char data_type[MM_MAX_TOKEN_LENGTH];
    char storage_scheme[MM_MAX_TOKEN_LENGTH];
    char *p;


    mm_clear_typecode(matcode);

    if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL)
        return MM_PREMATURE_EOF;

    if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type,
        storage_scheme) != 5)
        return MM_PREMATURE_EOF;

    for (p=mtx; *p!='\0'; *p=tolower(*p),p++);  /* convert to lower case */
    for (p=crd; *p!='\0'; *p=tolower(*p),p++);
    for (p=data_type; *p!='\0'; *p=tolower(*p),p++);
    for (p=storage_scheme; *p!='\0'; *p=tolower(*p),p++);

    /* check for banner */
    if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0)
        return MM_NO_HEADER;

    /* first field should be "mtx" */
    if (strcmp(mtx, MM_MTX_STR) != 0)
        return MM_UNSUPPORTED_TYPE;
    mm_set_matrix(matcode);


    /* second field describes whether this is a sparse matrix (in coordinate
       storage) or a dense array */


    if (strcmp(crd, MM_SPARSE_STR) == 0)
        mm_set_sparse(matcode);
    else
    if (strcmp(crd, MM_DENSE_STR) == 0)
        mm_set_dense(matcode);
    else
        return MM_UNSUPPORTED_TYPE;


    /* third field */

    if (strcmp(data_type, MM_REAL_STR) == 0)
        mm_set_real(matcode);
    else
    if (strcmp(data_type, MM_COMPLEX_STR) == 0)
        mm_set_complex(matcode);
    else
    if (strcmp(data_type, MM_PATTERN_STR) == 0)
        mm_set_pattern(matcode);
    else
    if (strcmp(data_type, MM_INT_STR) == 0)
        mm_set_integer(matcode);
    else
        return MM_UNSUPPORTED_TYPE;


    /* fourth field */

    if (strcmp(storage_scheme, MM_GENERAL_STR) == 0)
        mm_set_general(matcode);
    else
    if (strcmp(storage_scheme, MM_SYMM_STR) == 0)
        mm_set_symmetric(matcode);
    else
    if (strcmp(storage_scheme, MM_HERM_STR) == 0)
        mm_set_hermitian(matcode);
    else
    if (strcmp(storage_scheme, MM_SKEW_STR) == 0)
        mm_set_skew(matcode);
    else
        return MM_UNSUPPORTED_TYPE;


    return 0;
}
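/*
 * Illustrative note (not in the original source): a typical banner line that
 * this parser accepts is
 *      %%MatrixMarket matrix coordinate real general
 * i.e. banner, object, format (coordinate=sparse, array=dense), data type and
 * storage scheme, which map to the five sscanf'd fields above.
 */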

int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz)
{
    /* fprintf returns the number of characters written:
       any non-negative count is a successful write */
    if (fprintf(f, "%d %d %d\n", M, N, nz) < 0)
        return MM_COULD_NOT_WRITE_FILE;
    else
        return 0;
}

int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz )
{
    char line[MM_MAX_LINE_LENGTH];
    int num_items_read;

    /* set return null parameter values, in case we exit with errors */
    *M = *N = *nz = 0;

    /* now continue scanning until you reach the end-of-comments */
    do
    {
        if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
            return MM_PREMATURE_EOF;
    }while (line[0] == '%');

    /* line[] is either blank or has M,N, nz */
    if (sscanf(line, "%d %d %d", M, N, nz) == 3)
        return 0;

    else
    do
    {
        num_items_read = fscanf(f, "%d %d %d", M, N, nz);
        if (num_items_read == EOF) return MM_PREMATURE_EOF;
    }
    while (num_items_read != 3);

    return 0;
}


int mm_read_mtx_array_size(FILE *f, int *M, int *N)
{
    char line[MM_MAX_LINE_LENGTH];
    int num_items_read;
    /* set return null parameter values, in case we exit with errors */
    *M = *N = 0;

    /* now continue scanning until you reach the end-of-comments */
    do
    {
        if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
            return MM_PREMATURE_EOF;
    }while (line[0] == '%');

    /* line[] is either blank or has M,N, nz */
    if (sscanf(line, "%d %d", M, N) == 2)
        return 0;

    else /* we have a blank line */
    do
    {
        num_items_read = fscanf(f, "%d %d", M, N);
        if (num_items_read == EOF) return MM_PREMATURE_EOF;
    }
    while (num_items_read != 2);

    return 0;
}

int mm_write_mtx_array_size(FILE *f, int M, int N)
{
    /* as above: fprintf returns a character count, check for write errors */
    if (fprintf(f, "%d %d\n", M, N) < 0)
        return MM_COULD_NOT_WRITE_FILE;
    else
        return 0;
}




/*-------------------------------------------------------------------------*/

/******************************************************************/
/* use when I[], J[], and val[] are already allocated             */
/******************************************************************/

int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
        double val[], MM_typecode matcode)
{
    int i;
    if (mm_is_complex(matcode))
    {
        for (i=0; i<nz; i++)
            if (fscanf(f, "%d %d %lg %lg", &I[i], &J[i], &val[2*i], &val[2*i+1])
                != 4) return MM_PREMATURE_EOF;
    }
    else if (mm_is_real(matcode))
    {
        for (i=0; i<nz; i++)
        {
            if (fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i])
                != 3) return MM_PREMATURE_EOF;

        }
    }

    else if (mm_is_pattern(matcode))
    {
        for (i=0; i<nz; i++)
            if (fscanf(f, "%d %d", &I[i], &J[i])
                != 2) return MM_PREMATURE_EOF;
    }
    else
        return MM_UNSUPPORTED_TYPE;

    return 0;

}

int mm_read_mtx_crd_entry(FILE *f, int *I, int *J,
        double *real, double *imag, MM_typecode matcode)
{
    if (mm_is_complex(matcode))
    {
        if (fscanf(f, "%d %d %lg %lg", I, J, real, imag)
            != 4) return MM_PREMATURE_EOF;
    }
    else if (mm_is_real(matcode))
    {
        if (fscanf(f, "%d %d %lg\n", I, J, real)
            != 3) return MM_PREMATURE_EOF;

    }

    else if (mm_is_pattern(matcode))
    {
        if (fscanf(f, "%d %d", I, J) != 2) return MM_PREMATURE_EOF;
    }
    else
        return MM_UNSUPPORTED_TYPE;

    return 0;

}


/************************************************************************
    mm_read_mtx_crd()  fills M, N, nz, array of values, and return
                        type code, e.g. 'MCRS'

                        if matrix is complex, values[] is of size 2*nz,
                            (nz pairs of real/imaginary values)
************************************************************************/

int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J,
        double **val, MM_typecode *matcode)
{
    int ret_code;
    FILE *f;

    if (strcmp(fname, "stdin") == 0) f=stdin;
    else
    if ((f = fopen(fname, "r")) == NULL)
        return MM_COULD_NOT_READ_FILE;


    if ((ret_code = mm_read_banner(f, matcode)) != 0)
        return ret_code;

    if (!(mm_is_valid(*matcode) && mm_is_sparse(*matcode) &&
            mm_is_matrix(*matcode)))
        return MM_UNSUPPORTED_TYPE;

    if ((ret_code = mm_read_mtx_crd_size(f, M, N, nz)) != 0)
        return ret_code;


    *I = (int *)  malloc(*nz * sizeof(int));
    *J = (int *)  malloc(*nz * sizeof(int));
    *val = NULL;

    if (mm_is_complex(*matcode))
    {
        *val = (double *) malloc(*nz * 2 * sizeof(double));
        ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
                *matcode);
        if (ret_code != 0) return ret_code;
    }
    else if (mm_is_real(*matcode))
    {
        *val = (double *) malloc(*nz * sizeof(double));
        ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
                *matcode);
        if (ret_code != 0) return ret_code;
    }

    else if (mm_is_pattern(*matcode))
    {
        ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
                *matcode);
        if (ret_code != 0) return ret_code;
    }

    if (f != stdin) fclose(f);
    return 0;
}

int mm_write_banner(FILE *f, MM_typecode matcode)
{
    char *str = mm_typecode_to_str(matcode);
    int ret_code;

    /* fprintf returns a character count: check for write errors */
    ret_code = fprintf(f, "%s %s\n", MatrixMarketBanner, str);
    free(str);
    if (ret_code < 0)
        return MM_COULD_NOT_WRITE_FILE;
    else
        return 0;
}

int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
        double val[], MM_typecode matcode)
{
    FILE *f;
    int i;

    if (strcmp(fname, "stdout") == 0)
        f = stdout;
    else
    if ((f = fopen(fname, "w")) == NULL)
        return MM_COULD_NOT_WRITE_FILE;

    /* print banner followed by typecode */
    fprintf(f, "%s ", MatrixMarketBanner);
    fprintf(f, "%s\n", mm_typecode_to_str(matcode));

    /* print matrix sizes and nonzeros */
    fprintf(f, "%d %d %d\n", M, N, nz);

    /* print values */
    if (mm_is_pattern(matcode))
        for (i=0; i<nz; i++)
            fprintf(f, "%d %d\n", I[i], J[i]);
    else
    if (mm_is_real(matcode))
        for (i=0; i<nz; i++)
            fprintf(f, "%d %d %20.16g\n", I[i], J[i], val[i]);
    else
    if (mm_is_complex(matcode))
        for (i=0; i<nz; i++)
            fprintf(f, "%d %d %20.16g %20.16g\n", I[i], J[i], val[2*i],
                        val[2*i+1]);
    else
    {
        if (f != stdout) fclose(f);
        return MM_UNSUPPORTED_TYPE;
    }

    if (f != stdout) fclose(f);

    return 0;
}


/**
*  Create a new copy of a string s.  mm_strdup() is a common routine, but
*  not part of ANSI C, so it is included here.  Used by mm_typecode_to_str().
*
*/
char *mm_strdup(const char *s)
{
    int len = strlen(s);
    char *s2 = (char *) malloc((len+1)*sizeof(char));
    return strcpy(s2, s);
}

char *mm_typecode_to_str(MM_typecode matcode)
{
    char buffer[MM_MAX_LINE_LENGTH];
    char *types[4];
    char *mm_strdup(const char *);
    int error = 0;

    /* check for MTX type */
    if (mm_is_matrix(matcode))
        types[0] = MM_MTX_STR;
    else
        error = 1;

    /* check for CRD or ARR matrix */
    if (mm_is_sparse(matcode))
        types[1] = MM_SPARSE_STR;
    else
    if (mm_is_dense(matcode))
        types[1] = MM_DENSE_STR;
    else
        return NULL;

    /* check for element data type */
    if (mm_is_real(matcode))
        types[2] = MM_REAL_STR;
    else
    if (mm_is_complex(matcode))
        types[2] = MM_COMPLEX_STR;
    else
    if (mm_is_pattern(matcode))
        types[2] = MM_PATTERN_STR;
    else
    if (mm_is_integer(matcode))
        types[2] = MM_INT_STR;
    else
        return NULL;


    /* check for symmetry type */
    if (mm_is_general(matcode))
        types[3] = MM_GENERAL_STR;
    else
    if (mm_is_symmetric(matcode))
        types[3] = MM_SYMM_STR;
    else
    if (mm_is_hermitian(matcode))
        types[3] = MM_HERM_STR;
    else
    if (mm_is_skew(matcode))
        types[3] = MM_SKEW_STR;
    else
        return NULL;

    sprintf(buffer,"%s %s %s %s", types[0], types[1], types[2], types[3]);
    return mm_strdup(buffer);

}
@ -0,0 +1,391 @@
/*
 * Sp3MM_for_AlgebraicMultiGrid
 * (C) Copyright 2021-2022
 * Andrea Di Iorio
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions, and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the Sp3MM_for_AlgebraicMultiGrid or the names of its contributors may
 *    not be used to endorse or promote products derived from this
 *    software without specific written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Sp3MM_for_AlgebraicMultiGrid GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "sparseMatrix.h"
#include "mmio.h"
#include "parser.h"
#include "macros.h"
#include "utils.h"

////COO PARSE
int MMCheck(MM_typecode mcode) {
	if (!mm_is_matrix(mcode)){	//consistency checks among flags in @mcode
		ERRPRINT("invalid matrix: not a matrix\n");
		return EXIT_FAILURE;
	}
	if (mm_is_dense(mcode) ){	//|| mm_is_array(mcode) ){
		ERRPRINT("invalid matrix: not a supported sparse matrix\tDENSE MAT\n");
		return EXIT_FAILURE;
	}
	return EXIT_SUCCESS;
}

entry* MMtoCOO(ulong* NZ, FILE *fp, MM_typecode mcode,ulong* rowLens){
	int scanndRet = 0;
	ulong nzTrgt = *NZ, nzIdx = 0;	//expanded num of nz (in case of sym matrix)
	ulong diagEntries = 0, row = 0, col = 0;	//current entry's row,col from MM -> 1 based
	double val = 0;
	entry* entries = NULL;	//COO parsed entries
	///init
	if (mm_is_symmetric(mcode)){
		nzTrgt = 2 * (*NZ);	//upscale max num of nz in the matrix
		VERBOSE	printf("MMtoCOO:\tparsing a symmetric matrix\n");
	}
	if (!(entries = malloc(nzTrgt * sizeof(*entries)))){
		ERRPRINT("MMtoCOO: entries malloc errd\n");
		return NULL;
	}
	///parse MM fp lines into COOordinate entries
	while (1) {	// read the fp until EOF
		if (mm_is_pattern(mcode)){
			scanndRet = fscanf(fp, "%lu %lu\n", &row, &col);
			val = 1.0;
		} else if (mm_is_real(mcode) || (mm_is_integer(mcode))){
			scanndRet = fscanf(fp, "%lu %lu %lf\n", &row, &col, &val);
		}


		if (scanndRet == EOF){	//TODO more strict check with type&ret?
			if (ferror(fp)){
				perror("fscanf EOF");
				goto _err;
			} else	break;
		}
		CONSISTENCY_CHECKS{
			//TODO USELESS ? ? ?
			if ((mm_is_pattern(mcode)  && scanndRet != 2) ||
			    (!mm_is_pattern(mcode) && scanndRet != 3)){
				ERRPRINT("invalid matrix: entries not consistently scannable\n");
				goto _err;
			}
		}
		////ADD THE CURRENT MATRIX ENTRY
		rowLens[row-1]++;
		entries[nzIdx++] = (entry) { .row=row-1, .col=col-1, .val=val };
		//also mirror the entry if sym.matrix, with reflected idx inside matrix limits
		if (mm_is_symmetric(mcode) && row != col ){
			//TODO CONSTRAINED FORMAT ?&& row <= mat->N && col <= mat->M ){
			SWAP(row,col);
			rowLens[row-1]++;
			entries[nzIdx++] = (entry) { .row=row-1, .col=col-1, .val=val };
		}
		else	diagEntries++;	//for CONSISTENCY_CHECKS only
	}

	//CONSISTENCY_CHECKS
	nzTrgt = *NZ;
	if(mm_is_symmetric(mcode))	nzTrgt = 2*(*NZ) - diagEntries;
	assert( nzIdx == nzTrgt );

	//update NZ
	*NZ = nzIdx;
	return entries;

	_err:
	free(entries);	return NULL;
}
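/*
 * Illustrative note (not in the original source): a symmetric MM file stores
 * only one triangle, so an off-diagonal entry (3,1,v) is expanded here into
 * both (3,1,v) and (1,3,v), while a diagonal entry (2,2,v) is added once and
 * counted in diagEntries; hence the expanded total is 2*NZ - diagEntries,
 * which the assert above verifies.
 */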

void freeMatrixMarket(MatrixMarket* mm){
	if (!mm)	return;
	free(mm->entries);
	free(mm->rowLens);
	free(mm);
}
MatrixMarket* MMRead(char* matPath){
	FILE* fp = fopen(matPath, "r");
	if (!fp){
		perror("fopen");
		return NULL;
	}
	MatrixMarket* out = calloc(1,sizeof(*out));
	if (!out){
		ERRPRINT("MMRead out calloc errd\n");
		goto err;
	}
	//banner -> parse matrix specs
	if (mm_read_banner(fp, &out->mcode) != 0) {
		fprintf(stderr,"mm_read_banner err at:%s\n",matPath);
		goto err;
	}
	//assert matrix is compatible with this app's scope
	if (MMCheck(out->mcode))	goto err;

	//parse sizes
	//TODO OVERCOME uint limitation?
	if(mm_read_mtx_crd_size(fp,(uint*) &out->M, (uint*) &out->N, (uint*) &out->NZ)){
		fprintf(stderr,"mm_read_mtx_crd_size err at %s:\n",matPath);
		goto err;
	}
	if (!(out->rowLens = calloc(out->M,sizeof(*(out->rowLens))))){
		ERRPRINT("MMRead:\trowLens calloc errd\n");
		goto err;
	}
	if (!(out->entries = MMtoCOO(&out->NZ, fp, out->mcode,out->rowLens))){
		ERRPRINTS("MAT PARSE TO CSR ERR at:%s\n",matPath);
		goto err;
	}
	goto _end;

	err:
	freeMatrixMarket(out);
	out = NULL;
	_end:
	fclose(fp);
	return out;
}


////COO -> ANYTHING ELSE CONVERSION
int COOtoCSR(entry* entries, spmat* mat,ulong* rowLens){
	int out = EXIT_FAILURE;
	ulong idx;
	long*  _rowsLastCol = NULL;	//for each row -> last added entry's columnIdx
	ulong* rowsNextIdx  = NULL;	//for each row -> next entry's progressive idx
	if (!(rowsNextIdx = calloc(mat->M,sizeof(*rowsNextIdx)))){
		ERRPRINT("COOtoCSR: rowsNextIdx calloc errd\n");
		goto _end;
	}
	CONSISTENCY_CHECKS{	//alloc and init aux arr for entries sort check
		if (!(_rowsLastCol = malloc(mat->M*sizeof(*_rowsLastCol)))){
			ERRPRINT("COOtoCSR: _rowsLastCol malloc errd\n");
			goto _end;
		}
		memset(_rowsLastCol,-1,mat->M*sizeof(*_rowsLastCol));
	}
	/*TODO OLD
	 * //get rowLens->IRP (partial), TODO moved MMtoCOO to avoid FULL rescan entries
	 * for (ulong i=0; i<mat->NZ; i++)	mat->IRP[entries[i].row+1]++;
	 * memcpy(mat->RL,mat->IRP + 1,sizeof(*mat->IRP) * mat->M);	//TODO in next ifdef
	 * for (ulong i=2; i<mat->M+1; i++)	mat->IRP[i] += mat->IRP[i-1];
	 * OLD2: rowLens memcpy ... no, just moved the pointer
	 * #ifdef ROWLENS
	 * memcpy(mat->RL,rowLens,sizeof(*rowLens) * mat->M);	//TODO in next ifdef
	 * #endif
	 */
	//IRP: transform rows lens into increments to build the row index "pointer"
	//0th -> 0 mandatory; 1th = 0th row len, ...., M+1th = end of Mth row
	memcpy(mat->IRP+1,rowLens,sizeof(*rowLens) * mat->M);	//init IRP with rows lens
	for (ulong i=2; i<mat->M+1; i++)	mat->IRP[i] += mat->IRP[i-1];
	CONSISTENCY_CHECKS	assert(mat->IRP[mat->M] == mat->NZ);
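	/*
	 * Illustrative note (not in the original source): with rowLens = {2,0,2}
	 * the memcpy gives IRP = {0,2,0,2} and the prefix sum turns it into
	 * IRP = {0,2,2,4}, so IRP[r] now gives the start of row r in AS/JA and
	 * IRP[M] equals the total nnz, as asserted above.
	 */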
|
||||
	///FILL
	//TODO EXPECTED: entries' .col sorted within each row (verified under
	//CONSISTENCY_CHECKS), even if rows appear non-sequentially in @entries
	//write entries in CSR format
	entry* e;
	for (ulong i=0; i<mat->NZ; i++){
		e = entries+i;
		CONSISTENCY_CHECKS{	//check that COO entries are column-sorted within each row
			/*#pragma message("COO sorting check enabled")*/
			if (_rowsLastCol[e->row] >= (long) e->col){
				ERRPRINTS("not sorted entry:%lu,%lu,%lf\n",e->row,e->col,e->val);
				goto _end;
			}
			_rowsLastCol[e->row] = e->col;
		}
		idx = mat->IRP[e->row] + rowsNextIdx[e->row]++;
		mat->AS[idx] = e->val;
		mat->JA[idx] = e->col;
	}

	out = EXIT_SUCCESS;

	_end:
	if (rowsNextIdx)	free(rowsNextIdx);
	if (_rowsLastCol)	free(_rowsLastCol);

	return out;
}

int COOtoELL(entry* entries, spmat* mat, ulong* rowLens){
	int out = EXIT_FAILURE;
	ulong maxRow = 0, col, _ellEntriesTot, *rowsNextCol;
	long* _rowsLastCol = NULL;
	entry* e;
	for (ulong i=0; i<mat->M; i++)	maxRow = MAX(maxRow,rowLens[i]);
	_ellEntriesTot = 2*mat->M*maxRow;	//rough total for both AS and JA
	#ifdef LIMIT_ELL_SIZE
	if ( _ellEntriesTot > ELL_MAX_ENTRIES ){
		ERRPRINTS("Required entries %lu -> %lu MB for the matrix exceed the "
		  "designated threshold of: %lu -> %lu MB for ellpack\n",
		  _ellEntriesTot,(sizeof(double)*_ellEntriesTot) >> 20,
		  ELL_MAX_ENTRIES,(sizeof(double)*ELL_MAX_ENTRIES) >> 20);
		return EXIT_FAILURE;
	}
	#endif
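	/* ELLPACK layout sketch for the fill below (illustrative only): AS and
	 * JA are dense M x maxRow buffers addressed row-major via
	 * IDX2D(r,c,maxRow), i.e. r*maxRow + c, and rows shorter than maxRow
	 * keep their calloc'd zero tail. E.g. with maxRow=3, a row r holding
	 * values {5,7} at columns {0,2} lands as
	 *   AS row r: 5 7 0        JA row r: 0 2 0
	 */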
	//malloc aux vects
	if (!(rowsNextCol = calloc(mat->M,sizeof(*rowsNextCol)))){
		ERRPRINT("COOtoELL:\trowsNextCol calloc errd\n");
		goto _end;
	}
	CONSISTENCY_CHECKS{	//alloc and init aux arr for entries SORT CHECK
		if (!(_rowsLastCol = malloc(mat->M*sizeof(*_rowsLastCol)))){
			ERRPRINT("COOtoELL:\t_rowsLastCol malloc errd\n");
			goto _end;
		}
		memset(_rowsLastCol,-1,mat->M*sizeof(*_rowsLastCol));
	}
	///mallocs dependent on MAX ROW LEN; on error they're freed by the caller
	if (!(mat->AS = calloc(mat->M * maxRow, sizeof(*(mat->AS))))){
		ERRPRINT("COOtoELL:\tELL->AS calloc errd\n");
		goto _end;
	}	//zero init for automatic padding of rows' residual tails with 0
	if (!(mat->JA = calloc(mat->M * maxRow, sizeof(*(mat->JA))))){
		ERRPRINT("COOtoELL:\tELL->JA calloc errd\n");
		goto _end;
	}

	mat->MAX_ROW_NZ = maxRow;
	/*#ifdef ROWLENS
	 *memcpy(mat->RL,rowLens,sizeof(*rowLens) * mat->M); //TODO in next ifdef
	 *#endif
	 */
	///FILL NZ
	//TODO EXPECTED: entries' .col sorted within each row (verified under
	//CONSISTENCY_CHECKS), even if rows appear non-sequentially in @entries
	for (ulong i=0; i<mat->NZ; i++){
		e = entries + i;
		CONSISTENCY_CHECKS{	//check that COO entries are column-sorted within each row
			/*#pragma message("COO sorting check enabled")*/
			if (_rowsLastCol[e->row] >= (long) e->col){
				ERRPRINTS("not sorted entry:%lu,%lu,%lf\n",
				  e->row,e->col,e->val);
				goto _end;
			}
			_rowsLastCol[e->row] = e->col;
		}
		col = rowsNextCol[e->row]++;	//place entry in its row's next free spot
		mat->AS[ IDX2D(e->row,col,maxRow) ] = e->val;
		mat->JA[ IDX2D(e->row,col,maxRow) ] = e->col;
	}
	///FILL PAD
	//padding is already zeroed by calloc; this loop only counts padded slots
	ulong padded = 0, paddedEntries = mat->M*mat->MAX_ROW_NZ;
	for (ulong r=0; r<mat->M; r++){
		for (ulong c=rowLens[r],j=IDX2D(r,c,maxRow); c<maxRow; c++,j++,padded++){
			//mat->AS[j] = ELL_AS_FILLER;	//TODO ALREADY DONE IN CALLOC
			//mat->JA[j] = mat->JA[rowLens[r]-1];	//ELL_JA_FILLER; //TODO calloc CUDA benefit?
		}
	}
	VERBOSE{
		printf("padded %lu entries = %lf%% of NZ\n",padded,100*padded/(double) mat->NZ);
		printf("ELL matrix of: %lu paddedEntries -> %lu MB of JA+AS\n",
		  paddedEntries,(paddedEntries*sizeof(*(mat->JA))+paddedEntries*sizeof(*(mat->AS))) >> 20);
	}
	out = EXIT_SUCCESS;
	_end:
	if (rowsNextCol)	free(rowsNextCol);
	if (_rowsLastCol)	free(_rowsLastCol);
	return out;
}

////wrapper MM -> specialized target
spmat* MMtoCSR(char* matPath){
	spmat* mat = NULL;
	MatrixMarket* mm = MMRead(matPath);
	if (!mm){
		ERRPRINT("MMtoCSR: parse err\n");
		return NULL;
	}
	if (!(mat = calloc(1,sizeof(*mat)))){
		ERRPRINT("MMtoCSR: mat struct alloc errd\n");
		goto err;
	}
	mat->M = mm->M;
	mat->N = mm->N;
	mat->NZ = mm->NZ;
	//alloc sparse matrix components
	if (!(mat->IRP = calloc(mat->M+1,sizeof(*(mat->IRP))))){
		ERRPRINT("MMtoCSR: IRP calloc err\n");
		goto err;
	}
	////alloc core structs of CSR
	if (!(mat->JA = malloc(mat->NZ*sizeof(*(mat->JA))))){
		ERRPRINT("MMtoCSR: JA malloc err\n");
		goto err;
	}
	if (!(mat->AS = malloc(mat->NZ*sizeof(*(mat->AS))))){
		ERRPRINT("MMtoCSR: AS malloc err\n");
		goto err;
	}
	if (COOtoCSR(mm->entries,mat,mm->rowLens))	goto err;
	#ifdef ROWLENS
	mat->RL = mm->rowLens;
	mm->rowLens = NULL;	//avoid free in @freeMatrixMarket
	#endif

	VERBOSE
		printf("MMtoCSR: %lu NZ entries -> %lu MB of AS+JA+IRP\n",mat->NZ,
		  (mat->NZ*sizeof(*(mat->AS))+mat->NZ*sizeof(*(mat->JA))+(1+mat->M)*sizeof(*(mat->IRP)))>>20);
	goto _free;

	err:
	if (mat)	freeSpmat(mat);
	mat = NULL;
	_free:
	freeMatrixMarket(mm);
	return mat;
}

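/* Quick sanity check of the size print above (illustrative numbers, assuming
 * 8-byte doubles for AS and 8-byte ulong indices for JA/IRP): with NZ = 1e6
 * and M = 1e5, (8+8)*1e6 + 8*(1e5+1) bytes ~= 16.8e6 bytes, i.e. ~16 MB
 * after the >>20.
 */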
spmat* MMtoELL(char* matPath){
	spmat* mat = NULL;
	MatrixMarket* mm = MMRead(matPath);
	if (!mm){
		ERRPRINT("MMtoELL: parse err\n");
		return NULL;
	}
	if (!(mat = calloc(1,sizeof(*mat)))){
		ERRPRINT("MMtoELL: mat struct alloc errd\n");
		goto err;
	}
	////set sizes; core ELL structs are allocated inside COOtoELL
	mat->M = mm->M;
	mat->N = mm->N;
	mat->NZ = mm->NZ;
	if (COOtoELL(mm->entries,mat,mm->rowLens))	goto err;
	#ifdef ROWLENS
	mat->RL = mm->rowLens;
	mm->rowLens = NULL;	//avoid free in @freeMatrixMarket
	#endif

	goto _free;

	err:
	if (mat)	freeSpmat(mat);
	mat = NULL;
	_free:
	freeMatrixMarket(mm);
	return mat;
}
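/* A minimal usage sketch of the two wrappers above (illustrative only; the
 * path is an assumption, not the repo's harness):
 *
 *   spmat* csr = MMtoCSR("matrix.mtx");
 *   spmat* ell = MMtoELL("matrix.mtx");
 *   if (!csr || !ell)   exit(EXIT_FAILURE);
 *   //... run Sp3MM kernels on csr/ell ...
 *   freeSpmat(csr);
 *   freeSpmat(ell);
 *
 * Both wrappers own their intermediate MatrixMarket and free it on every
 * path, so the caller only manages the returned spmat.
 */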