From 49299912497b2781550786e2be318f5c36833a2f Mon Sep 17 00:00:00 2001 From: wlthr Date: Tue, 18 Jul 2023 10:44:04 +0200 Subject: [PATCH] changed all unsigned longs to int, modified makefile for debug purposes, reallocate c matrix arrays in sp3mm_impl.f90 --- base/serial/impl/sp3mm4amg/Makefile | 2 +- .../sp3mm4amg/Sp3MM_CSR_OMP_Num_Generic.c | 18 +-- .../Sp3MM_CSR_OMP_SymbStep_Generic.c | 21 ++-- .../impl/sp3mm4amg/Sp3MM_CSR_OMP_UB_Generic.c | 78 ++++++------ .../sp3mm4amg/commons/sparseUtilsGeneric.c | 112 +++++++++--------- base/serial/impl/sp3mm4amg/commons/utils.c | 74 ++++++------ .../impl/sp3mm4amg/fbind/psb_f_spmm_ub.c | 74 +++++++++--- .../sp3mm4amg/include/Sp3MM_CSR_OMP_Multi.h | 6 +- .../include/Sp3MM_CSR_OMP_UB_Generic.h | 2 +- .../impl/sp3mm4amg/include/SpMMUtilsGeneric.h | 68 +++++------ base/serial/impl/sp3mm4amg/include/config.h | 6 +- .../impl/sp3mm4amg/include/inlineExports.h | 14 +-- .../sp3mm4amg/include/inlineExports_Generic.h | 8 +- base/serial/impl/sp3mm4amg/include/macros.h | 2 +- .../impl/sp3mm4amg/include/ompChunksDivide.h | 10 +- base/serial/impl/sp3mm4amg/include/parser.h | 14 +-- .../impl/sp3mm4amg/include/sparseMatrix.h | 2 +- .../sp3mm4amg/include/sparseUtilsGeneric.h | 24 ++-- base/serial/impl/sp3mm4amg/include/utils.h | 26 ++-- base/serial/impl/sp3mm4amg/lib/parser.c | 52 ++++---- base/serial/impl/sp3mm_impl.f90 | 52 ++++---- 21 files changed, 351 insertions(+), 314 deletions(-) diff --git a/base/serial/impl/sp3mm4amg/Makefile b/base/serial/impl/sp3mm4amg/Makefile index 9ceb1a75..d92af835 100644 --- a/base/serial/impl/sp3mm4amg/Makefile +++ b/base/serial/impl/sp3mm4amg/Makefile @@ -10,7 +10,7 @@ CWALL+=-Wno-unused-label -Wfatal-errors CINCL = -Iinclude/ CFLAGS = -g -O3 $(CWALL) $(CINCL) -fopenmp $(RUNTIME) MACROS = -DDEBUGPRINT="if(FALSE)" -DDEBUG="if(FALSE)" -DCONSISTENCY_CHECKS="if(FALSE)" -DVERBOSE="if(FALSE)" -DDEBUGCHECKS="if(FALSE)" -MACROSDBG = -DCONSISTENCY_CHECKS="if(TRUE)" -DDEBUGCHECKS="if(TRUE)" -DVERBOSE="if(TRUE)" -DDEBUG="if(TRUE)" +MACROSDBG = -DCONSISTENCY_CHECKS="if(TRUE)" -DDEBUGCHECKS="if(TRUE)" -DVERBOSE="if(TRUE)" -DDEBUG="if(TRUE)" -DDEBUGPRINT="if(TRUE)" LDFLAGS = -lm LIBDIR=../../../ diff --git a/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_Num_Generic.c b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_Num_Generic.c index fc4306c1..5867de7d 100644 --- a/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_Num_Generic.c +++ b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_Num_Generic.c @@ -122,7 +122,7 @@ spmat* CAT(spmmRowByRow_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ bRowLen = B->IRP[ca+1] - B->IRP[ca]; CAT(scSparseVectMul_,OFF_F)(A->AS[ja], B->AS+jb,B->JA+jb,bRowLen,acc); } */ - for (ulong ja=A->IRP[r]-OFF_F; jaIRP[r+1]-OFF_F; ja++) //row-by-row formul + for (idx_t ja=A->IRP[r]-OFF_F; jaIRP[r+1]-OFF_F; ja++) //row-by-row formul CAT(scSparseRowMul_,OFF_F)(A->AS[ja], B, A->JA[ja]-OFF_F, acc); //direct sparsify: trasform accumulated dense vector to a CSR row sparsifyDirect(acc,AB,r); //0,NULL);TODO COL PARTITIONING COMMON API @@ -165,10 +165,10 @@ spmat* CAT(spmmRowByRow1DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ ///NUMERIC STEP //perform Gustavson over rows blocks -> M / @cfg->gridRows - ulong rowBlock = AB->M/cfg->gridRows, rowBlockRem = AB->M%cfg->gridRows; + idx_t rowBlock = AB->M/cfg->gridRows, rowBlockRem = AB->M%cfg->gridRows; ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (cfg->gridRows,AB,cfg); AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); - ulong b,startRow,block; //omp for aux vars + idx_t b,startRow,block; //omp for aux vars #pragma omp parallel for schedule(runtime) private(acc,startRow,block) for (b=0; b < cfg->gridRows; b++){ block = UNIF_REMINDER_DISTRI(b,rowBlock,rowBlockRem); @@ -184,7 +184,7 @@ spmat* CAT(spmmRowByRow1DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ bRowLen = B->IRP[ca+1] - B->IRP[ca]; CAT(scSparseVectMul_,OFF_F)(A->AS[ja], B->AS+jb,B->JA+jb,bRowLen,acc); } */ - for (ulong ja=A->IRP[r]-OFF_F; jaIRP[r+1]-OFF_F; ja++) //row-by-row formul + for (idx_t ja=A->IRP[r]-OFF_F; jaIRP[r+1]-OFF_F; ja++) //row-by-row formul CAT(scSparseRowMul_,OFF_F)(A->AS[ja], B, A->JA[ja]-OFF_F, acc); //direct sparsify: trasform accumulated dense vector to a CSR row sparsifyDirect(acc,AB,r); //0,NULL);TODO COL PARTITIONING COMMON API @@ -209,7 +209,7 @@ spmat* CAT(spmmRowByRow1DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ } spmat* CAT(spmmRowByRow2DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ - DEBUG printf("spmm\trowBlocks of A ,\tcolBlocks of B\tM=%luxN=%lu\n",A->M,B->N); + DEBUG printf("spmm\trowBlocks of A ,\tcolBlocks of B\tM=%dxN=%d\n",A->M,B->N); idx_t* bColOffsets = NULL; //B group columns starting offset for each row ACC_DENSE *accVectors=NULL,*accV; SPACC* accRowPart; @@ -218,10 +218,10 @@ spmat* CAT(spmmRowByRow2DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ idx_t *rowsPartsSizes=NULL, *rowSizes=NULL; //for rows' cols partition, correct len if (!AB) goto _err; //2D indexing aux vars - ulong gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols; - ulong _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows; - ulong _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols; - ulong startRow,startCol,rowBlock,colBlock; //data division aux variables + idx_t gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols; + idx_t _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows; + idx_t _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols; + idx_t startRow,startCol,rowBlock,colBlock; //data division aux variables ////get bColOffsets for B column groups if (!(bColOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols))) goto _err; diff --git a/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_SymbStep_Generic.c b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_SymbStep_Generic.c index c41242a5..4ae12197 100644 --- a/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_SymbStep_Generic.c +++ b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_SymbStep_Generic.c @@ -114,7 +114,7 @@ static inline idx_t CAT4(SpMM_Row_Symb_Rbtree,OUT_IDXS,COL_PARTS,OFF_F) * sorting inplace the nodes inserted in the rbtree */ sortRbNode(nodes,abRowLen); #elif _OUT_IDXS == T || _COL_PARTS == T - uint i=0; + int i=0; idx_t k; #if _COL_PARTS == T //colParts aux vars @@ -144,7 +144,7 @@ static inline idx_t CAT4(SpMM_Row_Symb_Rbtree,OUT_IDXS,COL_PARTS,OFF_F) idx_t k; for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){ k = rb_entry(n,rbNode,rb)->key; - printf("%lu, ",k); + printf("%d, ",k); } printf("\n"); }*/ @@ -327,7 +327,8 @@ idx_t* CAT4(SpMM_Symb_,OUT_IDXS,COL_PARTS,OFF_F) goto _err; } #endif //_COL_PARTS - uint maxThreads = omp_get_max_threads(); //TODO FROM CFG + int maxThreads; + maxThreads = omp_get_max_threads(); //TODO FROM CFG //index keeping aux struct //rbtree implementation or idxMap with aux of symbTree for tmp outIdx keeping if ( rbTreeUsed ){ @@ -340,7 +341,7 @@ idx_t* CAT4(SpMM_Symb_,OUT_IDXS,COL_PARTS,OFF_F) goto _err; } //init roots - for (uint i=0; iN,idxsMapAccs+i)) goto _err; } } @@ -409,7 +410,7 @@ idx_t* CAT4(SpMM_Symb_,OUT_IDXS,COL_PARTS,OFF_F) free(rbRoots); free(rbNodes); if (idxsMapAccs){ - for (uint i=0; ikey; for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){ outIdxs[ i++ ] = rb_entry(n,rbNode,rb)->key; @@ -495,7 +496,7 @@ idx_t* CAT3(Sp3MM_Symb_,OUT_IDXS,OFF_F) *outIdxs[i] = upperBoundedSymMat + cumul; #endif //#if _OUT_IDXS == TRUE //rbTrees for index keeping - uint maxThreads = omp_get_max_threads(); //TODO FROM CFG + int maxThreads = omp_get_max_threads(); //TODO FROM CFG idx_t abMaxRowLen = reductionMaxSeq(abUpperBoundedRowsLens, a->M); #ifdef HEURISTICS_UB idx_t maxRowLenUB = abMaxRowLen * SP3MM_UB_HEURISTIC; //TODO UB HEURISTC @@ -515,7 +516,7 @@ idx_t* CAT3(Sp3MM_Symb_,OUT_IDXS,OFF_F) goto _err; } //init roots - for (uint i=0; iN,idxsMapAccs+i)) goto _err; } } diff --git a/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_UB_Generic.c b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_UB_Generic.c index a5e6d3be..7112d1a0 100644 --- a/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_UB_Generic.c +++ b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_UB_Generic.c @@ -42,7 +42,7 @@ spmat* CAT(spmmSerial_,OFF_F)(spmat* A,spmat* B, CONFIG* _cfg){ //serial impleme if ( allocAccDense(&acc,B->N) ) goto _free; if (!(AB = allocSpMatrix(A->M,B->N))) goto _free; for( idx_t r=0; rM; r++ ){ - for (ulong c=A->IRP[r]-OFF_F; cIRP[r+1]-OFF_F; c++) //row-by-row formul + for (idx_t c=A->IRP[r]-OFF_F; cIRP[r+1]-OFF_F; c++) //row-by-row formul CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, &acc); sparsifyDirect(&acc,AB,r); //0,NULL);TODO COL PARTITIONING COMMON API } @@ -54,7 +54,7 @@ spmat* CAT(spmmSerial_,OFF_F)(spmat* A,spmat* B, CONFIG* _cfg){ //serial impleme ////////Sp3MM as 2 x SpMM ///1D spmat* CAT(spmmRowByRow_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ - DEBUG printf("spmm\trows of A,\tfull B\tM=%lu x N=%lu\n",A->M,B->N); + DEBUG printf("spmm\trows of A,\tfull B\tM=%d x N=%d\n",A->M,B->N); ///thread aux ACC_DENSE *accVects = NULL,*acc; SPMM_ACC* outAccumul=NULL; @@ -83,7 +83,7 @@ spmat* CAT(spmmRowByRow_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (AB->M,AB,cfg); AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); #pragma omp parallel for schedule(runtime) private(acc) - for (ulong r=0; rM; r++){ //row-by-row formulation + for (idx_t r=0; rM; r++){ //row-by-row formulation //iterate over nz entry index c inside current row r acc = accVects + omp_get_thread_num(); /* direct use of sparse scalar vector multiplication @@ -93,7 +93,7 @@ spmat* CAT(spmmRowByRow_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ bRowLen = B->IRP[ca+1] - B->IRP[ca]; CAT(scSparseVectMul_,OFF_F)(A->AS[ja],B->AS+jb,B->JA+jb,bRowLen,acc); }*/ - for (ulong c=A->IRP[r]-OFF_F; cIRP[r+1]-OFF_F; c++) //row-by-row formul + for (idx_t c=A->IRP[r]-OFF_F; cIRP[r+1]-OFF_F; c++) //row-by-row formul CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, acc); //trasform accumulated dense vector to a CSR row #if SPARSIFY_PRE_PARTITIONING == T @@ -124,7 +124,7 @@ spmat* CAT(spmmRowByRow_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ } idx_t CAT(spmmRowByRowCalculateSize_,OFF_F)(spmat* A, spmat*B, CONFIG* cfg, void** accumul, void **rows_sizes, void** tmp_matrix){ - DEBUG printf("spmm\trows of A,\tfull B\tM=%lu x N=%lu\n",A->M,B->N); + DEBUG printf("spmm\trows of A,\tfull B\tM=%d x N=%d\n",A->M,B->N); ///thread aux ACC_DENSE *accVects = NULL,*acc; SPMM_ACC* outAccumul=NULL; @@ -153,7 +153,7 @@ idx_t CAT(spmmRowByRowCalculateSize_,OFF_F)(spmat* A, spmat*B, CONFIG* cfg, void ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (AB->M,AB,cfg); AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); #pragma omp parallel for schedule(runtime) private(acc) - for (ulong r=0; rM; r++){ //row-by-row formulation + for (idx_t r=0; rM; r++){ //row-by-row formulation //iterate over nz entry index c inside current row r acc = accVects + omp_get_thread_num(); /* direct use of sparse scalar vector multiplication @@ -163,7 +163,7 @@ idx_t CAT(spmmRowByRowCalculateSize_,OFF_F)(spmat* A, spmat*B, CONFIG* cfg, void bRowLen = B->IRP[ca+1] - B->IRP[ca]; CAT(scSparseVectMul_,OFF_F)(A->AS[ja],B->AS+jb,B->JA+jb,bRowLen,acc); }*/ - for (ulong c=A->IRP[r]-OFF_F; cIRP[r+1]-OFF_F; c++) //row-by-row formul + for (idx_t c=A->IRP[r]-OFF_F; cIRP[r+1]-OFF_F; c++) //row-by-row formul CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, acc); //trasform accumulated dense vector to a CSR row #if SPARSIFY_PRE_PARTITIONING == T @@ -197,7 +197,7 @@ idx_t CAT(spmmRowByRowCalculateSize_,OFF_F)(spmat* A, spmat*B, CONFIG* cfg, void if(outAccumul) freeSpMMAcc(outAccumul); } -void CAT(spmmRowByRowPopulate_,OFF_F)(void** accumul, void** rows_sizes, void** tmp_matrix, double** AS, idx_t** JA, idx_t** IRP){ +void CAT(spmmRowByRowPopulate_,OFF_F)(void** accumul, void** rows_sizes, void** tmp_matrix, double* AS, idx_t* JA, idx_t* IRP){ SPMM_ACC* outAccumul= *accumul; idx_t* rowsSizes = *rows_sizes; spmat *AB = *tmp_matrix; @@ -216,7 +216,7 @@ void CAT(spmmRowByRowPopulate_,OFF_F)(void** accumul, void** rows_sizes, void** } spmat* CAT(spmmRowByRow1DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ - DEBUG printf("spmm\trowBlocks of A,\tfull B\tM=%lu x N=%lu\n",A->M,B->N); + DEBUG printf("spmm\trowBlocks of A,\tfull B\tM=%d x N=%d\n",A->M,B->N); DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols); ///thread aux ACC_DENSE *accVects = NULL,*acc; @@ -243,10 +243,10 @@ spmat* CAT(spmmRowByRow1DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ #endif //perform Gustavson over rows blocks -> M / @cfg->gridRows - ulong rowBlock = AB->M/cfg->gridRows, rowBlockRem = AB->M%cfg->gridRows; + idx_t rowBlock = AB->M/cfg->gridRows, rowBlockRem = AB->M%cfg->gridRows; ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (cfg->gridRows,AB,cfg); AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); - ulong b,startRow,block; //omp for aux vars + idx_t b,startRow,block; //omp for aux vars #pragma omp parallel for schedule(runtime) private(acc,startRow,block) for (b=0; b < cfg->gridRows; b++){ block = UNIF_REMINDER_DISTRI(b,rowBlock,rowBlockRem); @@ -255,13 +255,13 @@ spmat* CAT(spmmRowByRow1DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ DEBUGPRINT{ fflush(NULL); - printf("block %lu\t%lu:%lu(%lu)\n",b,startRow,startRow+block-1,block); + printf("block %d\t%d:%d(%d)\n",b,startRow,startRow+block-1,block); fflush(NULL); } //row-by-row formulation in the given row block - for (ulong r=startRow; rIRP[r]-OFF_F; cIRP[r+1]-OFF_F; c++) + for (idx_t c=A->IRP[r]-OFF_F; cIRP[r+1]-OFF_F; c++) CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, acc); //trasform accumulated dense vector to a CSR row #if SPARSIFY_PRE_PARTITIONING == T @@ -295,7 +295,7 @@ spmat* CAT(spmmRowByRow1DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ ///2D //PARTITIONS NOT ALLOCATED spmat* CAT(spmmRowByRow2DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ - DEBUG printf("spmm\trowBlocks of A ,\tcolBlocks of B\tM=%luxN=%lu\n",A->M,B->N); + DEBUG printf("spmm\trowBlocks of A ,\tcolBlocks of B\tM=%dxN=%d\n",A->M,B->N); DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols); idx_t* bColOffsets = NULL; //B group columns starting offset for each row ACC_DENSE *accVectors=NULL,*accV; @@ -305,10 +305,10 @@ spmat* CAT(spmmRowByRow2DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ idx_t* rowsPartsSizes=NULL; if (!AB) goto _err; //2D indexing aux vars - ulong gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols; - ulong _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows; - ulong _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols; - ulong startRow,startCol,rowBlock,colBlock; //data division aux variables + idx_t gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols; + idx_t _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows; + idx_t _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols; + idx_t startRow,startCol,rowBlock,colBlock; //data division aux variables ////get bColOffsets for B column groups if (!(bColOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols))) goto _err; @@ -344,8 +344,8 @@ spmat* CAT(spmmRowByRow2DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (gridSize,AB,cfg); AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); - ulong tileID,t_i,t_j; //for aux vars - ulong bPartLen,bPartID,bPartOffset;//B partition acces aux vars + idx_t tileID,t_i,t_j; //for aux vars + idx_t bPartLen,bPartID,bPartOffset;//B partition acces aux vars #pragma omp parallel for schedule(runtime) \ private(accV,accRowPart,rowBlock,colBlock,startRow,startCol,\ bPartLen,bPartID,bPartOffset,t_i,t_j) @@ -364,15 +364,15 @@ spmat* CAT(spmmRowByRow2DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ DEBUGPRINT{ fflush(NULL); colBlock = UNIF_REMINDER_DISTRI(t_j,_colBlock,_colBlockRem); - printf("rowBlock [%lu\t%lu:%lu(%lu)]\t",t_i,startRow,startRow+rowBlock-1,rowBlock); - printf("colBlock [%lu\t%lu:%lu(%lu)]\n",t_j,startCol,startCol+colBlock-1,colBlock); + printf("rowBlock [%d\t%d:%d(%d)]\t",t_i,startRow,startRow+rowBlock-1,rowBlock); + printf("colBlock [%d\t%d:%d(%d)]\n",t_j,startCol,startCol+colBlock-1,colBlock); fflush(NULL); } ///AB[t_i][t_j] block compute - for (ulong r=startRow; rIRP[r]-OFF_F,c; jIRP[r+1]-OFF_F; j++){ + for (idx_t j=A->IRP[r]-OFF_F,c; jIRP[r+1]-OFF_F; j++){ //get start of B[A->JA[j]][:colBlock:] c = A->JA[j]-OFF_F; // col of nnz in A[r][:] <-> target B row bPartID = IDX2D(c,t_j,cfg->gridCols); @@ -415,7 +415,7 @@ spmat* CAT(spmmRowByRow2DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ } spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ - DEBUG printf("spmm\trowBlocks of A,\tcolBlocks (allcd) of B\tM=%luxN=%lu\n",A->M,B->N); + DEBUG printf("spmm\trowBlocks of A,\tcolBlocks (allcd) of B\tM=%dxN=%d\n",A->M,B->N); DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols); spmat *AB = NULL, *colPartsB = NULL, *colPart; idx_t* rowsPartsSizes=NULL; @@ -423,14 +423,14 @@ spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg) SPMM_ACC* outAccumul=NULL; ACC_DENSE *accVectors=NULL,*accV; SPACC* accRowPart; - ulong startRow,startCol,rowBlock,colBlock; //data division aux variables + idx_t startRow,startCol,rowBlock,colBlock; //data division aux variables //2D indexing aux vars idx_t gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols; idx_t* bColOffsets = NULL; if (!(AB = allocSpMatrix(A->M,B->N))) goto _err; - ulong _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows; - ulong _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols; + idx_t _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows; + idx_t _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols; ////B cols partition in CSRs //if (!(colPartsB = CAT(colsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols))) goto _err; @@ -465,7 +465,7 @@ spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg) ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (gridSize,AB,cfg); AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); - ulong tileID,t_i,t_j; //for aux vars + idx_t tileID,t_i,t_j; //for aux vars #pragma omp parallel for schedule(runtime) \ private(accV,accRowPart,colPart,rowBlock,colBlock,startRow,startCol,t_i,t_j) for (tileID = 0; tileID < gridSize; tileID++){ @@ -484,17 +484,17 @@ spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg) DEBUGPRINT{ fflush(NULL); - printf("rowBlock [%lu\t%lu:%lu(%lu)]\t", + printf("rowBlock [%d\t%d:%d(%d)]\t", t_i,startRow,startRow+rowBlock-1,rowBlock); - printf("colBlock [%lu\t%lu:%lu(%lu)]\n", + printf("colBlock [%d\t%d:%d(%d)]\n", t_j,startCol,startCol+colBlock-1,colBlock); fflush(NULL); } ///AB[t_i][t_j] block compute - for (ulong r=startRow; rIRP[r]-OFF_F,c,bRowStart,bRowLen; jIRP[r+1]-OFF_F; j++){ + for (idx_t j=A->IRP[r]-OFF_F,c,bRowStart,bRowLen; jIRP[r+1]-OFF_F; j++){ //get start of B[A->JA[j]][:colBlock:] c = A->JA[j]-OFF_F; // column of nnz entry in A[r][:] <-> target B row bRowStart = colPart->IRP[c]; @@ -532,7 +532,7 @@ spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg) AB = NULL; _free: if (colPartsB){ - for (ulong i=0; igridCols; i++) + for (idx_t i=0; igridCols; i++) freeSpmatInternal(colPartsB+i); free(colPartsB); } @@ -558,7 +558,7 @@ spmat* CAT(sp3mmRowByRowPair_,OFF_F)(spmat* R,spmat* AC,spmat* P, /* TODO alloc dense aux vector, reusable over 3 product TODO arrays sovrallocati per poter essere riusati nelle 2 SpMM - ulong auxVectSize = MAX(R->N,AC->N); + idx_t auxVectSize = MAX(R->N,AC->N); auxVectSize = MAX(auxVectSize,P->N); */ @@ -590,7 +590,7 @@ spmat* CAT(sp3mmRowByRowPair_,OFF_F)(spmat* R,spmat* AC,spmat* P, spmat* CAT(sp3mmRowByRowMerged_,OFF_F)(spmat* R,spmat* AC,spmat* P,CONFIG* cfg, SPMM_INTERF spmm){ - ulong* rowSizes = NULL; + idx_t* rowSizes = NULL; SPMM_ACC* outAccumul=NULL; ACC_DENSE *accVectorsR_AC=NULL,*accVectorsRAC_P=NULL,*accRAC,*accRACP; @@ -619,11 +619,11 @@ spmat* CAT(sp3mmRowByRowMerged_,OFF_F)(spmat* R,spmat* AC,spmat* P,CONFIG* cfg, goto _err; } - ulong c; + idx_t c; ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (R->M,R,cfg); AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); #pragma omp parallel for schedule(runtime) private(accRAC,accRACP,c) - for (ulong r=0; rM; r++){ //row-by-row formulation + for (idx_t r=0; rM; r++){ //row-by-row formulation //iterate over nz entry index c inside current row r accRAC = accVectorsR_AC + omp_get_thread_num(); accRACP = accVectorsRAC_P + omp_get_thread_num(); diff --git a/base/serial/impl/sp3mm4amg/commons/sparseUtilsGeneric.c b/base/serial/impl/sp3mm4amg/commons/sparseUtilsGeneric.c index e3cea72b..08720fd6 100644 --- a/base/serial/impl/sp3mm4amg/commons/sparseUtilsGeneric.c +++ b/base/serial/impl/sp3mm4amg/commons/sparseUtilsGeneric.c @@ -46,10 +46,10 @@ * alloc threads' aux arrays once and split them in threads' structures * so free them once from the first thread struct, with the original pointers returned from the alloc */ -ACC_DENSE* _initAccVectors_monoalloc(ulong num,ulong size){ //TODO PERF WITH NEXT +ACC_DENSE* _initAccVectors_monoalloc(idx_t num,idx_t size){ //TODO PERF WITH NEXT ACC_DENSE* out = NULL; double* vAll = NULL; - ulong* vAllNzIdx = NULL; + idx_t* vAllNzIdx = NULL; if (!(out = calloc(num,sizeof(*out)))){ ERRPRINT("_initAccVectors aux struct alloc failed\n"); return NULL; @@ -62,7 +62,7 @@ ACC_DENSE* _initAccVectors_monoalloc(ulong num,ulong size){ //TODO PERF WITH NEX ERRPRINT("_initAccVectors aux dense vectors' idx alloc failed\n"); goto err; } - for (ulong i=0; ivLen = size; if (!(v->v = calloc(size,sizeof(*(v->v))))) { ERRPRINT("_initAccVectors aux dense vector alloc failed\n"); @@ -91,19 +91,19 @@ int allocAccDense(ACC_DENSE* v,ulong size){ return EXIT_SUCCESS; } -ACC_DENSE* _initAccVectors(ulong num,ulong size){ +ACC_DENSE* _initAccVectors(idx_t num,idx_t size){ ACC_DENSE* out = NULL; if (!(out = calloc(num,sizeof(*out)))){ ERRPRINT("_initAccVectors aux struct alloc failed\n"); return NULL; } - for (ulong i=0; iM; r++){ +void checkOverallocPercent(idx_t* forecastedSizes,spmat* AB){ + for (idx_t r=0,rSize,forecastedSize; r < AB->M; r++){ forecastedSize = forecastedSizes[r]; #ifdef ROWLENS rSize = AB->RL[r]; @@ -158,11 +158,11 @@ void checkOverallocPercent(ulong* forecastedSizes,spmat* AB){ } } DEBUGPRINT - printf("extra forecastedSize of row: %lu\t=\t%lf %% \n", + printf("extra forecastedSize of row: %d\t=\t%lf %% \n", r,100*(forecastedSize-rSize) / (double) forecastedSize); } idx_t extraMatrix = forecastedSizes[AB->M] - AB->NZ; - printf("extra forecastedSize of the matrix: \t%lu\t = %lf %% \n", + printf("extra forecastedSize of the matrix: \t%d\t = %lf %% \n", extraMatrix, 100*extraMatrix /(double) forecastedSizes[AB->M]); } int spmatDiff(spmat* A, spmat* B){ @@ -187,8 +187,8 @@ int spmatDiff(spmat* A, spmat* B){ double* CSRToDense(spmat* sparseMat){ double* denseMat; - ulong i,j,idxNZ,denseSize; - if (__builtin_umull_overflow(sparseMat->M,sparseMat->N,&denseSize)){ + idx_t i,j,idxNZ,denseSize; + if (__builtin_smul_overflow(sparseMat->M,sparseMat->N,&denseSize)){ ERRPRINT("overflow in dense allocation\n"); return NULL; } @@ -200,7 +200,7 @@ double* CSRToDense(spmat* sparseMat){ for (idxNZ=sparseMat->IRP[i]; idxNZIRP[i+1]; ++idxNZ){ j = sparseMat->JA[idxNZ]; //converting sparse item into dense entry - denseMat[(ulong) IDX2D(i,j,sparseMat->N)] = sparseMat->AS[idxNZ]; + denseMat[(idx_t) IDX2D(i,j,sparseMat->N)] = sparseMat->AS[idxNZ]; } } return denseMat; @@ -214,17 +214,17 @@ void printSparseMatrix(spmat* spMatrix,char justNZMarkers){ -static inline int _colsPartitioningUnifRanges_init(spmat* A,uint gridCols, +static inline int _colsPartitioningUnifRanges_init(spmat* A,int gridCols, spmat** colParts,idx_t** colPartsLens){ spmat* colPart; - ulong _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols, *tmpJA; + idx_t _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols, *tmpJA; ///alloc/init partitions structures if (!(*colParts = calloc(gridCols, sizeof(**colParts)))){ ERRPRINT("colsPartitioningUnifRanges\tcolumns partitions of A calloc fail\n"); return EXIT_FAILURE; } - for (ulong i=0,colBlock; iM,colBlock,colPart)){ @@ -249,13 +249,13 @@ static inline int _colsPartitioningUnifRanges_init(spmat* A,uint gridCols, return EXIT_SUCCESS; } -static inline int _colsPartitioningUnifRanges_finalRealloc(spmat* A,uint gridCols, +static inline int _colsPartitioningUnifRanges_finalRealloc(spmat* A,int gridCols, spmat* colParts,idx_t* colPartsLens){ spmat* colPart; double* tmpAS; idx_t* tmpJA; //realloc overallcd A parts NZ arrays (TODO -> downsizing -> nofails?) - for (ulong i=0,partLen; iAS,partLen*sizeof(*(colPart->AS))))){ @@ -281,22 +281,22 @@ static inline int _colsPartitioningUnifRanges_finalRealloc(spmat* A,uint gridCol #endif //////////////////////// CSR SPECIFIC -- TODO RENAME ////////////////// ///SPARSE MATRIX PARTITIONING -idx_t* CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols){ - ulong subRowsN = A->M * gridCols; - ulong _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols; - ulong* offsets = malloc( (subRowsN+1) * sizeof(*offsets) ); +idx_t* CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(spmat* A,int gridCols){ + idx_t subRowsN = A->M * gridCols; + idx_t _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols; + idx_t* offsets = malloc( (subRowsN+1) * sizeof(*offsets) ); if (!offsets) { ERRPRINT("colsOffsetsPartitioningUnifRanges:\toffsets malloc errd\n"); return NULL; } ///OFFSETS COMPUTE FOR COL GROUPS -> O( A.NZ ) - for (ulong r=0, j=0; rM; j=A->IRP[++r]-OFF_F){ + for (idx_t r=0, j=0; rM; j=A->IRP[++r]-OFF_F){ offsets[ IDX2D(r,0,gridCols) ] = j; //row's first gc start is costrained //navigate column groups inside current row - for (ulong gc=1,gcStartCol; gcJA[j]-OFF_F; cIRP[r+1]-OFF_F; c=A->JA[++j]-OFF_F); + //for (idx_t c=A->JA[j]-OFF_F; cIRP[r+1]-OFF_F; c=A->JA[++j]-OFF_F); while (j < A->IRP[r+1]-OFF_F && A->JA[j]-OFF_F < gcStartCol) j++; offsets[ IDX2D(r,gc,gridCols) ] = j; //row's gc group startIdx @@ -306,12 +306,12 @@ idx_t* CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols){ return offsets; } -spmat* CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)(spmat* A,uint gridCols, +spmat* CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)(spmat* A,int gridCols, idx_t** colPartsOffsets) { spmat *colParts = NULL, *colPart; - ulong _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols; - ulong *colPartsLens=NULL, *tmpJA; + idx_t _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols; + idx_t *colPartsLens=NULL, *tmpJA; double* tmpAS; ///alloc/init partitions structures @@ -349,15 +349,15 @@ spmat* CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)(spmat* A,uint gridCols, return colParts; _err: free(*colPartsOffsets); - for (ulong i=0; iN/gridCols, _colBlockRem = A->N%gridCols, *colPartsLens=NULL, *tmpJA; + idx_t _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols, *colPartsLens=NULL, *tmpJA; double* tmpAS; ///alloc/init partitions structures if (_colsPartitioningUnifRanges_init(A,gridCols,&colParts,&colPartsLens)) @@ -366,9 +366,9 @@ spmat* CAT(colsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols){ * Parallelize: 2for collapse OMP, gcEndCol -> startIdxize, ... * oppure wrappare cio in static inline */ - for (ulong r=0, j=0; rM; j=A->IRP[++r]-OFF_F){ + for (idx_t r=0, j=0; rM; j=A->IRP[++r]-OFF_F){ //navigate column groups inside current row - for (ulong gc=0,gcEndCol=0,i; gcIRP[r] = colPartsLens[gc]; gcEndCol += UNIF_REMINDER_DISTRI(gc,_colBlock,_colBlockRem); //goto next GroupCols,keeping A's nnz entries navigation ( index j+i ) - //for (ulong c=A->JA[j+i]-OFF_F; cIRP[r+1]-OFF_F; c=A->JA[j+ ++i]-OFF_F); + //for (idx_t c=A->JA[j+i]-OFF_F; cIRP[r+1]-OFF_F; c=A->JA[j+ ++i]-OFF_F); while ( j+i < A->IRP[r+1]-OFF_F && A->JA[j+i]-OFF_F < gcEndCol ) i++; memcpy(colPart->AS+colPart->IRP[r], A->AS+j, i*sizeof(*A->AS)); memcpy(colPart->JA+colPart->IRP[r], A->JA+j, i*sizeof(*A->JA)); @@ -393,14 +393,14 @@ spmat* CAT(colsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols){ free(colPartsLens); return colParts; _err: - for (ulong i=0; i= rLen); } idx_t extraMatrix = forecastedSizes[AB->M] - AB->NZ; - printf("extra forecastedSize of the matrix: \t%lu\t = %lf %% \n", + printf("extra forecastedSize of the matrix: \t%d\t = %lf %% \n", extraMatrix, 100*extraMatrix /(double) forecastedSizes[AB->M]); free(abColOffsets); @@ -422,25 +422,25 @@ void CAT(checkOverallocRowPartsPercent_,OFF_F)(ulong* forecastedSizes,spmat* AB, ///inline export here //SPMV_CHUNKS_DISTR spmvChunksFair; -spmat* allocSpMatrix(ulong rows, ulong cols); -int allocSpMatrixInternal(ulong rows, ulong cols, spmat* mat); +spmat* allocSpMatrix(idx_t rows, idx_t cols); +int allocSpMatrixInternal(idx_t rows, idx_t cols, spmat* mat); void freeSpmatInternal(spmat* mat); void freeSpmat(spmat* mat); ////INTERNAL TEST FUNCTIONS //test that each row's partition from colsOffsetsPartitioningUnifRanges is in the correct index range #include -int testColsOffsetsPartitioningUnifRanges(spmat* mat,ulong gridCols,ulong* partsOffs){ - ulong _colBlock = mat->N/gridCols, _colBlockRem = mat->N%gridCols; - ulong j=0; //CSR scanning nnz idx - ulong* colPartsPopulations = alloca(gridCols * sizeof(*colPartsPopulations)); +int testColsOffsetsPartitioningUnifRanges(spmat* mat,idx_t gridCols,idx_t* partsOffs){ + idx_t _colBlock = mat->N/gridCols, _colBlockRem = mat->N%gridCols; + idx_t j=0; //CSR scanning nnz idx + idx_t* colPartsPopulations = alloca(gridCols * sizeof(*colPartsPopulations)); memset(colPartsPopulations,0,gridCols * sizeof(*colPartsPopulations)); - for (ulong r=0,pId=0; rM; r++){ - for (ulong gc=0,pStartIdx,pEndIdx; gcM; r++){ + for (idx_t gc=0,pStartIdx,pEndIdx; gcJA[idx]; assert(j == idx); //consecutive index in partitioning assert(pStartIdx <= c && c <= pEndIdx); //colRange @@ -450,12 +450,12 @@ int testColsOffsetsPartitioningUnifRanges(spmat* mat,ulong gridCols,ulong* parts } } assert(j == mat->NZ); - ulong s=0; - for (ulong gc=0,partSize; gc < gridCols; gc++,s+=partSize){ + idx_t s=0; + for (idx_t gc=0,partSize; gc < gridCols; gc++,s+=partSize){ partSize = colPartsPopulations[gc]; double partShare=partSize/(double)mat->NZ,partsAvg=1/(double)gridCols; double partShareAvgDiff = partShare - partsAvg; - printf("colPartition %lu has:\t%lu = %lf of NNZ\t\t .. %lf\tAVG diff\n", + printf("colPartition %d has:\t%d = %lf of NNZ\t\t .. %lf\tAVG diff\n", gc,partSize,partShare,partShareAvgDiff); } assert(s == mat->NZ); //TODO DUPLICATED @@ -482,14 +482,14 @@ int main(int argc, char** argv){ return out; } ////partitioning test - ulong* colsPartitions = colsOffsetsPartitioningUnifRanges_0(mat,Conf.gridCols); + idx_t* colsPartitions = colsOffsetsPartitioningUnifRanges_0(mat,Conf.gridCols); if (!colsPartitions) goto _free; if (testColsOffsetsPartitioningUnifRanges(mat,Conf.gridCols,colsPartitions)) goto _free; out=EXIT_SUCCESS; printf("testColsOffsetsPartitioningUnifRanges passed with " - "mat: %lux%lu-%luNNZ\tgrid: %dx%d\n", + "mat: %dx%d-%dNNZ\tgrid: %dx%d\n", mat->M,mat->N,mat->NZ,Conf.gridRows,Conf.gridCols); _free: if (colsPartitions) free(colsPartitions); diff --git a/base/serial/impl/sp3mm4amg/commons/utils.c b/base/serial/impl/sp3mm4amg/commons/utils.c index d969bf82..8705bece 100644 --- a/base/serial/impl/sp3mm4amg/commons/utils.c +++ b/base/serial/impl/sp3mm4amg/commons/utils.c @@ -145,12 +145,12 @@ int fread_wrap(FILE* fp,void* dst,size_t count){ return rd; } ///STRUCTURED DATA IO -int writeDoubleVector(char* fpath,double* v,ulong size){ +int writeDoubleVector(char* fpath,double* v,idx_t size){ int fd,out=EXIT_FAILURE; if ( (fd = createNewFile(fpath) ) < 0 ) goto _end; write_wrap(fd,v,size * sizeof(*v)); //raw write of vector out = EXIT_SUCCESS; - DEBUG printf("written double vector into %s RAW of %lu elements\n",fpath,size); + DEBUG printf("written double vector into %s RAW of %d elements\n",fpath,size); _end: if (close(fd) == EOF) perror("close errd\n"); @@ -158,21 +158,21 @@ int writeDoubleVector(char* fpath,double* v,ulong size){ } ///STRUCTURED DATA IO -- BUFFERED: FSCANF - FPRINTF -int writeDoubleVectorAsStr(char* fpath,double* v,ulong size){ +int writeDoubleVectorAsStr(char* fpath,double* v,idx_t size){ int out=EXIT_FAILURE; FILE* fp = fopen(fpath,"w"); if (!fp){ perror("fopen vector file write"); return EXIT_FAILURE; } - for (ulong i=0; i= vectorSize ){ //realloc the array vectorSize *= VECTOR_STEP_REALLOC; if (!(tmp=realloc(out,vectorSize*sizeof(*out)))){ - ERRPRINTS("realloc errd to ~~ %lu MB\n",vectorSize >> 20); + ERRPRINTS("realloc errd to ~~ %d MB\n",vectorSize >> 20); goto _err; } out = tmp; - DEBUG printf("reallocd to ~~ %lu MB\n",vectorSize >> 20); + DEBUG printf("reallocd to ~~ %d MB\n",vectorSize >> 20); } - ulong toRead = MIN(VECTOR_READ_BLOCK,(vectorSize-i)); + idx_t toRead = MIN(VECTOR_READ_BLOCK,(vectorSize-i)); if((rd = fread_wrap(fp,out + i,sizeof(*out)*toRead)) == -2){ ERRPRINT("fread_wrap errd\n"); goto _err; @@ -227,7 +227,7 @@ double* readDoubleVector(char* fpath,ulong* size){ goto _err; } out = tmp; - DEBUG printf("readed vector from %s of %lu elements\n",fpath,*size); + DEBUG printf("readed vector from %s of %d elements\n",fpath,*size); goto _free; _err: @@ -238,10 +238,10 @@ double* readDoubleVector(char* fpath,ulong* size){ return out; } -double* readDoubleVectorStr(char* fpath,ulong* size){ +double* readDoubleVectorStr(char* fpath,idx_t* size){ int fscanfOut; double *out,*tmp; - ulong i=0,vectorSize=RNDVECTORSIZE; + idx_t i=0,vectorSize=RNDVECTORSIZE; if (*size) vectorSize = *size; FILE* fp = fopen(fpath,"r"); if (!fp){ @@ -256,11 +256,11 @@ double* readDoubleVectorStr(char* fpath,ulong* size){ if (i >= vectorSize ){ //realloc the array vectorSize *= VECTOR_STEP_REALLOC; if (!(tmp=realloc(out,vectorSize*sizeof(*out)))){ - ERRPRINTS("realloc errd to ~~ %lu MB\n",vectorSize >> 20); + ERRPRINTS("realloc errd to ~~ %d MB\n",vectorSize >> 20); goto _err; } out = tmp; - DEBUG printf("reallocd to ~~ %lu MB\n",vectorSize >> 20); + DEBUG printf("reallocd to ~~ %d MB\n",vectorSize >> 20); } fscanfOut = fscanf(fp,DOUBLE_STR_FORMAT,out + i++ ); if ( fscanfOut == EOF && ferror(fp)){ @@ -277,7 +277,7 @@ double* readDoubleVectorStr(char* fpath,ulong* size){ goto _err; } out = tmp; - DEBUG printf("readed vector from %s of %lu elements\n",fpath,*size); + DEBUG printf("readed vector from %s of %d elements\n",fpath,*size); goto _free; _err: @@ -292,10 +292,10 @@ double* readDoubleVectorStr(char* fpath,ulong* size){ int getConfig(CONFIG* conf){ int changes=EXIT_FAILURE; char *varVal,*ptr; - ulong val; + idx_t val; if ((varVal = getenv(GRID_ROWS))){ val=strtoul(varVal,&ptr,10); - if (ptr==varVal || val>= UINT_MAX){ + if (ptr==varVal || val>= INT_MAX){ perror("strtol errd"); } else { conf->gridRows = val; @@ -304,7 +304,7 @@ int getConfig(CONFIG* conf){ } if ((varVal = getenv(GRID_COLS))){ val=strtoul(varVal,&ptr,10); - if (ptr==varVal || val>= UINT_MAX){ + if (ptr==varVal || val>= INT_MAX){ perror("strtol errd"); } else { conf->gridCols = val; @@ -317,11 +317,11 @@ int getConfig(CONFIG* conf){ /////LIB-SORTING -- WRAPPERS //comparing functions static inline int cmp_idx_t(const void* a, const void*b){ - idx_t aa=*((ulong*) a), bb = *((ulong*) b); + idx_t aa=*((idx_t*) a), bb = *((idx_t*) b); return aa==bb?0:aa>bb?1:-1; } -static inline int cmpulong(const void* a, const void*b){ - ulong aa=*((ulong*) a), bb = *((ulong*) b); +static inline int cmpidx_t(const void* a, const void*b){ + idx_t aa=*((idx_t*) a), bb = *((idx_t*) b); return aa==bb?0:aa>bb?1:-1; } static inline int cmpuint(const void* a, const void*b){ @@ -336,8 +336,8 @@ static inline int cmpRbNode(const void* a, const void* b){ void sort_idx_t(idx_t* arr, idx_t len){ qsort(arr,len,sizeof(*arr),cmp_idx_t); } -void sortulong(ulong* arr, ulong len){ - qsort(arr,len,sizeof(*arr),cmpulong); +void sortidx_t(idx_t* arr, idx_t len){ + qsort(arr,len,sizeof(*arr),cmpidx_t); } void sortuint(uint* arr, uint len){ qsort(arr,len,sizeof(*arr),cmpuint); @@ -368,7 +368,7 @@ static inline int rndDouble_sinDecimal(double* d){ ERRPRINT("read_wrap failed to read holder for rnd double\n"); return EXIT_FAILURE; } - *d = (_rndHold % (ulong) ceil(MAXRND)) + sin(_rndHold); + *d = (_rndHold % (idx_t) ceil(MAXRND)) + sin(_rndHold); return EXIT_SUCCESS; } @@ -383,8 +383,8 @@ void statsAvgVar(double* values,uint numVals, double* out){ } /// MATRIX - VECTOR COMPUTE UTILS -int fillRndVector(ulong size, double* v){ - for( ulong x=0; x DOUBLE_DIFF_THREASH ){ out = EXIT_FAILURE; ERRPRINTS("DOUBLE VECTORS DIFF: DOUBLE_DIFF_THREASH=%lf\t<\t" - "|%13lg| = %lf %% of @a[%lu]\n", + "|%13lg| = %lf %% of @a[%d]\n", DOUBLE_DIFF_THREASH,diff,100*diffAbs/ABS(a[i]),i); #ifdef DOUBLE_VECT_DIFF_EARLY_EXIT /*#pragma message("DOUBLE_VECT_DIFF_EARLY_EXIT: only first diff double reported")*/ @@ -415,7 +415,7 @@ int doubleVectorsDiff(double* a, double* b, ulong n,double* diffMax){ if ( ABS(*diffMax) < diffAbs ) *diffMax = diff; } DEBUG{ - printf("\nchecked diff %s"CEND" between 2 double vector of %lu elements" + printf("\nchecked diff %s"CEND" between 2 double vector of %d elements" "\tmax diff: %le %s threash: %le\n", !out?CCC"OK":CCCERR"ERR!", n,*diffMax,*diffMax=", DOUBLE_DIFF_THREASH); @@ -427,9 +427,9 @@ int doubleVectorsDiff(double* a, double* b, ulong n,double* diffMax){ return out; } -void printMatrix(double* mat,ulong m,ulong n,char justNZMarkers){ - printf("printing matrix: %lu x %lu\n",m,n); - ulong i,j; +void printMatrix(double* mat,idx_t m,idx_t n,char justNZMarkers){ + printf("printing matrix: %d x %d\n",m,n); + idx_t i,j; for (i=0;iRL = b_rl; #endif // ROWLENS - // setting up cfg - // TODO : CHECK THAT THIS IS COMPATIBLE WITH PSB - rc = getConfig(&cfg); + if (!getConfig(&Conf)){ + VERBOSE printf("configuration changed from env"); + } + //save exported parallelization grid configured ... see Config.md in the top folder + //const ushort GRIDROWS = Conf.gridRows; + //const ushort GRIDCOLS = Conf.gridCols; + const ushort ORIG_GRID_CONF[2] = {Conf.gridRows,Conf.gridCols}; + + int maxThreads = omp_get_max_threads(); + Conf.threadNum = (uint) maxThreads; + DEBUG printf("omp_get_max_threads:\t%d\n",maxThreads); - // TODO : change chunk distribution with a choice ? - cfg.chunkDistrbFunc = &chunksFair; - cfg.threadNum = 1; + /* + * get exported schedule configuration, + * if schedule != static -> dyn like -> set a chunk division function before omp for + */ + int schedKind_chunk_monotonic[3]; + ompGetRuntimeSchedule(schedKind_chunk_monotonic); + Conf.chunkDistrbFunc = &chunksNOOP; + if (schedKind_chunk_monotonic[0] != omp_sched_static) + Conf.chunkDistrbFunc = chunkDistrbFunc; + VERBOSE + printf("%s",Conf.chunkDistrbFunc == &chunksNOOP?"static schedule =>chunkDistrbFunc NOOP\n":""); + + int i, row_nz; - printf("irp %d %d %d %d %d\n", a_irp[0], a_irp[1], a_irp[2], a_irp[3], a_irp[4]); // setting up spmat type matrices a.M = a_m; a.N = a_n; @@ -55,7 +81,13 @@ void psb_f_spmm_build_spacc(idx_t a_m, idx_t a_n, idx_t a_nz, a.AS = a_as; a.JA = a_ja; a.IRP = a_irp; - a.MAX_ROW_NZ = a_max_row_nz; + a.MAX_ROW_NZ = a.IRP[1] - a.IRP[0]; + for (i = 1; i < a.M; i ++){ + row_nz = a.IRP[i+1] - a.IRP[i]; + if (row_nz > a.MAX_ROW_NZ){ + a.MAX_ROW_NZ = row_nz; + } + } b.M = b_m; b.N = b_n; @@ -63,13 +95,20 @@ void psb_f_spmm_build_spacc(idx_t a_m, idx_t a_n, idx_t a_nz, b.AS = b_as; b.JA = b_ja; b.IRP = b_irp; - b.MAX_ROW_NZ = b_max_row_nz; + b.MAX_ROW_NZ = b.IRP[1] - b.IRP[0]; + for (i = 1; i < b.M; i ++){ + row_nz = b.IRP[i+1] - b.IRP[i]; + if (row_nz > b.MAX_ROW_NZ){ + b.MAX_ROW_NZ = row_nz; + } + } + // computing the size switch (impl_choice) { case ROW_BY_ROW_UB: - *nnz = spmmRowByRowCalculateSize_1(&a, &b, &cfg, accumul, rows_sizes, tmp_matrix); + *nnz = spmmRowByRowCalculateSize_1(&a, &b, &Conf, accumul, rows_sizes, tmp_matrix); default: break; } @@ -79,11 +118,14 @@ void psb_f_spmm_merge_spacc(void **accumul, void **rows_sizes, void **tmp_matrix, enum impl_types impl_choice, - double **c_as, - idx_t **c_ja, - idx_t **c_irp, + double **c_as_ptr, + idx_t **c_ja_ptr, + idx_t **c_irp_ptr, int *info) { + double *c_as = *c_as_ptr; + idx_t *c_ja = *c_ja_ptr; + idx_t *c_irp = *c_irp_ptr; // merging the rows into the correct arrays switch (impl_choice) { diff --git a/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_Multi.h b/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_Multi.h index 57676b59..6dd4dc0d 100644 --- a/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_Multi.h +++ b/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_Multi.h @@ -39,10 +39,10 @@ //hold SPMM result over a unpartitionated space among threads-row[s' blocks] typedef struct{ //space to hold SPMM output - ulong* JA; + idx_t* JA; double* AS; - ulong size; //num of entries allocated -> only dbg checks - ulong lastAssigned; //last JA&AS assigned index to an accumulator(atom) + idx_t size; //num of entries allocated -> only dbg checks + idx_t lastAssigned; //last JA&AS assigned index to an accumulator(atom) SPACC* accs; //SPARSIFIED ACC POINTERS uint accsNum; } SPMM_ACC; //accumulator for SPMM diff --git a/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_UB_Generic.h b/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_UB_Generic.h index f1e19402..b4e471cc 100644 --- a/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_UB_Generic.h +++ b/base/serial/impl/sp3mm4amg/include/Sp3MM_CSR_OMP_UB_Generic.h @@ -61,7 +61,7 @@ SPMM CAT(spmmRowByRow_,OFF_F); idx_t CAT(spmmRowByRowCalculateSize_,OFF_F) (spmat* A, spmat*B, CONFIG* cfg, void** accumul, void **rows_sizes, void** tmp_matrix); -void CAT(spmmRowByRowPopulate_,OFF_F)(void** accumul, void** rows_sizes, void** tmp_matrix, double** AS, idx_t** JA, idx_t** IRP); +void CAT(spmmRowByRowPopulate_,OFF_F)(void** accumul, void** rows_sizes, void** tmp_matrix, double* AS, idx_t* JA, idx_t* IRP); /* * sparse parallel implementation of @A * @B parallelizing Gustavson diff --git a/base/serial/impl/sp3mm4amg/include/SpMMUtilsGeneric.h b/base/serial/impl/sp3mm4amg/include/SpMMUtilsGeneric.h index 85ad4e82..779c3cb6 100644 --- a/base/serial/impl/sp3mm4amg/include/SpMMUtilsGeneric.h +++ b/base/serial/impl/sp3mm4amg/include/SpMMUtilsGeneric.h @@ -37,12 +37,12 @@ * both of accumulator's dense array and nnzIdx in @aux and has to be big @vectLen */ inline void CAT(scSparseVectMul_,OFF_F) - (double scalar,double* vectVals,ulong* vectIdxs,ulong vectLen, ACC_DENSE* aux){ - for (ulong i=0,j; i=aux->vLen){ - fprintf(stderr,"index %lu outside vLen %lu\n",j,aux->vLen); + fprintf(stderr,"index %d outside vLen %d\n",j,aux->vLen); assert(j < aux->vLen); } } @@ -65,12 +65,12 @@ inline void CAT(scSparseVectMul_,OFF_F) * both of accumulator's dense array and nnzIdx in @aux and has to be big @vectLen */ inline void CAT(scSparseVectMulPart_,OFF_F)(double scalar,double* vectVals, - ulong* vectIdxs,ulong vectLen,ulong startIdx,ACC_DENSE* aux){ - for (ulong i=0,j; i=aux->vLen){ - fprintf(stderr,"index %lu outside vLen %lu\n",j,aux->vLen); + fprintf(stderr,"index %d outside vLen %d\n",j,aux->vLen); assert(j < aux->vLen); } } @@ -90,9 +90,9 @@ inline void CAT(scSparseVectMulPart_,OFF_F)(double scalar,double* vectVals, #endif ///TODO IMPLEMENT SCALAR<->ROW MUL AS GENERIC SPARSE VECTOR<->SCALAR MUL -//inline void scSparseRowMul(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux){ -inline void CAT(scSparseRowMul_,OFF_F)(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux){ - ulong rowStartIdx = mat->IRP[trgtR]-OFF_F,rowLen; +//inline void scSparseRowMul(double scalar,spmat* mat,idx_t trgtR, ACC_DENSE* aux){ +inline void CAT(scSparseRowMul_,OFF_F)(double scalar,spmat* mat,idx_t trgtR, ACC_DENSE* aux){ + idx_t rowStartIdx = mat->IRP[trgtR]-OFF_F,rowLen; #ifdef ROWLENS rowLen = mat->RL[trgtR]; #else @@ -116,7 +116,6 @@ inline idx_t* CAT(spMMSizeUpperbound_,OFF_F)(spmat* A,spmat* B){ ERRPRINT("spMMSizeUpperbound: rowSizes calloc errd\n"); return NULL; } - printf("A->IRP %ld %ld %ld %ld %ld\n", A->IRP[0], A->IRP[1], A->IRP[2], A->IRP[3], A->IRP[4]); idx_t fullMatBound = 0; #pragma omp parallel for schedule(static) reduction(+:fullMatBound) for (idx_t r=0; rM; r++){ @@ -135,7 +134,7 @@ inline idx_t* CAT(spMMSizeUpperbound_,OFF_F)(spmat* A,spmat* B){ rowSizes[A->M] = fullMatBound; AUDIT_INTERNAL_TIMES End= omp_get_wtime(); VERBOSE - printf("spMMSizeUpperbound:%lu\t%le s\n",rowSizes[A->M],End-Start); + printf("spMMSizeUpperbound:%d\t%le s\n",rowSizes[A->M],End-Start); return rowSizes; } @@ -170,7 +169,7 @@ inline idx_t* CAT(spMMSizeUpperboundColParts_,OFF_F) rowPartsSizes[ A->M*gridCols ] = fullMatBound; AUDIT_INTERNAL_TIMES End= omp_get_wtime(); VERBOSE - printf("spMMSizeUpperboundColParts_:%lu\t%le s\n",rowPartsSizes[A->M],End-Start); + printf("spMMSizeUpperboundColParts_:%d\t%le s\n",rowPartsSizes[A->M],End-Start); return rowPartsSizes; } @@ -182,7 +181,7 @@ inline idx_t* CAT(spMMSizeUpperboundColParts_,OFF_F) #include "utils.h" ///Allocs - Free //SpMM holder of accumulats -inline SPMM_ACC* initSpMMAcc(ulong entriesNum, ulong accumulatorsNum){ +inline SPMM_ACC* initSpMMAcc(idx_t entriesNum, idx_t accumulatorsNum){ SPMM_ACC* out = calloc(1,sizeof(*out)); if (!out){ ERRPRINT("initSpMMAcc: out calloc errd\n"); @@ -275,7 +274,7 @@ static inline void _sparsifyUB(ACC_DENSE* accV,SPACC* accSparse,idx_t startColAc //row[Part] sparsified in a thread safe (exactly long) reserved area using atomics static inline void sparsifyUBNoPartsBounds - (SPMM_ACC* acc,ACC_DENSE* accV,SPACC* accSparse, ulong startColAcc){ + (SPMM_ACC* acc,ACC_DENSE* accV,SPACC* accSparse, idx_t startColAcc){ //sort nnz indexes of dense accumulator idx_t nnz = accV->nnzIdxMap.len; idx_t sparsifyStartV; //start index(inside @accSparse) of @accV to sparsify @@ -305,11 +304,11 @@ static inline void sparsifyUBNoPartsBounds */ inline int mergeRowsPartitions(SPACC* rowsParts,spmat* mat, CONFIG* conf){ - ulong nzNum=0,j,rLen,idx,partsNum = mat->M * conf->gridCols; + idx_t nzNum=0,j,rLen,idx,partsNum = mat->M * conf->gridCols; //TODO PARALLEL MERGE - SAVE STARTING offset OF EACH PARTITION IN THE OUT MATRIX - ulong* rowsPartsOffsets=alloca(partsNum*sizeof(*rowsPartsOffsets)); + idx_t* rowsPartsOffsets=alloca(partsNum*sizeof(*rowsPartsOffsets)); ///count nnz entries and alloc arrays for them - for (ulong r=0; rM; r++){ + for (idx_t r=0; rM; r++){ //for each partition ->get len -> outMat.IRP and aux offsets for (j=0,rLen=0; jgridCols; j++){ idx = IDX2D(r,j,conf->gridCols); @@ -332,20 +331,20 @@ inline int mergeRowsPartitions(SPACC* rowsParts,spmat* mat, return EXIT_FAILURE; } ///popolate with rows nnz values and indexes - ulong pLen; //omp for aux vars + idx_t pLen; //omp for aux vars #pragma omp parallel for schedule(static) private(pLen) - for (ulong i=0; iAS + rowsPartsOffsets[i],rowsParts[i].AS,pLen*sizeof(*(mat->AS))); memcpy(mat->JA + rowsPartsOffsets[i],rowsParts[i].JA,pLen*sizeof(*(mat->JA))); } CONSISTENCY_CHECKS{ //TODO REMOVE written nnz check manually - for (ulong i=0,w=0; iM; i++){ + for (idx_t i=0,w=0; iM; i++){ if (mat->IRP[i] != w) {ERRPRINT("MERGE ROW ERR IRP\n");return -1;} for (j=0; jgridCols; j++){ SPACC r = rowsParts[IDX2D(i,j,conf->gridCols)]; - for (ulong jj=0; jjAS[w]!= r.AS[jj]){ ERRPRINT("MERGE ROW ERR AS\n"); return -1;} if (mat->JA[w]!= r.JA[jj]){ @@ -363,9 +362,9 @@ inline int mergeRowsPartitions(SPACC* rowsParts,spmat* mat, * allocd arrays to hold non zero values and indexes into @mat */ inline int mergeRows(SPACC* rows,spmat* mat){ - ulong nzNum=0; + idx_t nzNum=0; //count nnz entries and alloc arrays for them - for (ulong r=0; rM; ++r){ + for (idx_t r=0; rM; ++r){ nzNum += rows[r].len; mat->IRP[r+1] = nzNum; #ifdef ROWLENS @@ -384,15 +383,15 @@ inline int mergeRows(SPACC* rows,spmat* mat){ ///POPOLATE WITH ROWS NNZ VALUES AND INDEXES //TODO PARALLEL COPY #pragma omp parallel for schedule(static) - for (ulong r=0; rM; r++){ + for (idx_t r=0; rM; r++){ memcpy(mat->AS+mat->IRP[r], rows[r].AS, rows[r].len*sizeof(*(mat->AS))); memcpy(mat->JA+mat->IRP[r], rows[r].JA, rows[r].len*sizeof(*(mat->JA))); } CONSISTENCY_CHECKS{ //TODO REMOVE written nnz check manually - for (ulong r=0,i=0; rM; r++){ + for (idx_t r=0,i=0; rM; r++){ if (i != mat->IRP[r]) {ERRPRINT("MERGE ROW ERR IRP\n");return -1;} - for (ulong j=0; jAS[i]!= rows[r].AS[j]){ ERRPRINT("MERGE ROW ERR AS\n"); return -1;} if (mat->JA[i] != rows[r].JA[j]){ @@ -415,9 +414,9 @@ inline int mergeRows(SPACC* rows,spmat* mat){ */ inline idx_t calculateSize(SPACC* rows, spmat *mat){ - ulong nzNum=0; + idx_t nzNum=0; //count nnz entries and alloc arrays for them - for (ulong r=0; rM; ++r){ + for (idx_t r=0; rM; ++r){ nzNum += rows[r].len; mat->IRP[r+1] = nzNum; #ifdef ROWLENS @@ -429,16 +428,17 @@ inline idx_t calculateSize(SPACC* rows, spmat *mat){ return nzNum; } -inline int mergeRowsPopulate(SPACC* rows, spmat* mat, double** AS, idx_t** JA, idx_t** IRP) { +inline int mergeRowsPopulate(SPACC* rows, spmat* mat, double* AS, idx_t* JA, idx_t* IRP) { // Populate with rows' non-zero values and indexes // TODO PARALLEL COPY - #pragma omp parallel for schedule(static) - for (ulong r = 0; r < mat->M; r++) { - memcpy(*AS + mat->IRP[r], rows[r].AS, rows[r].len * sizeof(double)); - memcpy(*JA + mat->IRP[r], rows[r].JA, rows[r].len * sizeof(idx_t)); + // #pragma omp parallel for schedule(static) + for (idx_t r = 0; r < mat->M; r++) { + fprintf(stderr, "r %d, AS %p, mat->IRP[r] %d, rows[r].AS %p, rows[r].len %d\n", r,AS,mat->IRP[r],rows[r].AS, rows[r].len); + memcpy(AS + mat->IRP[r], rows[r].AS, rows[r].len * sizeof(double)); + memcpy(JA + mat->IRP[r], rows[r].JA, rows[r].len * sizeof(idx_t)); } - memcpy(*IRP, mat->IRP, (mat->M + 1) * sizeof(idx_t)); + memcpy(IRP, mat->IRP, (mat->M + 1) * sizeof(idx_t)); // Consistency checks // TODO: Add your consistency checks here diff --git a/base/serial/impl/sp3mm4amg/include/config.h b/base/serial/impl/sp3mm4amg/include/config.h index c504dfcc..d8d516ac 100644 --- a/base/serial/impl/sp3mm4amg/include/config.h +++ b/base/serial/impl/sp3mm4amg/include/config.h @@ -40,11 +40,11 @@ typedef enum{ // typedef struct{ - ushort gridRows; - ushort gridCols; + int gridRows; + int gridCols; //TODO FULL CONFIG DOCCED HERE ROW_MMSYM_IMPL_MODE symbMMRowImplID; //how to compute symb mul (if required) - uint threadNum; //thread num to use in an OMP parallel region ... + int threadNum; //thread num to use in an OMP parallel region ... void* chunkDistrbFunc; //CHUNKS_DISTR_INTERF func pntr } CONFIG; ///Smart controls diff --git a/base/serial/impl/sp3mm4amg/include/inlineExports.h b/base/serial/impl/sp3mm4amg/include/inlineExports.h index cc3e0a83..820af188 100644 --- a/base/serial/impl/sp3mm4amg/include/inlineExports.h +++ b/base/serial/impl/sp3mm4amg/include/inlineExports.h @@ -33,17 +33,17 @@ void cleanRbNodes(rbRoot* root,rbNode* nodes,idx_t nodesNum); int rbInsertNewKey(rbRoot *root,rbNode *node, idx_t key); //void C_FortranShiftIdxs(spmat* outMat); //void Fortran_C_ShiftIdxs(spmat* m); -ACC_DENSE* _initAccVectors(ulong num,ulong size); -ACC_DENSE* _initAccVectors_monoalloc(ulong num,ulong size); //TODO PERF WITH NEXT -SPMM_ACC* initSpMMAcc(ulong entriesNum, ulong accumulatorsNum); +ACC_DENSE* _initAccVectors(idx_t num,idx_t size); +ACC_DENSE* _initAccVectors_monoalloc(idx_t num,idx_t size); //TODO PERF WITH NEXT +SPMM_ACC* initSpMMAcc(idx_t entriesNum, idx_t accumulatorsNum); idx_t reductionMaxSeq(idx_t* arr,idx_t arrLen); -int _allocAccDense(ACC_DENSE* v,ulong size); +int _allocAccDense(ACC_DENSE* v,idx_t size); int mergeRows(SPACC* rows,spmat* mat); int mergeRowsPartitions(SPACC* rowsParts,spmat* mat,CONFIG* conf); -void _freeAccsDenseChecks(ACC_DENSE* vectors,ulong num); +void _freeAccsDenseChecks(ACC_DENSE* vectors,idx_t num); void _resetAccVect(ACC_DENSE* acc); void _resetIdxMap(SPVECT_IDX_DENSE_MAP* acc); void assertArrNoRepetitions(idx_t* arrSorted, idx_t arrLen); -void freeAccsDense(ACC_DENSE* vectors,ulong num); +void freeAccsDense(ACC_DENSE* vectors,idx_t num); void freeSpMMAcc(SPMM_ACC* acc); -void sparsifyDenseVect(SPMM_ACC* acc,ACC_DENSE* accV,SPACC* accSparse, ulong startColAcc); +void sparsifyDenseVect(SPMM_ACC* acc,ACC_DENSE* accV,SPACC* accSparse, idx_t startColAcc); diff --git a/base/serial/impl/sp3mm4amg/include/inlineExports_Generic.h b/base/serial/impl/sp3mm4amg/include/inlineExports_Generic.h index eba852f2..08255ba0 100644 --- a/base/serial/impl/sp3mm4amg/include/inlineExports_Generic.h +++ b/base/serial/impl/sp3mm4amg/include/inlineExports_Generic.h @@ -35,9 +35,9 @@ ////inline exports //multi implmentation functions -void CAT(scSparseVectMul_,OFF_F)(double scalar,double* vectVals,ulong* vectIdxs,ulong vectLen, ACC_DENSE* aux); -void CAT(scSparseVectMulPart_,OFF_F)(double scalar,double* vectVals,ulong* vectIdxs,ulong vectLen,ulong startIdx,ACC_DENSE* aux); -void CAT(_scRowMul_,OFF_F)(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux); -void CAT(scSparseRowMul_,OFF_F)(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux); +void CAT(scSparseVectMul_,OFF_F)(double scalar,double* vectVals,idx_t* vectIdxs,idx_t vectLen, ACC_DENSE* aux); +void CAT(scSparseVectMulPart_,OFF_F)(double scalar,double* vectVals,idx_t* vectIdxs,idx_t vectLen,idx_t startIdx,ACC_DENSE* aux); +void CAT(_scRowMul_,OFF_F)(double scalar,spmat* mat,idx_t trgtR, ACC_DENSE* aux); +void CAT(scSparseRowMul_,OFF_F)(double scalar,spmat* mat,idx_t trgtR, ACC_DENSE* aux); idx_t* CAT(spMMSizeUpperbound_,OFF_F)(spmat* A,spmat* B); idx_t* CAT(spMMSizeUpperboundColParts_,OFF_F)(spmat* A,spmat* B,ushort gridCols,idx_t* bColPartOffsets); diff --git a/base/serial/impl/sp3mm4amg/include/macros.h b/base/serial/impl/sp3mm4amg/include/macros.h index c75017ee..539430e8 100644 --- a/base/serial/impl/sp3mm4amg/include/macros.h +++ b/base/serial/impl/sp3mm4amg/include/macros.h @@ -42,7 +42,7 @@ #define IN_RANGE(i,start,end) ( (start) <= (i) && (i) <= (end) ) //ceil(x/y) with integers -#define INT_DIV_CEIL(x,y) ( ( (x) - 1) / (y) + 1 ) +#define INT_DIV_CEIL(x,y) ( ( (x) - 1) / (y) + 1 ) //2D ROW MAJOR indexing wrap compute #define IDX2D(i,j,nCols) ((j) + (i)*(nCols)) diff --git a/base/serial/impl/sp3mm4amg/include/ompChunksDivide.h b/base/serial/impl/sp3mm4amg/include/ompChunksDivide.h index 6d0ba45c..a97cefca 100644 --- a/base/serial/impl/sp3mm4amg/include/ompChunksDivide.h +++ b/base/serial/impl/sp3mm4amg/include/ompChunksDivide.h @@ -40,14 +40,14 @@ #include "config.h" #include "omp.h" //distribution of @rows|blocks of @matrix, exploiting @config -typedef void (CHUNKS_DISTR ) (ulong,spmat*,CONFIG*); -typedef void (*CHUNKS_DISTR_INTERF ) (ulong,spmat*,CONFIG*); +typedef void (CHUNKS_DISTR ) (idx_t,spmat*,CONFIG*); +typedef void (*CHUNKS_DISTR_INTERF ) (idx_t,spmat*,CONFIG*); //NOOP chunks division for manual configuration via export OMP_SCHEDULE -inline void chunksNOOP(ulong r,spmat* mat,CONFIG* cfg){ return; } +inline void chunksNOOP(idx_t r,spmat* mat,CONFIG* cfg){ return; } #include "ompGetICV.h" //fair division of @r elements from matrix @mat with threads num in @cfg -inline void chunksFair(ulong r,spmat* mat,CONFIG* cfg){ +inline void chunksFair(idx_t r,spmat* mat,CONFIG* cfg){ assert(cfg->threadNum > 0); //configured target thread num omp_sched_t k,kind; int chunk_size,chunk_size_new=0,monotonic; omp_get_schedule(&kind,&chunk_size); @@ -75,7 +75,7 @@ inline void chunksFair(ulong r,spmat* mat,CONFIG* cfg){ } //fair division of @r elements from matrix @mat of threads in @cfg //subdividing the fair share with a factor of @FAIR_CHUNKS_FOLDING -inline void chunksFairFolded(ulong r,spmat* mat,CONFIG* cfg){ +inline void chunksFairFolded(idx_t r,spmat* mat,CONFIG* cfg){ assert(cfg->threadNum > 0); //configured target thread num omp_sched_t k,kind; int chunk_size,chunk_size_new=0,monotonic; omp_get_schedule(&kind,&chunk_size); diff --git a/base/serial/impl/sp3mm4amg/include/parser.h b/base/serial/impl/sp3mm4amg/include/parser.h index c482f7e1..2d5f25ec 100644 --- a/base/serial/impl/sp3mm4amg/include/parser.h +++ b/base/serial/impl/sp3mm4amg/include/parser.h @@ -35,16 +35,16 @@ #include "sparseMatrix.h" typedef struct{ - ulong row; - ulong col; + idx_t row; + idx_t col; double val; } entry; //MatrixMarket COO entry typedef struct{ MM_typecode mcode; entry* entries; - ulong* rowLens; - ulong M,N,NZ; //spmat sizes + idx_t* rowLens; + idx_t M,N,NZ; //spmat sizes } MatrixMarket; ////COO PARSE @@ -61,7 +61,7 @@ int MMCheck(MM_typecode typecode); * return allocated and filled COO entries with the NNZ number into * NO SORT CHECKING HERE */ -entry* MMtoCOO(ulong* NZ, FILE *fp, MM_typecode mcode,ulong* rowLens); +entry* MMtoCOO(idx_t* NZ, FILE *fp, MM_typecode mcode,idx_t* rowLens); ////COO -> ANYTHING ELSE CONVERSION /* @@ -69,13 +69,13 @@ entry* MMtoCOO(ulong* NZ, FILE *fp, MM_typecode mcode,ulong* rowLens); * EXPECTED: CSR arrays allocated, @entries col sorted in (not madatory consecut) rows * [simmetrical parts explicitly rappresented --> not important here] */ -int COOtoCSR(entry* entries, spmat* mat,ulong* rowLens); +int COOtoCSR(entry* entries, spmat* mat,idx_t* rowLens); /* * write COO entries in @entries inside sparse matrix @mat in ELL format * EXPECTED: @entries col sorted in (not madatory consecut) rows * ELL internal array allocated in this function, not freed in case of error */ -int COOtoELL(entry* entries, spmat* mat, ulong* rowLens); +int COOtoELL(entry* entries, spmat* mat, idx_t* rowLens); ////wrapper MM -> specialized target /* * Parse MatrixMarket matrix stored in file at @matPath diff --git a/base/serial/impl/sp3mm4amg/include/sparseMatrix.h b/base/serial/impl/sp3mm4amg/include/sparseMatrix.h index 8fc769e8..e146024a 100644 --- a/base/serial/impl/sp3mm4amg/include/sparseMatrix.h +++ b/base/serial/impl/sp3mm4amg/include/sparseMatrix.h @@ -75,7 +75,7 @@ typedef struct{ idx_t vLen; //size of the aux dense vector //TODO USELESS? SPVECT_IDX_DENSE_MAP nnzIdxMap; // } ACC_DENSE; -int allocAccDense(ACC_DENSE* v, ulong size); +int allocAccDense(ACC_DENSE* v, idx_t size); /* **** NNZ IDXS PRESENCE FLAGS ACCESS INTERFACE: *** diff --git a/base/serial/impl/sp3mm4amg/include/sparseUtilsGeneric.h b/base/serial/impl/sp3mm4amg/include/sparseUtilsGeneric.h index fbcab700..6d56c4d8 100644 --- a/base/serial/impl/sp3mm4amg/include/sparseUtilsGeneric.h +++ b/base/serial/impl/sp3mm4amg/include/sparseUtilsGeneric.h @@ -40,19 +40,19 @@ * returning an offsets matrix out[i][j] = start of jth colPartition of row i * subdivide @A columns in uniform cols ranges in the output */ -idx_t* CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols); +idx_t* CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(spmat* A,int gridCols); /* * partition CSR sparse matrix @A in @gridCols columns partitions as * indipended and allocated sparse matrixes and return them * subdivide @A columns in uniform cols ranges in the output */ -spmat* CAT(colsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols); +spmat* CAT(colsPartitioningUnifRanges_,OFF_F)(spmat* A,int gridCols); //same as above but with (aux) use of offsets partitoning (also returned if colOffsets!=NULL -spmat* CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)(spmat* A,uint gridCols,idx_t** colPartsOffsets); +spmat* CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)(spmat* A,int gridCols,idx_t** colPartsOffsets); //same as checkOverallocPercent but with 2D partitioning - CSR col partitioning -void CAT(checkOverallocRowPartsPercent_,OFF_F)(ulong* forecastedSizes,spmat* AB, +void CAT(checkOverallocRowPartsPercent_,OFF_F)(idx_t* forecastedSizes,spmat* AB, idx_t gridCols,idx_t* bColOffsets); /////////////////////////////////////////////////////////////////////////////// @@ -62,13 +62,13 @@ void CAT(checkOverallocRowPartsPercent_,OFF_F)(ulong* forecastedSizes,spmat* AB, //shift every index about the sparse data for use the matrix in a fortran app inline void C_FortranShiftIdxs(spmat* m){ - for(ulong r=0; rM+1; m -> IRP[r]++,r++); - for(ulong i=0; iNZ; m -> JA[i]++, i++); + for(idx_t r=0; rM+1; m -> IRP[r]++,r++); + for(idx_t i=0; iNZ; m -> JA[i]++, i++); } //shift every index about the sparse data for use the matric in a C app inline void Fortran_C_ShiftIdxs(spmat* m){ //TODO DBG ONLY and compleatness - for(ulong r=0; rM+1; m -> IRP[r]--,r++); - for(ulong i=0; iNZ; m -> JA[i]--, i++); + for(idx_t r=0; rM+1; m -> IRP[r]--,r++); + for(idx_t i=0; iNZ; m -> JA[i]--, i++); } /* @@ -77,7 +77,7 @@ inline void Fortran_C_ShiftIdxs(spmat* m){ //TODO DBG ONLY and compleatness * in @forecastedSizes there's for each row -> forecasted size * and in the last entry the cumulative of the whole matrix */ -void checkOverallocPercent(ulong* forecastedSizes,spmat* AB); +void checkOverallocPercent(idx_t* forecastedSizes,spmat* AB); /* check if sparse matrixes A<->B differ up to DOUBLE_DIFF_THREASH per element @@ -86,7 +86,7 @@ int spmatDiff(spmat* A, spmat* B); ////dyn alloc of spMM output matrix /* ///size prediction of AB = @A * @B -inline ulong SpMMPreAlloc(spmat* A,spmat* B){ +inline idx_t SpMMPreAlloc(spmat* A,spmat* B){ //TODO BETTER PREALLOC HEURISTICS HERE return MAX(A->NZ,B->NZ); } @@ -110,7 +110,7 @@ inline spmat* initSpMatrixSpMM(spmat* A, spmat* B){ #define REALLOC_FACTOR 1.5 //realloc sparse matrix NZ arrays -inline int reallocSpMatrix(spmat* mat,ulong newSize){ +inline int reallocSpMatrix(spmat* mat,idx_t newSize){ mat->NZ *= newSize; void* tmp; if (!(tmp = realloc(mat->AS,mat->NZ * sizeof(*(mat->AS))))){ @@ -133,6 +133,6 @@ void printSparseMatrix(spmat* sparseMat,char justNZMarkers); /*convert @sparseMat sparse matrix in dense matrix returned*/ double* CSRToDense(spmat* sparseMat); -void freeAccsDense(ACC_DENSE* vectors,ulong num); +void freeAccsDense(ACC_DENSE* vectors,idx_t num); #endif //SPARSEUTILS_H_COMMON_IDX_IMPLS diff --git a/base/serial/impl/sp3mm4amg/include/utils.h b/base/serial/impl/sp3mm4amg/include/utils.h index 8ef47492..eb62dec6 100644 --- a/base/serial/impl/sp3mm4amg/include/utils.h +++ b/base/serial/impl/sp3mm4amg/include/utils.h @@ -58,18 +58,18 @@ int createNewFile(char* const outFpath); #define DOUBLE_STR_FORMAT "%25le\n" //write double vector @v as row sequence of double at @fpath //e.g. read with od -tf8 -w8 fpath : OCTALOFFSET: DOUBLE FULL DIGITS -int writeDoubleVector(char* fpath,double* v,ulong size); +int writeDoubleVector(char* fpath,double* v,idx_t size); /* * read vector of double [Str] of arbitrary size from @fpath, true lenght in *size * if size point to a nnz value, the initial allocation will be of *size * eventual successive reallocation done multipling *size with VECTOR_STEP_REALLOC */ -double* readDoubleVector(char* fpath,ulong* size); -double* readDoubleVectorStr(char* fpath,ulong* size); +double* readDoubleVector(char* fpath,idx_t* size); +double* readDoubleVectorStr(char* fpath,idx_t* size); ///STRUCTURED DATA IO -- BUFFERED: FSCANF - FPRINTF //dual of readDoubleVectorVector -int writeDoubleVectorAsStr(char* fpath,double* v,ulong size); +int writeDoubleVectorAsStr(char* fpath,double* v,idx_t size); int MPI_Dims_create(int nnodes, int ndims, int dims[]); //commons/ompi_dims_create/ompi_dims_create.c #include "config.h" @@ -81,16 +81,16 @@ int getConfig(CONFIG* conf); //append only list implemented with a reallocated array typedef struct{ - ulong* a; - ulong size; - ulong lastIdx; + idx_t* a; + idx_t size; + idx_t lastIdx; } APPENDARRAY; //append @val to @list, reallocating if reached end -//TODO inline int appendArr(ulong val,APPENDARRAY* list); +//TODO inline int appendArr(idx_t val,APPENDARRAY* list); void sortuint(uint* arr, uint len); //sort uint array @arr of @len elements void sort_idx_t(idx_t* arr, idx_t len); -void sortulong(ulong* arr, ulong len); +void sortidx_t(idx_t* arr, idx_t len); void sortRbNode(rbNode* arr,idx_t len); ///ranges functions @@ -157,9 +157,9 @@ inline idx_t reductionMaxOmp(idx_t* arr,idx_t arrLen){ * of the 2 entries of the input vectors, signed as a[i] - b[i] (as well as dump prints) * CONVENTION: @a = true result, @b vector to check with the true result */ -int doubleVectorsDiff(double* a, double* b, ulong n,double* diffMax); +int doubleVectorsDiff(double* a, double* b, idx_t n,double* diffMax); //fill a random vector in @v long @size doubles -int fillRndVector(ulong size, double* v); +int fillRndVector(idx_t size, double* v); //read vector as a sequence of space separated double from file at @fpath #define VECTOR_STEP_MALLOC 100 @@ -174,8 +174,8 @@ int fillRndVector(ulong size, double* v); int extractInTmpFS(char* path, char* tmpFsDecompressPath); //compute E[@values] in @out[0] and VAR[@values] in @out[1] of @numVals values void statsAvgVar(double* values,uint numVals, double* out); -void printMatrix(double* mat,ulong m,ulong n,char justNZMarkers); -void printVector(double* v,ulong size); +void printMatrix(double* mat,idx_t m,idx_t n,char justNZMarkers); +void printVector(double* v,idx_t size); void assertArrNoRepetitions(idx_t* arrSorted, idx_t arrLen); #endif diff --git a/base/serial/impl/sp3mm4amg/lib/parser.c b/base/serial/impl/sp3mm4amg/lib/parser.c index f527500f..ed59ac75 100644 --- a/base/serial/impl/sp3mm4amg/lib/parser.c +++ b/base/serial/impl/sp3mm4amg/lib/parser.c @@ -52,10 +52,10 @@ int MMCheck(MM_typecode mcode) { return EXIT_SUCCESS; } -entry* MMtoCOO(ulong* NZ, FILE *fp, MM_typecode mcode,ulong* rowLens){ +entry* MMtoCOO(idx_t* NZ, FILE *fp, MM_typecode mcode,idx_t* rowLens){ int scanndRet=0; - ulong nzTrgt=*NZ,nzIdx=0; //expanded num of nz (in case of sym matrix) - ulong diagEntries=0, row = 0, col = 0;//current entry's row,col from MM -> 1 based + idx_t nzTrgt=*NZ,nzIdx=0; //expanded num of nz (in case of sym matrix) + idx_t diagEntries=0, row = 0, col = 0;//current entry's row,col from MM -> 1 based double val = 0; entry* entries = NULL; //COO parsed entries ///init @@ -70,10 +70,10 @@ entry* MMtoCOO(ulong* NZ, FILE *fp, MM_typecode mcode,ulong* rowLens){ ///parse MM fp lines into COOordinate entries while (1) { // Reading the fp until EOF if (mm_is_pattern(mcode)){ - scanndRet = fscanf(fp, "%lu %lu\n", &row, &col); + scanndRet = fscanf(fp, "%d %d\n", &row, &col); val = 1.0; } else if (mm_is_real(mcode) || (mm_is_integer(mcode))){ - scanndRet = fscanf(fp, "%lu %lu %lf\n", &row, &col, &val); + scanndRet = fscanf(fp, "%d %d %lf\n", &row, &col, &val); } @@ -168,11 +168,11 @@ MatrixMarket* MMRead(char* matPath){ ////COO -> ANYTHING ELSE CONVERSION -int COOtoCSR(entry* entries, spmat* mat,ulong* rowLens){ +int COOtoCSR(entry* entries, spmat* mat,idx_t* rowLens){ int out = EXIT_FAILURE; - ulong idx; + idx_t idx; long* _rowsLastCol = NULL; //for each row -> last added entry's columnIdx - ulong* rowsNextIdx = NULL; //for each row -> next entry progressive idx + idx_t* rowsNextIdx = NULL; //for each row -> next entry progressive idx if (!(rowsNextIdx = calloc(mat->M,sizeof(*rowsNextIdx)))){ ERRPRINT("MMtoCOO: rowsNextIdx calloc errd\n"); goto _end; @@ -186,9 +186,9 @@ int COOtoCSR(entry* entries, spmat* mat,ulong* rowLens){ } /*TODO OLD * //get rowLens->IRP (partial), TODO moved MMtoCOO to avoid FULL rescan entries - * for (ulong i=0; iNZ; i++) mat->IRP[entries[i].row+1]++; + * for (idx_t i=0; iNZ; i++) mat->IRP[entries[i].row+1]++; * memcpy(mat->RL,mat->IRP + 1,sizeof(*mat->IRP) * mat->M); //TODO in next ifdef - * for (ulong i=2; iM+1; i++) mat->IRP[i] += mat->IRP[i-1]; + * for (idx_t i=2; iM+1; i++) mat->IRP[i] += mat->IRP[i-1]; * OLD2: rowLens memcpy ... no just moved the pointer * #ifdef ROWLENS * memcpy(mat->RL,rowLens,sizeof(*rowLens) * mat->M); //TODO in next ifdef @@ -197,19 +197,19 @@ int COOtoCSR(entry* entries, spmat* mat,ulong* rowLens){ //IRP: trasform rows lens as increments to build row index "pointer" //0th -> 0 mandatory; 1th = 0th row len, ...., M+1th = end of Mth row memcpy(mat->IRP+1,rowLens,sizeof(*rowLens) * mat->M);//init IRP with rows lens - for (ulong i=2; iM+1; i++) mat->IRP[i] += mat->IRP[i-1]; + for (idx_t i=2; iM+1; i++) mat->IRP[i] += mat->IRP[i-1]; CONSISTENCY_CHECKS assert(mat->IRP[mat->M] == mat->NZ); ///FILL //TODO EXPECTED entries with .col entries -> CONSISTENCY_CHECKS //sorted for each row (even nn sequential in @entries) //entries write in CSR format entry* e; - for (ulong i=0; iNZ; i++) { + for (idx_t i=0; iNZ; i++) { e = entries+i; CONSISTENCY_CHECKS{ //TODO CHECK IF COO ENTRIES ARE SORTED /*#pragma message("COO sorting check enabled")*/ if (_rowsLastCol[e->row] >= (long) e->col){ - ERRPRINTS("not sorted entry:%ld,%ld,%lf",e->row,e->col,e->val); + ERRPRINTS("not sorted entry:%d,%d,%lf",e->row,e->col,e->val); goto _end; } _rowsLastCol[e->row] = e->col; @@ -228,16 +228,16 @@ int COOtoCSR(entry* entries, spmat* mat,ulong* rowLens){ return out; } -int COOtoELL(entry* entries, spmat* mat, ulong* rowLens){ +int COOtoELL(entry* entries, spmat* mat, idx_t* rowLens){ int out=EXIT_FAILURE; - ulong maxRow = 0, col, _ellEntriesTot, *rowsNextCol; + idx_t maxRow = 0, col, _ellEntriesTot, *rowsNextCol; long* _rowsLastCol=NULL; entry* e; - for (ulong i=0; iM; i++) maxRow = MAX(maxRow,rowLens[i]); + for (idx_t i=0; iM; i++) maxRow = MAX(maxRow,rowLens[i]); _ellEntriesTot = 2*mat->M*maxRow; #ifdef LIMIT_ELL_SIZE if ( _ellEntriesTot > ELL_MAX_ENTRIES ){ - ERRPRINTS("Required entries %lu -> %lu uMB for the matrix exceed the " + ERRPRINTS("Required entries %d -> %lu uMB for the matrix exceed the " "designated threashold of: %lu -> %lu MB for ellpack\n", _ellEntriesTot,(sizeof(double)*_ellEntriesTot) >> 20, ELL_MAX_ENTRIES,(sizeof(double)*ELL_MAX_ENTRIES) >> 20); @@ -274,12 +274,12 @@ int COOtoELL(entry* entries, spmat* mat, ulong* rowLens){ ///FILL NZ //TODO EXPECTED entries with .col entries -> CONSISTENCY_CHECKS //sorted for each row (even nn sequential in @entries) - for (ulong i=0; iNZ; i++){ + for (idx_t i=0; iNZ; i++){ e = entries + i; CONSISTENCY_CHECKS{ //TODO CHECK IF COO ENTRIES ARE COLS SORTED for righe successive /*#pragma message("COO sorting check enabled")*/ if (_rowsLastCol[e->row] >= (long) e->col){ - ERRPRINTS("not sorted entry:%ld,%ld,%lf", + ERRPRINTS("not sorted entry:%d,%d,%lf", e->row,e->col,e->val); goto _end; } @@ -291,16 +291,16 @@ int COOtoELL(entry* entries, spmat* mat, ulong* rowLens){ } ///FILL PAD - ulong padded = 0,paddedEntries = mat->M*mat->MAX_ROW_NZ; - for (ulong r=0; rM; r++){ - for (ulong c=rowLens[r],j=IDX2D(r,c,maxRow); cM*mat->MAX_ROW_NZ; + for (idx_t r=0; rM; r++){ + for (idx_t c=rowLens[r],j=IDX2D(r,c,maxRow); cAS[j] = ELL_AS_FILLER; //TODO ALREADY DONE IN CALLOC //mat->JA[j] = mat->JA[rowLens[r]-1]; //ELL_JA_FILLER; //TODO calloc CUDA benefit? } } VERBOSE{ - printf("padded %lu entries = %lf%% of NZ\n",padded,100*padded/(double) mat->NZ); - printf("ELL matrix of: %lu paddedEntries -> %lu MB of JA+AS\n", + printf("padded %d entries = %lf%% of NZ\n",padded,100*padded/(double) mat->NZ); + printf("ELL matrix of: %d paddedEntries -> %ld MB of JA+AS\n", paddedEntries,(paddedEntries*sizeof(*(mat->JA))+paddedEntries*sizeof(*(mat->AS))) >> 20); } out = EXIT_SUCCESS; @@ -318,7 +318,7 @@ spmat* MMtoCSR(char* matPath){ return NULL; } if (!(mat = calloc(1,sizeof(*mat)))){ - ERRPRINT("MMtoCSR: mat struct alloc errd"); + ERRPRINT("MMtoCSR: mat struct alloc err\n"); goto err; } mat -> M = mm->M; @@ -345,7 +345,7 @@ spmat* MMtoCSR(char* matPath){ #endif VERBOSE - printf("MMtoCSR: %lu NZ entries-> %lu MB of AS+JA+IRP\n",mat->NZ, + printf("MMtoCSR: %d NZ entries-> %ld MB of AS+JA+IRP\n",mat->NZ, (mat->NZ*sizeof(*(mat->AS))+mat->NZ*sizeof(*(mat->JA))+(1+mat->M*sizeof(*(mat->IRP))))>>20); goto _free; diff --git a/base/serial/impl/sp3mm_impl.f90 b/base/serial/impl/sp3mm_impl.f90 index 972b4530..995a44c9 100644 --- a/base/serial/impl/sp3mm_impl.f90 +++ b/base/serial/impl/sp3mm_impl.f90 @@ -15,24 +15,20 @@ subroutine dspmm(a,b,c,info, impl_choice) integer(psb_ipk_), intent(in) :: impl_choice ! Internal variables - integer(c_size_t) :: a_m,a_n,a_nz - type(c_ptr) :: a_as,a_ja,a_irp - integer(c_size_t) :: a_max_row_nz - integer(c_size_t) :: b_m,b_n,b_nz - type(c_ptr) :: b_as,b_ja,b_irp - integer(c_size_t) :: b_max_row_nz - integer(c_int) :: impl_choice_ - type(c_ptr) :: accumul, rows_sizes, tmp_matrix - integer(c_size_t) :: nnz - type(c_ptr) :: c_as,c_ja,c_irp + integer(c_int) :: a_m,a_n,a_nz + type(c_ptr) :: a_as,a_ja,a_irp + integer(c_int) :: b_m,b_n,b_nz + type(c_ptr) :: b_as,b_ja,b_irp + integer(c_int) :: impl_choice_ + type(c_ptr) :: accumul, rows_sizes, tmp_matrix + integer(c_int) :: nnz + type(c_ptr) :: c_as,c_ja,c_irp interface spmm_build_spacc subroutine psb_f_spmm_build_spacc(c_a_m,c_a_n,c_a_nz,& c_a_as,c_a_ja,c_a_irp,& - c_a_max_row_nz,& c_b_m,c_b_n,c_b_nz,& c_b_as,c_b_ja,c_b_irp,& - c_b_max_row_nz,& c_impl_choice,& c_accumul,& c_rows_sizes,& @@ -41,16 +37,14 @@ subroutine dspmm(a,b,c,info, impl_choice) c_nnz) bind(C) use iso_c_binding use psb_base_mod - integer(c_size_t), intent(in), value :: c_a_m,c_a_n,c_a_nz + integer(c_int), intent(in), value :: c_a_m,c_a_n,c_a_nz type(c_ptr), intent(in) :: c_a_as,c_a_ja,c_a_irp - integer(c_size_t), intent(in), value :: c_a_max_row_nz - integer(c_size_t), intent(in), value :: c_b_m,c_b_n,c_b_nz + integer(c_int), intent(in), value :: c_b_m,c_b_n,c_b_nz type(c_ptr), intent(in) :: c_b_as,c_b_ja,c_b_irp - integer(c_size_t), intent(in), value :: c_b_max_row_nz integer(c_int), intent(in), value :: c_impl_choice type(c_ptr), intent(out) :: c_accumul,c_rows_sizes,c_tmp_matrix integer(psb_ipk_), intent(out) :: c_info - integer(c_size_t), intent(out) :: c_nnz + integer(c_int), intent(out) :: c_nnz end subroutine psb_f_spmm_build_spacc end interface spmm_build_spacc @@ -74,7 +68,6 @@ subroutine dspmm(a,b,c,info, impl_choice) a_m = a%get_nrows() a_n = a%get_ncols() a_nz = a%get_nzeros() - write(*,*) 'IRP(1:5) ',a%irp(1:5) a_as = c_loc(a%val) a_ja = c_loc(a%ja) a_irp = c_loc(a%irp) @@ -89,24 +82,25 @@ subroutine dspmm(a,b,c,info, impl_choice) ! call calculateSize call psb_f_spmm_build_spacc(a_m,a_n,a_nz,a_as,& - a_ja,a_irp,a_max_row_nz,& + a_ja,a_irp,& b_m,b_n,b_nz,b_as,b_ja,& - b_irp,b_max_row_nz,& + b_irp,& impl_choice_,accumul,& rows_sizes,tmp_matrix,& info,nnz) ! allocate c%val, c%ja and c%irp - allocate(c%val(nnz)) - allocate(c%ja(nnz)) - allocate(c%irp(a_m + 1)) - - ! c_as = c_loc(c%val) - ! c_ja = c_loc(c%ja) - ! c_irp = c_loc(c%irp) + call psb_realloc(nnz,c%val,info) + if (info /= 0) write(*,*) "reallocation failed" + call psb_realloc(nnz,c%ja,info) + call psb_realloc(a_m+1,c%irp,info) - ! c%set_nrows(a_m) - ! c%set_ncols(b_n) + c_as = c_loc(c%val) + c_ja = c_loc(c%ja) + c_irp = c_loc(c%irp) + + call c%set_nrows(a_m) + call c%set_ncols(b_n) ! call spmmRowByRowPopulate call psb_f_spmm_merge_spacc(accumul,&