changed all unsigned longs to int, modified makefile for debug purposes, reallocate c matrix arrays in sp3mm_impl.f90

sp3mm-interface
wlthr 2 years ago
parent 5e252b77bb
commit 4929991249

@ -10,7 +10,7 @@ CWALL+=-Wno-unused-label -Wfatal-errors
CINCL = -Iinclude/ CINCL = -Iinclude/
CFLAGS = -g -O3 $(CWALL) $(CINCL) -fopenmp $(RUNTIME) CFLAGS = -g -O3 $(CWALL) $(CINCL) -fopenmp $(RUNTIME)
MACROS = -DDEBUGPRINT="if(FALSE)" -DDEBUG="if(FALSE)" -DCONSISTENCY_CHECKS="if(FALSE)" -DVERBOSE="if(FALSE)" -DDEBUGCHECKS="if(FALSE)" MACROS = -DDEBUGPRINT="if(FALSE)" -DDEBUG="if(FALSE)" -DCONSISTENCY_CHECKS="if(FALSE)" -DVERBOSE="if(FALSE)" -DDEBUGCHECKS="if(FALSE)"
MACROSDBG = -DCONSISTENCY_CHECKS="if(TRUE)" -DDEBUGCHECKS="if(TRUE)" -DVERBOSE="if(TRUE)" -DDEBUG="if(TRUE)" MACROSDBG = -DCONSISTENCY_CHECKS="if(TRUE)" -DDEBUGCHECKS="if(TRUE)" -DVERBOSE="if(TRUE)" -DDEBUG="if(TRUE)" -DDEBUGPRINT="if(TRUE)"
LDFLAGS = -lm LDFLAGS = -lm
LIBDIR=../../../ LIBDIR=../../../

@ -122,7 +122,7 @@ spmat* CAT(spmmRowByRow_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
bRowLen = B->IRP[ca+1] - B->IRP[ca]; bRowLen = B->IRP[ca+1] - B->IRP[ca];
CAT(scSparseVectMul_,OFF_F)(A->AS[ja], B->AS+jb,B->JA+jb,bRowLen,acc); CAT(scSparseVectMul_,OFF_F)(A->AS[ja], B->AS+jb,B->JA+jb,bRowLen,acc);
} */ } */
for (ulong ja=A->IRP[r]-OFF_F; ja<A->IRP[r+1]-OFF_F; ja++) //row-by-row formul for (idx_t ja=A->IRP[r]-OFF_F; ja<A->IRP[r+1]-OFF_F; ja++) //row-by-row formul
CAT(scSparseRowMul_,OFF_F)(A->AS[ja], B, A->JA[ja]-OFF_F, acc); CAT(scSparseRowMul_,OFF_F)(A->AS[ja], B, A->JA[ja]-OFF_F, acc);
//direct sparsify: trasform accumulated dense vector to a CSR row //direct sparsify: trasform accumulated dense vector to a CSR row
sparsifyDirect(acc,AB,r); //0,NULL);TODO COL PARTITIONING COMMON API sparsifyDirect(acc,AB,r); //0,NULL);TODO COL PARTITIONING COMMON API
@ -165,10 +165,10 @@ spmat* CAT(spmmRowByRow1DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
///NUMERIC STEP ///NUMERIC STEP
//perform Gustavson over rows blocks -> M / @cfg->gridRows //perform Gustavson over rows blocks -> M / @cfg->gridRows
ulong rowBlock = AB->M/cfg->gridRows, rowBlockRem = AB->M%cfg->gridRows; idx_t rowBlock = AB->M/cfg->gridRows, rowBlockRem = AB->M%cfg->gridRows;
((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (cfg->gridRows,AB,cfg); ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (cfg->gridRows,AB,cfg);
AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); AUDIT_INTERNAL_TIMES Start=omp_get_wtime();
ulong b,startRow,block; //omp for aux vars idx_t b,startRow,block; //omp for aux vars
#pragma omp parallel for schedule(runtime) private(acc,startRow,block) #pragma omp parallel for schedule(runtime) private(acc,startRow,block)
for (b=0; b < cfg->gridRows; b++){ for (b=0; b < cfg->gridRows; b++){
block = UNIF_REMINDER_DISTRI(b,rowBlock,rowBlockRem); block = UNIF_REMINDER_DISTRI(b,rowBlock,rowBlockRem);
@ -184,7 +184,7 @@ spmat* CAT(spmmRowByRow1DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
bRowLen = B->IRP[ca+1] - B->IRP[ca]; bRowLen = B->IRP[ca+1] - B->IRP[ca];
CAT(scSparseVectMul_,OFF_F)(A->AS[ja], B->AS+jb,B->JA+jb,bRowLen,acc); CAT(scSparseVectMul_,OFF_F)(A->AS[ja], B->AS+jb,B->JA+jb,bRowLen,acc);
} */ } */
for (ulong ja=A->IRP[r]-OFF_F; ja<A->IRP[r+1]-OFF_F; ja++) //row-by-row formul for (idx_t ja=A->IRP[r]-OFF_F; ja<A->IRP[r+1]-OFF_F; ja++) //row-by-row formul
CAT(scSparseRowMul_,OFF_F)(A->AS[ja], B, A->JA[ja]-OFF_F, acc); CAT(scSparseRowMul_,OFF_F)(A->AS[ja], B, A->JA[ja]-OFF_F, acc);
//direct sparsify: trasform accumulated dense vector to a CSR row //direct sparsify: trasform accumulated dense vector to a CSR row
sparsifyDirect(acc,AB,r); //0,NULL);TODO COL PARTITIONING COMMON API sparsifyDirect(acc,AB,r); //0,NULL);TODO COL PARTITIONING COMMON API
@ -209,7 +209,7 @@ spmat* CAT(spmmRowByRow1DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
} }
spmat* CAT(spmmRowByRow2DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ spmat* CAT(spmmRowByRow2DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
DEBUG printf("spmm\trowBlocks of A ,\tcolBlocks of B\tM=%luxN=%lu\n",A->M,B->N); DEBUG printf("spmm\trowBlocks of A ,\tcolBlocks of B\tM=%dxN=%d\n",A->M,B->N);
idx_t* bColOffsets = NULL; //B group columns starting offset for each row idx_t* bColOffsets = NULL; //B group columns starting offset for each row
ACC_DENSE *accVectors=NULL,*accV; ACC_DENSE *accVectors=NULL,*accV;
SPACC* accRowPart; SPACC* accRowPart;
@ -218,10 +218,10 @@ spmat* CAT(spmmRowByRow2DBlocks_SymbNum_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
idx_t *rowsPartsSizes=NULL, *rowSizes=NULL; //for rows' cols partition, correct len idx_t *rowsPartsSizes=NULL, *rowSizes=NULL; //for rows' cols partition, correct len
if (!AB) goto _err; if (!AB) goto _err;
//2D indexing aux vars //2D indexing aux vars
ulong gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols; idx_t gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols;
ulong _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows; idx_t _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows;
ulong _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols; idx_t _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols;
ulong startRow,startCol,rowBlock,colBlock; //data division aux variables idx_t startRow,startCol,rowBlock,colBlock; //data division aux variables
////get bColOffsets for B column groups ////get bColOffsets for B column groups
if (!(bColOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols))) if (!(bColOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols)))
goto _err; goto _err;

@ -114,7 +114,7 @@ static inline idx_t CAT4(SpMM_Row_Symb_Rbtree,OUT_IDXS,COL_PARTS,OFF_F)
* sorting inplace the nodes inserted in the rbtree */ * sorting inplace the nodes inserted in the rbtree */
sortRbNode(nodes,abRowLen); sortRbNode(nodes,abRowLen);
#elif _OUT_IDXS == T || _COL_PARTS == T #elif _OUT_IDXS == T || _COL_PARTS == T
uint i=0; int i=0;
idx_t k; idx_t k;
#if _COL_PARTS == T #if _COL_PARTS == T
//colParts aux vars //colParts aux vars
@ -144,7 +144,7 @@ static inline idx_t CAT4(SpMM_Row_Symb_Rbtree,OUT_IDXS,COL_PARTS,OFF_F)
idx_t k; idx_t k;
for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){ for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){
k = rb_entry(n,rbNode,rb)->key; k = rb_entry(n,rbNode,rb)->key;
printf("%lu, ",k); printf("%d, ",k);
} }
printf("\n"); printf("\n");
}*/ }*/
@ -327,7 +327,8 @@ idx_t* CAT4(SpMM_Symb_,OUT_IDXS,COL_PARTS,OFF_F)
goto _err; goto _err;
} }
#endif //_COL_PARTS #endif //_COL_PARTS
uint maxThreads = omp_get_max_threads(); //TODO FROM CFG int maxThreads;
maxThreads = omp_get_max_threads(); //TODO FROM CFG
//index keeping aux struct //index keeping aux struct
//rbtree implementation or idxMap with aux of symbTree for tmp outIdx keeping //rbtree implementation or idxMap with aux of symbTree for tmp outIdx keeping
if ( rbTreeUsed ){ if ( rbTreeUsed ){
@ -340,7 +341,7 @@ idx_t* CAT4(SpMM_Symb_,OUT_IDXS,COL_PARTS,OFF_F)
goto _err; goto _err;
} }
//init roots //init roots
for (uint i=0; i<maxThreads; i++) for (int i=0; i<maxThreads; i++)
rbRoots[i] = RB_ROOT_CACHED; rbRoots[i] = RB_ROOT_CACHED;
} }
if (symbRowImplID == IDXMAP){ //idxMap implementation && not outIdxs via rbtree if (symbRowImplID == IDXMAP){ //idxMap implementation && not outIdxs via rbtree
@ -350,7 +351,7 @@ idx_t* CAT4(SpMM_Symb_,OUT_IDXS,COL_PARTS,OFF_F)
goto _err; goto _err;
} }
//init idxs maps //init idxs maps
for (uint i=0; i<maxThreads; i++){ for (int i=0; i<maxThreads; i++){
if(initSpVectIdxDenseAcc(b->N,idxsMapAccs+i)) goto _err; if(initSpVectIdxDenseAcc(b->N,idxsMapAccs+i)) goto _err;
} }
} }
@ -409,7 +410,7 @@ idx_t* CAT4(SpMM_Symb_,OUT_IDXS,COL_PARTS,OFF_F)
free(rbRoots); free(rbRoots);
free(rbNodes); free(rbNodes);
if (idxsMapAccs){ if (idxsMapAccs){
for (uint i=0; i<maxThreads; i++) free(idxsMapAccs[i].idxsMap); for (int i=0; i<maxThreads; i++) free(idxsMapAccs[i].idxsMap);
free(idxsMapAccs); free(idxsMapAccs);
} }
@ -439,7 +440,7 @@ idx_t CAT3(Sp3MM_Row_Symb_,OUT_IDXS,OFF_F)
#if _OUT_IDXS == TRUE #if _OUT_IDXS == TRUE
#ifndef OUT_IDXS_RBTREE_NODES #ifndef OUT_IDXS_RBTREE_NODES
//return the mul.result nnz index inside the rbNodes //return the mul.result nnz index inside the rbNodes
uint i=0; int i=0;
//rbNodeOrderedVisit(n,root) outIdxs[ i++ ] = rb_entry(n,rbNode,rb)->key; //rbNodeOrderedVisit(n,root) outIdxs[ i++ ] = rb_entry(n,rbNode,rb)->key;
for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){ for (struct rb_node* n = rb_first(&root->rb_root); n; n = rb_next(n)){
outIdxs[ i++ ] = rb_entry(n,rbNode,rb)->key; outIdxs[ i++ ] = rb_entry(n,rbNode,rb)->key;
@ -495,7 +496,7 @@ idx_t* CAT3(Sp3MM_Symb_,OUT_IDXS,OFF_F)
*outIdxs[i] = upperBoundedSymMat + cumul; *outIdxs[i] = upperBoundedSymMat + cumul;
#endif //#if _OUT_IDXS == TRUE #endif //#if _OUT_IDXS == TRUE
//rbTrees for index keeping //rbTrees for index keeping
uint maxThreads = omp_get_max_threads(); //TODO FROM CFG int maxThreads = omp_get_max_threads(); //TODO FROM CFG
idx_t abMaxRowLen = reductionMaxSeq(abUpperBoundedRowsLens, a->M); idx_t abMaxRowLen = reductionMaxSeq(abUpperBoundedRowsLens, a->M);
#ifdef HEURISTICS_UB #ifdef HEURISTICS_UB
idx_t maxRowLenUB = abMaxRowLen * SP3MM_UB_HEURISTIC; //TODO UB HEURISTC idx_t maxRowLenUB = abMaxRowLen * SP3MM_UB_HEURISTIC; //TODO UB HEURISTC
@ -515,7 +516,7 @@ idx_t* CAT3(Sp3MM_Symb_,OUT_IDXS,OFF_F)
goto _err; goto _err;
} }
//init roots //init roots
for (uint i=0; i<maxThreads; i++) rbRoots[i] = RB_ROOT_CACHED; for (int i=0; i<maxThreads; i++) rbRoots[i] = RB_ROOT_CACHED;
if (!( rbNodes = malloc(maxThreads * maxRowLenUB * sizeof(*rbNodes)) )){ if (!( rbNodes = malloc(maxThreads * maxRowLenUB * sizeof(*rbNodes)) )){
ERRPRINT("Sp3MM_Symb_ rbNodes malloc errdn\n"); ERRPRINT("Sp3MM_Symb_ rbNodes malloc errdn\n");
goto _err; goto _err;
@ -527,7 +528,7 @@ idx_t* CAT3(Sp3MM_Symb_,OUT_IDXS,OFF_F)
goto _err; goto _err;
} }
//init idxs maps //init idxs maps
for (uint i=0; i<maxThreads; i++){ for (int i=0; i<maxThreads; i++){
if(initSpVectIdxDenseAcc(c->N,idxsMapAccs+i)) goto _err; if(initSpVectIdxDenseAcc(c->N,idxsMapAccs+i)) goto _err;
} }
} }

@ -42,7 +42,7 @@ spmat* CAT(spmmSerial_,OFF_F)(spmat* A,spmat* B, CONFIG* _cfg){ //serial impleme
if ( allocAccDense(&acc,B->N) ) goto _free; if ( allocAccDense(&acc,B->N) ) goto _free;
if (!(AB = allocSpMatrix(A->M,B->N))) goto _free; if (!(AB = allocSpMatrix(A->M,B->N))) goto _free;
for( idx_t r=0; r<A->M; r++ ){ for( idx_t r=0; r<A->M; r++ ){
for (ulong c=A->IRP[r]-OFF_F; c<A->IRP[r+1]-OFF_F; c++) //row-by-row formul for (idx_t c=A->IRP[r]-OFF_F; c<A->IRP[r+1]-OFF_F; c++) //row-by-row formul
CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, &acc); CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, &acc);
sparsifyDirect(&acc,AB,r); //0,NULL);TODO COL PARTITIONING COMMON API sparsifyDirect(&acc,AB,r); //0,NULL);TODO COL PARTITIONING COMMON API
} }
@ -54,7 +54,7 @@ spmat* CAT(spmmSerial_,OFF_F)(spmat* A,spmat* B, CONFIG* _cfg){ //serial impleme
////////Sp3MM as 2 x SpMM ////////Sp3MM as 2 x SpMM
///1D ///1D
spmat* CAT(spmmRowByRow_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ spmat* CAT(spmmRowByRow_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
DEBUG printf("spmm\trows of A,\tfull B\tM=%lu x N=%lu\n",A->M,B->N); DEBUG printf("spmm\trows of A,\tfull B\tM=%d x N=%d\n",A->M,B->N);
///thread aux ///thread aux
ACC_DENSE *accVects = NULL,*acc; ACC_DENSE *accVects = NULL,*acc;
SPMM_ACC* outAccumul=NULL; SPMM_ACC* outAccumul=NULL;
@ -83,7 +83,7 @@ spmat* CAT(spmmRowByRow_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (AB->M,AB,cfg); ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (AB->M,AB,cfg);
AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); AUDIT_INTERNAL_TIMES Start=omp_get_wtime();
#pragma omp parallel for schedule(runtime) private(acc) #pragma omp parallel for schedule(runtime) private(acc)
for (ulong r=0; r<A->M; r++){ //row-by-row formulation for (idx_t r=0; r<A->M; r++){ //row-by-row formulation
//iterate over nz entry index c inside current row r //iterate over nz entry index c inside current row r
acc = accVects + omp_get_thread_num(); acc = accVects + omp_get_thread_num();
/* direct use of sparse scalar vector multiplication /* direct use of sparse scalar vector multiplication
@ -93,7 +93,7 @@ spmat* CAT(spmmRowByRow_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
bRowLen = B->IRP[ca+1] - B->IRP[ca]; bRowLen = B->IRP[ca+1] - B->IRP[ca];
CAT(scSparseVectMul_,OFF_F)(A->AS[ja],B->AS+jb,B->JA+jb,bRowLen,acc); CAT(scSparseVectMul_,OFF_F)(A->AS[ja],B->AS+jb,B->JA+jb,bRowLen,acc);
}*/ }*/
for (ulong c=A->IRP[r]-OFF_F; c<A->IRP[r+1]-OFF_F; c++) //row-by-row formul for (idx_t c=A->IRP[r]-OFF_F; c<A->IRP[r+1]-OFF_F; c++) //row-by-row formul
CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, acc); CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, acc);
//trasform accumulated dense vector to a CSR row //trasform accumulated dense vector to a CSR row
#if SPARSIFY_PRE_PARTITIONING == T #if SPARSIFY_PRE_PARTITIONING == T
@ -124,7 +124,7 @@ spmat* CAT(spmmRowByRow_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
} }
idx_t CAT(spmmRowByRowCalculateSize_,OFF_F)(spmat* A, spmat*B, CONFIG* cfg, void** accumul, void **rows_sizes, void** tmp_matrix){ idx_t CAT(spmmRowByRowCalculateSize_,OFF_F)(spmat* A, spmat*B, CONFIG* cfg, void** accumul, void **rows_sizes, void** tmp_matrix){
DEBUG printf("spmm\trows of A,\tfull B\tM=%lu x N=%lu\n",A->M,B->N); DEBUG printf("spmm\trows of A,\tfull B\tM=%d x N=%d\n",A->M,B->N);
///thread aux ///thread aux
ACC_DENSE *accVects = NULL,*acc; ACC_DENSE *accVects = NULL,*acc;
SPMM_ACC* outAccumul=NULL; SPMM_ACC* outAccumul=NULL;
@ -153,7 +153,7 @@ idx_t CAT(spmmRowByRowCalculateSize_,OFF_F)(spmat* A, spmat*B, CONFIG* cfg, void
((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (AB->M,AB,cfg); ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (AB->M,AB,cfg);
AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); AUDIT_INTERNAL_TIMES Start=omp_get_wtime();
#pragma omp parallel for schedule(runtime) private(acc) #pragma omp parallel for schedule(runtime) private(acc)
for (ulong r=0; r<A->M; r++){ //row-by-row formulation for (idx_t r=0; r<A->M; r++){ //row-by-row formulation
//iterate over nz entry index c inside current row r //iterate over nz entry index c inside current row r
acc = accVects + omp_get_thread_num(); acc = accVects + omp_get_thread_num();
/* direct use of sparse scalar vector multiplication /* direct use of sparse scalar vector multiplication
@ -163,7 +163,7 @@ idx_t CAT(spmmRowByRowCalculateSize_,OFF_F)(spmat* A, spmat*B, CONFIG* cfg, void
bRowLen = B->IRP[ca+1] - B->IRP[ca]; bRowLen = B->IRP[ca+1] - B->IRP[ca];
CAT(scSparseVectMul_,OFF_F)(A->AS[ja],B->AS+jb,B->JA+jb,bRowLen,acc); CAT(scSparseVectMul_,OFF_F)(A->AS[ja],B->AS+jb,B->JA+jb,bRowLen,acc);
}*/ }*/
for (ulong c=A->IRP[r]-OFF_F; c<A->IRP[r+1]-OFF_F; c++) //row-by-row formul for (idx_t c=A->IRP[r]-OFF_F; c<A->IRP[r+1]-OFF_F; c++) //row-by-row formul
CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, acc); CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, acc);
//trasform accumulated dense vector to a CSR row //trasform accumulated dense vector to a CSR row
#if SPARSIFY_PRE_PARTITIONING == T #if SPARSIFY_PRE_PARTITIONING == T
@ -197,7 +197,7 @@ idx_t CAT(spmmRowByRowCalculateSize_,OFF_F)(spmat* A, spmat*B, CONFIG* cfg, void
if(outAccumul) freeSpMMAcc(outAccumul); if(outAccumul) freeSpMMAcc(outAccumul);
} }
void CAT(spmmRowByRowPopulate_,OFF_F)(void** accumul, void** rows_sizes, void** tmp_matrix, double** AS, idx_t** JA, idx_t** IRP){ void CAT(spmmRowByRowPopulate_,OFF_F)(void** accumul, void** rows_sizes, void** tmp_matrix, double* AS, idx_t* JA, idx_t* IRP){
SPMM_ACC* outAccumul= *accumul; SPMM_ACC* outAccumul= *accumul;
idx_t* rowsSizes = *rows_sizes; idx_t* rowsSizes = *rows_sizes;
spmat *AB = *tmp_matrix; spmat *AB = *tmp_matrix;
@ -216,7 +216,7 @@ void CAT(spmmRowByRowPopulate_,OFF_F)(void** accumul, void** rows_sizes, void**
} }
spmat* CAT(spmmRowByRow1DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ spmat* CAT(spmmRowByRow1DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
DEBUG printf("spmm\trowBlocks of A,\tfull B\tM=%lu x N=%lu\n",A->M,B->N); DEBUG printf("spmm\trowBlocks of A,\tfull B\tM=%d x N=%d\n",A->M,B->N);
DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols); DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols);
///thread aux ///thread aux
ACC_DENSE *accVects = NULL,*acc; ACC_DENSE *accVects = NULL,*acc;
@ -243,10 +243,10 @@ spmat* CAT(spmmRowByRow1DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
#endif #endif
//perform Gustavson over rows blocks -> M / @cfg->gridRows //perform Gustavson over rows blocks -> M / @cfg->gridRows
ulong rowBlock = AB->M/cfg->gridRows, rowBlockRem = AB->M%cfg->gridRows; idx_t rowBlock = AB->M/cfg->gridRows, rowBlockRem = AB->M%cfg->gridRows;
((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (cfg->gridRows,AB,cfg); ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (cfg->gridRows,AB,cfg);
AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); AUDIT_INTERNAL_TIMES Start=omp_get_wtime();
ulong b,startRow,block; //omp for aux vars idx_t b,startRow,block; //omp for aux vars
#pragma omp parallel for schedule(runtime) private(acc,startRow,block) #pragma omp parallel for schedule(runtime) private(acc,startRow,block)
for (b=0; b < cfg->gridRows; b++){ for (b=0; b < cfg->gridRows; b++){
block = UNIF_REMINDER_DISTRI(b,rowBlock,rowBlockRem); block = UNIF_REMINDER_DISTRI(b,rowBlock,rowBlockRem);
@ -255,13 +255,13 @@ spmat* CAT(spmmRowByRow1DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
DEBUGPRINT{ DEBUGPRINT{
fflush(NULL); fflush(NULL);
printf("block %lu\t%lu:%lu(%lu)\n",b,startRow,startRow+block-1,block); printf("block %d\t%d:%d(%d)\n",b,startRow,startRow+block-1,block);
fflush(NULL); fflush(NULL);
} }
//row-by-row formulation in the given row block //row-by-row formulation in the given row block
for (ulong r=startRow; r<startRow+block; r++){ for (idx_t r=startRow; r<startRow+block; r++){
//iterate over nz entry index c inside current row r //iterate over nz entry index c inside current row r
for (ulong c=A->IRP[r]-OFF_F; c<A->IRP[r+1]-OFF_F; c++) for (idx_t c=A->IRP[r]-OFF_F; c<A->IRP[r+1]-OFF_F; c++)
CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, acc); CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, acc);
//trasform accumulated dense vector to a CSR row //trasform accumulated dense vector to a CSR row
#if SPARSIFY_PRE_PARTITIONING == T #if SPARSIFY_PRE_PARTITIONING == T
@ -295,7 +295,7 @@ spmat* CAT(spmmRowByRow1DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
///2D ///2D
//PARTITIONS NOT ALLOCATED //PARTITIONS NOT ALLOCATED
spmat* CAT(spmmRowByRow2DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ spmat* CAT(spmmRowByRow2DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
DEBUG printf("spmm\trowBlocks of A ,\tcolBlocks of B\tM=%luxN=%lu\n",A->M,B->N); DEBUG printf("spmm\trowBlocks of A ,\tcolBlocks of B\tM=%dxN=%d\n",A->M,B->N);
DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols); DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols);
idx_t* bColOffsets = NULL; //B group columns starting offset for each row idx_t* bColOffsets = NULL; //B group columns starting offset for each row
ACC_DENSE *accVectors=NULL,*accV; ACC_DENSE *accVectors=NULL,*accV;
@ -305,10 +305,10 @@ spmat* CAT(spmmRowByRow2DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
idx_t* rowsPartsSizes=NULL; idx_t* rowsPartsSizes=NULL;
if (!AB) goto _err; if (!AB) goto _err;
//2D indexing aux vars //2D indexing aux vars
ulong gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols; idx_t gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols;
ulong _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows; idx_t _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows;
ulong _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols; idx_t _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols;
ulong startRow,startCol,rowBlock,colBlock; //data division aux variables idx_t startRow,startCol,rowBlock,colBlock; //data division aux variables
////get bColOffsets for B column groups ////get bColOffsets for B column groups
if (!(bColOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols))) if (!(bColOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols)))
goto _err; goto _err;
@ -344,8 +344,8 @@ spmat* CAT(spmmRowByRow2DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (gridSize,AB,cfg); ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (gridSize,AB,cfg);
AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); AUDIT_INTERNAL_TIMES Start=omp_get_wtime();
ulong tileID,t_i,t_j; //for aux vars idx_t tileID,t_i,t_j; //for aux vars
ulong bPartLen,bPartID,bPartOffset;//B partition acces aux vars idx_t bPartLen,bPartID,bPartOffset;//B partition acces aux vars
#pragma omp parallel for schedule(runtime) \ #pragma omp parallel for schedule(runtime) \
private(accV,accRowPart,rowBlock,colBlock,startRow,startCol,\ private(accV,accRowPart,rowBlock,colBlock,startRow,startCol,\
bPartLen,bPartID,bPartOffset,t_i,t_j) bPartLen,bPartID,bPartOffset,t_i,t_j)
@ -364,15 +364,15 @@ spmat* CAT(spmmRowByRow2DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
DEBUGPRINT{ DEBUGPRINT{
fflush(NULL); fflush(NULL);
colBlock = UNIF_REMINDER_DISTRI(t_j,_colBlock,_colBlockRem); colBlock = UNIF_REMINDER_DISTRI(t_j,_colBlock,_colBlockRem);
printf("rowBlock [%lu\t%lu:%lu(%lu)]\t",t_i,startRow,startRow+rowBlock-1,rowBlock); printf("rowBlock [%d\t%d:%d(%d)]\t",t_i,startRow,startRow+rowBlock-1,rowBlock);
printf("colBlock [%lu\t%lu:%lu(%lu)]\n",t_j,startCol,startCol+colBlock-1,colBlock); printf("colBlock [%d\t%d:%d(%d)]\n",t_j,startCol,startCol+colBlock-1,colBlock);
fflush(NULL); fflush(NULL);
} }
///AB[t_i][t_j] block compute ///AB[t_i][t_j] block compute
for (ulong r=startRow; r<startRow+rowBlock; r++){ for (idx_t r=startRow; r<startRow+rowBlock; r++){
//iterate over nz col index j inside current row r //iterate over nz col index j inside current row r
//row-by-row restricted to colsubset of B to get AB[r][:colBlock:] //row-by-row restricted to colsubset of B to get AB[r][:colBlock:]
for (ulong j=A->IRP[r]-OFF_F,c; j<A->IRP[r+1]-OFF_F; j++){ for (idx_t j=A->IRP[r]-OFF_F,c; j<A->IRP[r+1]-OFF_F; j++){
//get start of B[A->JA[j]][:colBlock:] //get start of B[A->JA[j]][:colBlock:]
c = A->JA[j]-OFF_F; // col of nnz in A[r][:] <-> target B row c = A->JA[j]-OFF_F; // col of nnz in A[r][:] <-> target B row
bPartID = IDX2D(c,t_j,cfg->gridCols); bPartID = IDX2D(c,t_j,cfg->gridCols);
@ -415,7 +415,7 @@ spmat* CAT(spmmRowByRow2DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
} }
spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){ spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
DEBUG printf("spmm\trowBlocks of A,\tcolBlocks (allcd) of B\tM=%luxN=%lu\n",A->M,B->N); DEBUG printf("spmm\trowBlocks of A,\tcolBlocks (allcd) of B\tM=%dxN=%d\n",A->M,B->N);
DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols); DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols);
spmat *AB = NULL, *colPartsB = NULL, *colPart; spmat *AB = NULL, *colPartsB = NULL, *colPart;
idx_t* rowsPartsSizes=NULL; idx_t* rowsPartsSizes=NULL;
@ -423,14 +423,14 @@ spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg)
SPMM_ACC* outAccumul=NULL; SPMM_ACC* outAccumul=NULL;
ACC_DENSE *accVectors=NULL,*accV; ACC_DENSE *accVectors=NULL,*accV;
SPACC* accRowPart; SPACC* accRowPart;
ulong startRow,startCol,rowBlock,colBlock; //data division aux variables idx_t startRow,startCol,rowBlock,colBlock; //data division aux variables
//2D indexing aux vars //2D indexing aux vars
idx_t gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols; idx_t gridSize=cfg->gridRows*cfg->gridCols, aSubRowsN=A->M*cfg->gridCols;
idx_t* bColOffsets = NULL; idx_t* bColOffsets = NULL;
if (!(AB = allocSpMatrix(A->M,B->N))) goto _err; if (!(AB = allocSpMatrix(A->M,B->N))) goto _err;
ulong _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows; idx_t _rowBlock = AB->M/cfg->gridRows, _rowBlockRem = AB->M%cfg->gridRows;
ulong _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols; idx_t _colBlock = AB->N/cfg->gridCols, _colBlockRem = AB->N%cfg->gridCols;
////B cols partition in CSRs ////B cols partition in CSRs
//if (!(colPartsB = CAT(colsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols))) goto _err; //if (!(colPartsB = CAT(colsPartitioningUnifRanges_,OFF_F)(B,cfg->gridCols))) goto _err;
@ -465,7 +465,7 @@ spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg)
((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (gridSize,AB,cfg); ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (gridSize,AB,cfg);
AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); AUDIT_INTERNAL_TIMES Start=omp_get_wtime();
ulong tileID,t_i,t_j; //for aux vars idx_t tileID,t_i,t_j; //for aux vars
#pragma omp parallel for schedule(runtime) \ #pragma omp parallel for schedule(runtime) \
private(accV,accRowPart,colPart,rowBlock,colBlock,startRow,startCol,t_i,t_j) private(accV,accRowPart,colPart,rowBlock,colBlock,startRow,startCol,t_i,t_j)
for (tileID = 0; tileID < gridSize; tileID++){ for (tileID = 0; tileID < gridSize; tileID++){
@ -484,17 +484,17 @@ spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg)
DEBUGPRINT{ DEBUGPRINT{
fflush(NULL); fflush(NULL);
printf("rowBlock [%lu\t%lu:%lu(%lu)]\t", printf("rowBlock [%d\t%d:%d(%d)]\t",
t_i,startRow,startRow+rowBlock-1,rowBlock); t_i,startRow,startRow+rowBlock-1,rowBlock);
printf("colBlock [%lu\t%lu:%lu(%lu)]\n", printf("colBlock [%d\t%d:%d(%d)]\n",
t_j,startCol,startCol+colBlock-1,colBlock); t_j,startCol,startCol+colBlock-1,colBlock);
fflush(NULL); fflush(NULL);
} }
///AB[t_i][t_j] block compute ///AB[t_i][t_j] block compute
for (ulong r=startRow; r<startRow+rowBlock; r++){ for (idx_t r=startRow; r<startRow+rowBlock; r++){
//iterate over nz col index j inside current row r //iterate over nz col index j inside current row r
//row-by-row restricted to colsubset of B to get AB[r][:colBlock:] //row-by-row restricted to colsubset of B to get AB[r][:colBlock:]
for (ulong j=A->IRP[r]-OFF_F,c,bRowStart,bRowLen; j<A->IRP[r+1]-OFF_F; j++){ for (idx_t j=A->IRP[r]-OFF_F,c,bRowStart,bRowLen; j<A->IRP[r+1]-OFF_F; j++){
//get start of B[A->JA[j]][:colBlock:] //get start of B[A->JA[j]][:colBlock:]
c = A->JA[j]-OFF_F; // column of nnz entry in A[r][:] <-> target B row c = A->JA[j]-OFF_F; // column of nnz entry in A[r][:] <-> target B row
bRowStart = colPart->IRP[c]; bRowStart = colPart->IRP[c];
@ -532,7 +532,7 @@ spmat* CAT(spmmRowByRow2DBlocksAllocated_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg)
AB = NULL; AB = NULL;
_free: _free:
if (colPartsB){ if (colPartsB){
for (ulong i=0; i<cfg->gridCols; i++) for (idx_t i=0; i<cfg->gridCols; i++)
freeSpmatInternal(colPartsB+i); freeSpmatInternal(colPartsB+i);
free(colPartsB); free(colPartsB);
} }
@ -558,7 +558,7 @@ spmat* CAT(sp3mmRowByRowPair_,OFF_F)(spmat* R,spmat* AC,spmat* P,
/* TODO /* TODO
alloc dense aux vector, reusable over 3 product alloc dense aux vector, reusable over 3 product
TODO arrays sovrallocati per poter essere riusati nelle 2 SpMM TODO arrays sovrallocati per poter essere riusati nelle 2 SpMM
ulong auxVectSize = MAX(R->N,AC->N); idx_t auxVectSize = MAX(R->N,AC->N);
auxVectSize = MAX(auxVectSize,P->N); auxVectSize = MAX(auxVectSize,P->N);
*/ */
@ -590,7 +590,7 @@ spmat* CAT(sp3mmRowByRowPair_,OFF_F)(spmat* R,spmat* AC,spmat* P,
spmat* CAT(sp3mmRowByRowMerged_,OFF_F)(spmat* R,spmat* AC,spmat* P,CONFIG* cfg, spmat* CAT(sp3mmRowByRowMerged_,OFF_F)(spmat* R,spmat* AC,spmat* P,CONFIG* cfg,
SPMM_INTERF spmm){ SPMM_INTERF spmm){
ulong* rowSizes = NULL; idx_t* rowSizes = NULL;
SPMM_ACC* outAccumul=NULL; SPMM_ACC* outAccumul=NULL;
ACC_DENSE *accVectorsR_AC=NULL,*accVectorsRAC_P=NULL,*accRAC,*accRACP; ACC_DENSE *accVectorsR_AC=NULL,*accVectorsRAC_P=NULL,*accRAC,*accRACP;
@ -619,11 +619,11 @@ spmat* CAT(sp3mmRowByRowMerged_,OFF_F)(spmat* R,spmat* AC,spmat* P,CONFIG* cfg,
goto _err; goto _err;
} }
ulong c; idx_t c;
((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (R->M,R,cfg); ((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (R->M,R,cfg);
AUDIT_INTERNAL_TIMES Start=omp_get_wtime(); AUDIT_INTERNAL_TIMES Start=omp_get_wtime();
#pragma omp parallel for schedule(runtime) private(accRAC,accRACP,c) #pragma omp parallel for schedule(runtime) private(accRAC,accRACP,c)
for (ulong r=0; r<R->M; r++){ //row-by-row formulation for (idx_t r=0; r<R->M; r++){ //row-by-row formulation
//iterate over nz entry index c inside current row r //iterate over nz entry index c inside current row r
accRAC = accVectorsR_AC + omp_get_thread_num(); accRAC = accVectorsR_AC + omp_get_thread_num();
accRACP = accVectorsRAC_P + omp_get_thread_num(); accRACP = accVectorsRAC_P + omp_get_thread_num();

@ -46,10 +46,10 @@
* alloc threads' aux arrays once and split them in threads' structures * alloc threads' aux arrays once and split them in threads' structures
* so free them once from the first thread struct, with the original pointers returned from the alloc * so free them once from the first thread struct, with the original pointers returned from the alloc
*/ */
ACC_DENSE* _initAccVectors_monoalloc(ulong num,ulong size){ //TODO PERF WITH NEXT ACC_DENSE* _initAccVectors_monoalloc(idx_t num,idx_t size){ //TODO PERF WITH NEXT
ACC_DENSE* out = NULL; ACC_DENSE* out = NULL;
double* vAll = NULL; double* vAll = NULL;
ulong* vAllNzIdx = NULL; idx_t* vAllNzIdx = NULL;
if (!(out = calloc(num,sizeof(*out)))){ if (!(out = calloc(num,sizeof(*out)))){
ERRPRINT("_initAccVectors aux struct alloc failed\n"); ERRPRINT("_initAccVectors aux struct alloc failed\n");
return NULL; return NULL;
@ -62,7 +62,7 @@ ACC_DENSE* _initAccVectors_monoalloc(ulong num,ulong size){ //TODO PERF WITH NEX
ERRPRINT("_initAccVectors aux dense vectors' idx alloc failed\n"); ERRPRINT("_initAccVectors aux dense vectors' idx alloc failed\n");
goto err; goto err;
} }
for (ulong i=0; i<num; i++){ for (idx_t i=0; i<num; i++){
out[i].vLen = size; //TODO USELESS INFO? out[i].vLen = size; //TODO USELESS INFO?
out[i].v = vAll + i*size; out[i].v = vAll + i*size;
out[i].nnzIdx = vAllNzIdx + i*size;; out[i].nnzIdx = vAllNzIdx + i*size;;
@ -76,7 +76,7 @@ ACC_DENSE* _initAccVectors_monoalloc(ulong num,ulong size){ //TODO PERF WITH NEX
free(vAllNzIdx); free(vAllNzIdx);
return NULL; return NULL;
} }
int allocAccDense(ACC_DENSE* v,ulong size){ int allocAccDense(ACC_DENSE* v,idx_t size){
v->vLen = size; v->vLen = size;
if (!(v->v = calloc(size,sizeof(*(v->v))))) { if (!(v->v = calloc(size,sizeof(*(v->v))))) {
ERRPRINT("_initAccVectors aux dense vector alloc failed\n"); ERRPRINT("_initAccVectors aux dense vector alloc failed\n");
@ -91,19 +91,19 @@ int allocAccDense(ACC_DENSE* v,ulong size){
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
ACC_DENSE* _initAccVectors(ulong num,ulong size){ ACC_DENSE* _initAccVectors(idx_t num,idx_t size){
ACC_DENSE* out = NULL; ACC_DENSE* out = NULL;
if (!(out = calloc(num,sizeof(*out)))){ if (!(out = calloc(num,sizeof(*out)))){
ERRPRINT("_initAccVectors aux struct alloc failed\n"); ERRPRINT("_initAccVectors aux struct alloc failed\n");
return NULL; return NULL;
} }
for (ulong i=0; i<num; i++){ for (idx_t i=0; i<num; i++){
if (allocAccDense(out+i,size)) goto _err; if (allocAccDense(out+i,size)) goto _err;
} }
return out; return out;
_err: _err:
for (ulong i=0; i<num; i++){ for (idx_t i=0; i<num; i++){
if (out[i].v) free(out[i].v); if (out[i].v) free(out[i].v);
if (out[i].nnzIdx) free(out[i].nnzIdx); if (out[i].nnzIdx) free(out[i].nnzIdx);
} }
@ -111,17 +111,17 @@ ACC_DENSE* _initAccVectors(ulong num,ulong size){
return NULL; return NULL;
} }
void freeAccsDense(ACC_DENSE* vectors,ulong num){ void freeAccsDense(ACC_DENSE* vectors,idx_t num){
for (ulong i=0; i<num; i++){ for (idx_t i=0; i<num; i++){
free(vectors[i].v); free(vectors[i].v);
free(vectors[i].nnzIdx); free(vectors[i].nnzIdx);
free(vectors[i].nnzIdxMap.idxsMap); free(vectors[i].nnzIdxMap.idxsMap);
} }
free(vectors); free(vectors);
} }
void _freeAccsDenseChecks(ACC_DENSE* vectors,ulong num){ void _freeAccsDenseChecks(ACC_DENSE* vectors,idx_t num){
if (!vectors) return; if (!vectors) return;
for (ulong i=0; i<num; i++){ for (idx_t i=0; i<num; i++){
if(vectors[i].v) free(vectors[i].v); if(vectors[i].v) free(vectors[i].v);
if(vectors[i].nnzIdx) free(vectors[i].nnzIdx); if(vectors[i].nnzIdx) free(vectors[i].nnzIdx);
} }
@ -143,8 +143,8 @@ int initSpVectIdxDenseAcc(idx_t idxMax,SPVECT_IDX_DENSE_MAP* vectIdxsMap){
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
void checkOverallocPercent(ulong* forecastedSizes,spmat* AB){ void checkOverallocPercent(idx_t* forecastedSizes,spmat* AB){
for (ulong r=0,rSize,forecastedSize; r < AB->M; r++){ for (idx_t r=0,rSize,forecastedSize; r < AB->M; r++){
forecastedSize = forecastedSizes[r]; forecastedSize = forecastedSizes[r];
#ifdef ROWLENS #ifdef ROWLENS
rSize = AB->RL[r]; rSize = AB->RL[r];
@ -158,11 +158,11 @@ void checkOverallocPercent(ulong* forecastedSizes,spmat* AB){
} }
} }
DEBUGPRINT DEBUGPRINT
printf("extra forecastedSize of row: %lu\t=\t%lf %% \n", printf("extra forecastedSize of row: %d\t=\t%lf %% \n",
r,100*(forecastedSize-rSize) / (double) forecastedSize); r,100*(forecastedSize-rSize) / (double) forecastedSize);
} }
idx_t extraMatrix = forecastedSizes[AB->M] - AB->NZ; idx_t extraMatrix = forecastedSizes[AB->M] - AB->NZ;
printf("extra forecastedSize of the matrix: \t%lu\t = %lf %% \n", printf("extra forecastedSize of the matrix: \t%d\t = %lf %% \n",
extraMatrix, 100*extraMatrix /(double) forecastedSizes[AB->M]); extraMatrix, 100*extraMatrix /(double) forecastedSizes[AB->M]);
} }
int spmatDiff(spmat* A, spmat* B){ int spmatDiff(spmat* A, spmat* B){
@ -187,8 +187,8 @@ int spmatDiff(spmat* A, spmat* B){
double* CSRToDense(spmat* sparseMat){ double* CSRToDense(spmat* sparseMat){
double* denseMat; double* denseMat;
ulong i,j,idxNZ,denseSize; idx_t i,j,idxNZ,denseSize;
if (__builtin_umull_overflow(sparseMat->M,sparseMat->N,&denseSize)){ if (__builtin_smul_overflow(sparseMat->M,sparseMat->N,&denseSize)){
ERRPRINT("overflow in dense allocation\n"); ERRPRINT("overflow in dense allocation\n");
return NULL; return NULL;
} }
@ -200,7 +200,7 @@ double* CSRToDense(spmat* sparseMat){
for (idxNZ=sparseMat->IRP[i]; idxNZ<sparseMat->IRP[i+1]; ++idxNZ){ for (idxNZ=sparseMat->IRP[i]; idxNZ<sparseMat->IRP[i+1]; ++idxNZ){
j = sparseMat->JA[idxNZ]; j = sparseMat->JA[idxNZ];
//converting sparse item into dense entry //converting sparse item into dense entry
denseMat[(ulong) IDX2D(i,j,sparseMat->N)] = sparseMat->AS[idxNZ]; denseMat[(idx_t) IDX2D(i,j,sparseMat->N)] = sparseMat->AS[idxNZ];
} }
} }
return denseMat; return denseMat;
@ -214,17 +214,17 @@ void printSparseMatrix(spmat* spMatrix,char justNZMarkers){
static inline int _colsPartitioningUnifRanges_init(spmat* A,uint gridCols, static inline int _colsPartitioningUnifRanges_init(spmat* A,int gridCols,
spmat** colParts,idx_t** colPartsLens){ spmat** colParts,idx_t** colPartsLens){
spmat* colPart; spmat* colPart;
ulong _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols, *tmpJA; idx_t _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols, *tmpJA;
///alloc/init partitions structures ///alloc/init partitions structures
if (!(*colParts = calloc(gridCols, sizeof(**colParts)))){ if (!(*colParts = calloc(gridCols, sizeof(**colParts)))){
ERRPRINT("colsPartitioningUnifRanges\tcolumns partitions of A calloc fail\n"); ERRPRINT("colsPartitioningUnifRanges\tcolumns partitions of A calloc fail\n");
return EXIT_FAILURE; return EXIT_FAILURE;
} }
for (ulong i=0,colBlock; i<gridCols; i++){ for (idx_t i=0,colBlock; i<gridCols; i++){
colBlock = UNIF_REMINDER_DISTRI(i,_colBlock,_colBlockRem); colBlock = UNIF_REMINDER_DISTRI(i,_colBlock,_colBlockRem);
colPart = *colParts + i; colPart = *colParts + i;
if (allocSpMatrixInternal(A->M,colBlock,colPart)){ if (allocSpMatrixInternal(A->M,colBlock,colPart)){
@ -249,13 +249,13 @@ static inline int _colsPartitioningUnifRanges_init(spmat* A,uint gridCols,
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
static inline int _colsPartitioningUnifRanges_finalRealloc(spmat* A,uint gridCols, static inline int _colsPartitioningUnifRanges_finalRealloc(spmat* A,int gridCols,
spmat* colParts,idx_t* colPartsLens){ spmat* colParts,idx_t* colPartsLens){
spmat* colPart; spmat* colPart;
double* tmpAS; idx_t* tmpJA; double* tmpAS; idx_t* tmpJA;
//realloc overallcd A parts NZ arrays (TODO -> downsizing -> nofails?) //realloc overallcd A parts NZ arrays (TODO -> downsizing -> nofails?)
for (ulong i=0,partLen; i<gridCols; i++){ for (idx_t i=0,partLen; i<gridCols; i++){
colPart = colParts + i; colPart = colParts + i;
partLen = colPartsLens[i]; partLen = colPartsLens[i];
if (!(tmpAS = realloc(colPart->AS,partLen*sizeof(*(colPart->AS))))){ if (!(tmpAS = realloc(colPart->AS,partLen*sizeof(*(colPart->AS))))){
@ -281,22 +281,22 @@ static inline int _colsPartitioningUnifRanges_finalRealloc(spmat* A,uint gridCol
#endif #endif
//////////////////////// CSR SPECIFIC -- TODO RENAME ////////////////// //////////////////////// CSR SPECIFIC -- TODO RENAME //////////////////
///SPARSE MATRIX PARTITIONING ///SPARSE MATRIX PARTITIONING
idx_t* CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols){ idx_t* CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(spmat* A,int gridCols){
ulong subRowsN = A->M * gridCols; idx_t subRowsN = A->M * gridCols;
ulong _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols; idx_t _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols;
ulong* offsets = malloc( (subRowsN+1) * sizeof(*offsets) ); idx_t* offsets = malloc( (subRowsN+1) * sizeof(*offsets) );
if (!offsets) { if (!offsets) {
ERRPRINT("colsOffsetsPartitioningUnifRanges:\toffsets malloc errd\n"); ERRPRINT("colsOffsetsPartitioningUnifRanges:\toffsets malloc errd\n");
return NULL; return NULL;
} }
///OFFSETS COMPUTE FOR COL GROUPS -> O( A.NZ ) ///OFFSETS COMPUTE FOR COL GROUPS -> O( A.NZ )
for (ulong r=0, j=0; r<A->M; j=A->IRP[++r]-OFF_F){ for (idx_t r=0, j=0; r<A->M; j=A->IRP[++r]-OFF_F){
offsets[ IDX2D(r,0,gridCols) ] = j; //row's first gc start is costrained offsets[ IDX2D(r,0,gridCols) ] = j; //row's first gc start is costrained
//navigate column groups inside current row //navigate column groups inside current row
for (ulong gc=1,gcStartCol; gc<gridCols; gc++){ for (idx_t gc=1,gcStartCol; gc<gridCols; gc++){
gcStartCol = UNIF_REMINDER_DISTRI_STARTIDX(gc,_colBlock,_colBlockRem); gcStartCol = UNIF_REMINDER_DISTRI_STARTIDX(gc,_colBlock,_colBlockRem);
//goto GroupCols start entry,keeping A's nnz entries navigation (idx j) //goto GroupCols start entry,keeping A's nnz entries navigation (idx j)
//for (ulong c=A->JA[j]-OFF_F; c<gcStartCol && j < A->IRP[r+1]-OFF_F; c=A->JA[++j]-OFF_F); //for (idx_t c=A->JA[j]-OFF_F; c<gcStartCol && j < A->IRP[r+1]-OFF_F; c=A->JA[++j]-OFF_F);
while (j < A->IRP[r+1]-OFF_F && A->JA[j]-OFF_F < gcStartCol) while (j < A->IRP[r+1]-OFF_F && A->JA[j]-OFF_F < gcStartCol)
j++; j++;
offsets[ IDX2D(r,gc,gridCols) ] = j; //row's gc group startIdx offsets[ IDX2D(r,gc,gridCols) ] = j; //row's gc group startIdx
@ -306,12 +306,12 @@ idx_t* CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols){
return offsets; return offsets;
} }
spmat* CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)(spmat* A,uint gridCols, spmat* CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)(spmat* A,int gridCols,
idx_t** colPartsOffsets) idx_t** colPartsOffsets)
{ {
spmat *colParts = NULL, *colPart; spmat *colParts = NULL, *colPart;
ulong _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols; idx_t _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols;
ulong *colPartsLens=NULL, *tmpJA; idx_t *colPartsLens=NULL, *tmpJA;
double* tmpAS; double* tmpAS;
///alloc/init partitions structures ///alloc/init partitions structures
@ -349,15 +349,15 @@ spmat* CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)(spmat* A,uint gridCols,
return colParts; return colParts;
_err: _err:
free(*colPartsOffsets); free(*colPartsOffsets);
for (ulong i=0; i<gridCols; i++) freeSpmatInternal(colParts+i); for (idx_t i=0; i<gridCols; i++) freeSpmatInternal(colParts+i);
free(colOffsets); free(colOffsets);
free(colParts); free(colParts);
free(colPartsLens); free(colPartsLens);
return NULL; return NULL;
} }
spmat* CAT(colsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols){ spmat* CAT(colsPartitioningUnifRanges_,OFF_F)(spmat* A,int gridCols){
spmat *colParts, *colPart; spmat *colParts, *colPart;
ulong _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols, *colPartsLens=NULL, *tmpJA; idx_t _colBlock = A->N/gridCols, _colBlockRem = A->N%gridCols, *colPartsLens=NULL, *tmpJA;
double* tmpAS; double* tmpAS;
///alloc/init partitions structures ///alloc/init partitions structures
if (_colsPartitioningUnifRanges_init(A,gridCols,&colParts,&colPartsLens)) if (_colsPartitioningUnifRanges_init(A,gridCols,&colParts,&colPartsLens))
@ -366,9 +366,9 @@ spmat* CAT(colsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols){
* Parallelize: 2for collapse OMP, gcEndCol -> startIdxize, ... * Parallelize: 2for collapse OMP, gcEndCol -> startIdxize, ...
* oppure wrappare cio in static inline * oppure wrappare cio in static inline
*/ */
for (ulong r=0, j=0; r<A->M; j=A->IRP[++r]-OFF_F){ for (idx_t r=0, j=0; r<A->M; j=A->IRP[++r]-OFF_F){
//navigate column groups inside current row //navigate column groups inside current row
for (ulong gc=0,gcEndCol=0,i; gc<gridCols ; gc++,j+=i){ for (idx_t gc=0,gcEndCol=0,i; gc<gridCols ; gc++,j+=i){
i = 0; //@i=len current subpartition of row @r to copy i = 0; //@i=len current subpartition of row @r to copy
colPart = colParts + gc; colPart = colParts + gc;
//NB: not shifting IRP because is handled as internal implementation component //NB: not shifting IRP because is handled as internal implementation component
@ -376,7 +376,7 @@ spmat* CAT(colsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols){
colPart->IRP[r] = colPartsLens[gc]; colPart->IRP[r] = colPartsLens[gc];
gcEndCol += UNIF_REMINDER_DISTRI(gc,_colBlock,_colBlockRem); gcEndCol += UNIF_REMINDER_DISTRI(gc,_colBlock,_colBlockRem);
//goto next GroupCols,keeping A's nnz entries navigation ( index j+i ) //goto next GroupCols,keeping A's nnz entries navigation ( index j+i )
//for (ulong c=A->JA[j+i]-OFF_F; c<gcEndCol && j+i < A->IRP[r+1]-OFF_F; c=A->JA[j+ ++i]-OFF_F); //for (idx_t c=A->JA[j+i]-OFF_F; c<gcEndCol && j+i < A->IRP[r+1]-OFF_F; c=A->JA[j+ ++i]-OFF_F);
while ( j+i < A->IRP[r+1]-OFF_F && A->JA[j+i]-OFF_F < gcEndCol ) i++; while ( j+i < A->IRP[r+1]-OFF_F && A->JA[j+i]-OFF_F < gcEndCol ) i++;
memcpy(colPart->AS+colPart->IRP[r], A->AS+j, i*sizeof(*A->AS)); memcpy(colPart->AS+colPart->IRP[r], A->AS+j, i*sizeof(*A->AS));
memcpy(colPart->JA+colPart->IRP[r], A->JA+j, i*sizeof(*A->JA)); memcpy(colPart->JA+colPart->IRP[r], A->JA+j, i*sizeof(*A->JA));
@ -393,14 +393,14 @@ spmat* CAT(colsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols){
free(colPartsLens); free(colPartsLens);
return colParts; return colParts;
_err: _err:
for (ulong i=0; i<gridCols; i++) freeSpmatInternal(colParts+i); for (idx_t i=0; i<gridCols; i++) freeSpmatInternal(colParts+i);
if(colParts) free(colParts); if(colParts) free(colParts);
if(colPartsLens) free(colPartsLens); if(colPartsLens) free(colPartsLens);
return NULL; return NULL;
} }
void CAT(checkOverallocRowPartsPercent_,OFF_F)(ulong* forecastedSizes,spmat* AB, void CAT(checkOverallocRowPartsPercent_,OFF_F)(idx_t* forecastedSizes,spmat* AB,
idx_t gridCols,idx_t* bColOffsets){ idx_t gridCols,idx_t* bColOffsets){
idx_t* abColOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(AB, gridCols); idx_t* abColOffsets = CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(AB, gridCols);
assert(abColOffsets); //partitioning error assert(abColOffsets); //partitioning error
@ -410,7 +410,7 @@ void CAT(checkOverallocRowPartsPercent_,OFF_F)(ulong* forecastedSizes,spmat* AB,
DEBUGCHECKS assert(forecast >= rLen); DEBUGCHECKS assert(forecast >= rLen);
} }
idx_t extraMatrix = forecastedSizes[AB->M] - AB->NZ; idx_t extraMatrix = forecastedSizes[AB->M] - AB->NZ;
printf("extra forecastedSize of the matrix: \t%lu\t = %lf %% \n", printf("extra forecastedSize of the matrix: \t%d\t = %lf %% \n",
extraMatrix, 100*extraMatrix /(double) forecastedSizes[AB->M]); extraMatrix, 100*extraMatrix /(double) forecastedSizes[AB->M]);
free(abColOffsets); free(abColOffsets);
@ -422,25 +422,25 @@ void CAT(checkOverallocRowPartsPercent_,OFF_F)(ulong* forecastedSizes,spmat* AB,
///inline export here ///inline export here
//SPMV_CHUNKS_DISTR spmvChunksFair; //SPMV_CHUNKS_DISTR spmvChunksFair;
spmat* allocSpMatrix(ulong rows, ulong cols); spmat* allocSpMatrix(idx_t rows, idx_t cols);
int allocSpMatrixInternal(ulong rows, ulong cols, spmat* mat); int allocSpMatrixInternal(idx_t rows, idx_t cols, spmat* mat);
void freeSpmatInternal(spmat* mat); void freeSpmatInternal(spmat* mat);
void freeSpmat(spmat* mat); void freeSpmat(spmat* mat);
////INTERNAL TEST FUNCTIONS ////INTERNAL TEST FUNCTIONS
//test that each row's partition from colsOffsetsPartitioningUnifRanges is in the correct index range //test that each row's partition from colsOffsetsPartitioningUnifRanges is in the correct index range
#include <alloca.h> #include <alloca.h>
int testColsOffsetsPartitioningUnifRanges(spmat* mat,ulong gridCols,ulong* partsOffs){ int testColsOffsetsPartitioningUnifRanges(spmat* mat,idx_t gridCols,idx_t* partsOffs){
ulong _colBlock = mat->N/gridCols, _colBlockRem = mat->N%gridCols; idx_t _colBlock = mat->N/gridCols, _colBlockRem = mat->N%gridCols;
ulong j=0; //CSR scanning nnz idx idx_t j=0; //CSR scanning nnz idx
ulong* colPartsPopulations = alloca(gridCols * sizeof(*colPartsPopulations)); idx_t* colPartsPopulations = alloca(gridCols * sizeof(*colPartsPopulations));
memset(colPartsPopulations,0,gridCols * sizeof(*colPartsPopulations)); memset(colPartsPopulations,0,gridCols * sizeof(*colPartsPopulations));
for (ulong r=0,pId=0; r<mat->M; r++){ for (idx_t r=0,pId=0; r<mat->M; r++){
for (ulong gc=0,pStartIdx,pEndIdx; gc<gridCols; gc++,pId++){ for (idx_t gc=0,pStartIdx,pEndIdx; gc<gridCols; gc++,pId++){
pStartIdx = UNIF_REMINDER_DISTRI_STARTIDX(gc,_colBlock,_colBlockRem); pStartIdx = UNIF_REMINDER_DISTRI_STARTIDX(gc,_colBlock,_colBlockRem);
pEndIdx = UNIF_REMINDER_DISTRI_STARTIDX(gc+1,_colBlock,_colBlockRem)-1; pEndIdx = UNIF_REMINDER_DISTRI_STARTIDX(gc+1,_colBlock,_colBlockRem)-1;
//pId=IDX2D(r,gc,gridCols); //pId=IDX2D(r,gc,gridCols);
for (ulong idx=partsOffs[pId],c; idx<partsOffs[pId+1]; idx++,j++){ for (idx_t idx=partsOffs[pId],c; idx<partsOffs[pId+1]; idx++,j++){
c = mat->JA[idx]; c = mat->JA[idx];
assert(j == idx); //consecutive index in partitioning assert(j == idx); //consecutive index in partitioning
assert(pStartIdx <= c && c <= pEndIdx); //colRange assert(pStartIdx <= c && c <= pEndIdx); //colRange
@ -450,12 +450,12 @@ int testColsOffsetsPartitioningUnifRanges(spmat* mat,ulong gridCols,ulong* parts
} }
} }
assert(j == mat->NZ); assert(j == mat->NZ);
ulong s=0; idx_t s=0;
for (ulong gc=0,partSize; gc < gridCols; gc++,s+=partSize){ for (idx_t gc=0,partSize; gc < gridCols; gc++,s+=partSize){
partSize = colPartsPopulations[gc]; partSize = colPartsPopulations[gc];
double partShare=partSize/(double)mat->NZ,partsAvg=1/(double)gridCols; double partShare=partSize/(double)mat->NZ,partsAvg=1/(double)gridCols;
double partShareAvgDiff = partShare - partsAvg; double partShareAvgDiff = partShare - partsAvg;
printf("colPartition %lu has:\t%lu = %lf of NNZ\t\t .. %lf\tAVG diff\n", printf("colPartition %d has:\t%d = %lf of NNZ\t\t .. %lf\tAVG diff\n",
gc,partSize,partShare,partShareAvgDiff); gc,partSize,partShare,partShareAvgDiff);
} }
assert(s == mat->NZ); //TODO DUPLICATED assert(s == mat->NZ); //TODO DUPLICATED
@ -482,14 +482,14 @@ int main(int argc, char** argv){
return out; return out;
} }
////partitioning test ////partitioning test
ulong* colsPartitions = colsOffsetsPartitioningUnifRanges_0(mat,Conf.gridCols); idx_t* colsPartitions = colsOffsetsPartitioningUnifRanges_0(mat,Conf.gridCols);
if (!colsPartitions) goto _free; if (!colsPartitions) goto _free;
if (testColsOffsetsPartitioningUnifRanges(mat,Conf.gridCols,colsPartitions)) if (testColsOffsetsPartitioningUnifRanges(mat,Conf.gridCols,colsPartitions))
goto _free; goto _free;
out=EXIT_SUCCESS; out=EXIT_SUCCESS;
printf("testColsOffsetsPartitioningUnifRanges passed with " printf("testColsOffsetsPartitioningUnifRanges passed with "
"mat: %lux%lu-%luNNZ\tgrid: %dx%d\n", "mat: %dx%d-%dNNZ\tgrid: %dx%d\n",
mat->M,mat->N,mat->NZ,Conf.gridRows,Conf.gridCols); mat->M,mat->N,mat->NZ,Conf.gridRows,Conf.gridCols);
_free: _free:
if (colsPartitions) free(colsPartitions); if (colsPartitions) free(colsPartitions);

@ -145,12 +145,12 @@ int fread_wrap(FILE* fp,void* dst,size_t count){
return rd; return rd;
} }
///STRUCTURED DATA IO ///STRUCTURED DATA IO
int writeDoubleVector(char* fpath,double* v,ulong size){ int writeDoubleVector(char* fpath,double* v,idx_t size){
int fd,out=EXIT_FAILURE; int fd,out=EXIT_FAILURE;
if ( (fd = createNewFile(fpath) ) < 0 ) goto _end; if ( (fd = createNewFile(fpath) ) < 0 ) goto _end;
write_wrap(fd,v,size * sizeof(*v)); //raw write of vector write_wrap(fd,v,size * sizeof(*v)); //raw write of vector
out = EXIT_SUCCESS; out = EXIT_SUCCESS;
DEBUG printf("written double vector into %s RAW of %lu elements\n",fpath,size); DEBUG printf("written double vector into %s RAW of %d elements\n",fpath,size);
_end: _end:
if (close(fd) == EOF) perror("close errd\n"); if (close(fd) == EOF) perror("close errd\n");
@ -158,21 +158,21 @@ int writeDoubleVector(char* fpath,double* v,ulong size){
} }
///STRUCTURED DATA IO -- BUFFERED: FSCANF - FPRINTF ///STRUCTURED DATA IO -- BUFFERED: FSCANF - FPRINTF
int writeDoubleVectorAsStr(char* fpath,double* v,ulong size){ int writeDoubleVectorAsStr(char* fpath,double* v,idx_t size){
int out=EXIT_FAILURE; int out=EXIT_FAILURE;
FILE* fp = fopen(fpath,"w"); FILE* fp = fopen(fpath,"w");
if (!fp){ if (!fp){
perror("fopen vector file write"); perror("fopen vector file write");
return EXIT_FAILURE; return EXIT_FAILURE;
} }
for (ulong i=0; i<size; i++){ for (idx_t i=0; i<size; i++){
if (fprintf(fp,DOUBLE_STR_FORMAT,v[i]) < 0){ if (fprintf(fp,DOUBLE_STR_FORMAT,v[i]) < 0){
ERRPRINT("fprintf to out vector file errd\n"); ERRPRINT("fprintf to out vector file errd\n");
goto _end; goto _end;
} }
} }
out = EXIT_SUCCESS; out = EXIT_SUCCESS;
DEBUG printf("written vector into %s of %lu elements\n",fpath,size); DEBUG printf("written vector into %s of %d elements\n",fpath,size);
_end: _end:
if (fclose(fp) == EOF) perror("fclose errd\n"); if (fclose(fp) == EOF) perror("fclose errd\n");
@ -180,10 +180,10 @@ int writeDoubleVectorAsStr(char* fpath,double* v,ulong size){
} }
double* readDoubleVector(char* fpath,ulong* size){ double* readDoubleVector(char* fpath,idx_t* size){
int rd,hleft; int rd,hleft;
double *out,*tmp; double *out,*tmp;
ulong i=0,vectorSize=RNDVECTORSIZE; idx_t i=0,vectorSize=RNDVECTORSIZE;
if (*size) vectorSize = *size; if (*size) vectorSize = *size;
FILE* fp = fopen(fpath,"r"); FILE* fp = fopen(fpath,"r");
if (!fp){ if (!fp){
@ -198,13 +198,13 @@ double* readDoubleVector(char* fpath,ulong* size){
if (i >= vectorSize ){ //realloc the array if (i >= vectorSize ){ //realloc the array
vectorSize *= VECTOR_STEP_REALLOC; vectorSize *= VECTOR_STEP_REALLOC;
if (!(tmp=realloc(out,vectorSize*sizeof(*out)))){ if (!(tmp=realloc(out,vectorSize*sizeof(*out)))){
ERRPRINTS("realloc errd to ~~ %lu MB\n",vectorSize >> 20); ERRPRINTS("realloc errd to ~~ %d MB\n",vectorSize >> 20);
goto _err; goto _err;
} }
out = tmp; out = tmp;
DEBUG printf("reallocd to ~~ %lu MB\n",vectorSize >> 20); DEBUG printf("reallocd to ~~ %d MB\n",vectorSize >> 20);
} }
ulong toRead = MIN(VECTOR_READ_BLOCK,(vectorSize-i)); idx_t toRead = MIN(VECTOR_READ_BLOCK,(vectorSize-i));
if((rd = fread_wrap(fp,out + i,sizeof(*out)*toRead)) == -2){ if((rd = fread_wrap(fp,out + i,sizeof(*out)*toRead)) == -2){
ERRPRINT("fread_wrap errd\n"); ERRPRINT("fread_wrap errd\n");
goto _err; goto _err;
@ -227,7 +227,7 @@ double* readDoubleVector(char* fpath,ulong* size){
goto _err; goto _err;
} }
out = tmp; out = tmp;
DEBUG printf("readed vector from %s of %lu elements\n",fpath,*size); DEBUG printf("readed vector from %s of %d elements\n",fpath,*size);
goto _free; goto _free;
_err: _err:
@ -238,10 +238,10 @@ double* readDoubleVector(char* fpath,ulong* size){
return out; return out;
} }
double* readDoubleVectorStr(char* fpath,ulong* size){ double* readDoubleVectorStr(char* fpath,idx_t* size){
int fscanfOut; int fscanfOut;
double *out,*tmp; double *out,*tmp;
ulong i=0,vectorSize=RNDVECTORSIZE; idx_t i=0,vectorSize=RNDVECTORSIZE;
if (*size) vectorSize = *size; if (*size) vectorSize = *size;
FILE* fp = fopen(fpath,"r"); FILE* fp = fopen(fpath,"r");
if (!fp){ if (!fp){
@ -256,11 +256,11 @@ double* readDoubleVectorStr(char* fpath,ulong* size){
if (i >= vectorSize ){ //realloc the array if (i >= vectorSize ){ //realloc the array
vectorSize *= VECTOR_STEP_REALLOC; vectorSize *= VECTOR_STEP_REALLOC;
if (!(tmp=realloc(out,vectorSize*sizeof(*out)))){ if (!(tmp=realloc(out,vectorSize*sizeof(*out)))){
ERRPRINTS("realloc errd to ~~ %lu MB\n",vectorSize >> 20); ERRPRINTS("realloc errd to ~~ %d MB\n",vectorSize >> 20);
goto _err; goto _err;
} }
out = tmp; out = tmp;
DEBUG printf("reallocd to ~~ %lu MB\n",vectorSize >> 20); DEBUG printf("reallocd to ~~ %d MB\n",vectorSize >> 20);
} }
fscanfOut = fscanf(fp,DOUBLE_STR_FORMAT,out + i++ ); fscanfOut = fscanf(fp,DOUBLE_STR_FORMAT,out + i++ );
if ( fscanfOut == EOF && ferror(fp)){ if ( fscanfOut == EOF && ferror(fp)){
@ -277,7 +277,7 @@ double* readDoubleVectorStr(char* fpath,ulong* size){
goto _err; goto _err;
} }
out = tmp; out = tmp;
DEBUG printf("readed vector from %s of %lu elements\n",fpath,*size); DEBUG printf("readed vector from %s of %d elements\n",fpath,*size);
goto _free; goto _free;
_err: _err:
@ -292,10 +292,10 @@ double* readDoubleVectorStr(char* fpath,ulong* size){
int getConfig(CONFIG* conf){ int getConfig(CONFIG* conf){
int changes=EXIT_FAILURE; int changes=EXIT_FAILURE;
char *varVal,*ptr; char *varVal,*ptr;
ulong val; idx_t val;
if ((varVal = getenv(GRID_ROWS))){ if ((varVal = getenv(GRID_ROWS))){
val=strtoul(varVal,&ptr,10); val=strtoul(varVal,&ptr,10);
if (ptr==varVal || val>= UINT_MAX){ if (ptr==varVal || val>= INT_MAX){
perror("strtol errd"); perror("strtol errd");
} else { } else {
conf->gridRows = val; conf->gridRows = val;
@ -304,7 +304,7 @@ int getConfig(CONFIG* conf){
} }
if ((varVal = getenv(GRID_COLS))){ if ((varVal = getenv(GRID_COLS))){
val=strtoul(varVal,&ptr,10); val=strtoul(varVal,&ptr,10);
if (ptr==varVal || val>= UINT_MAX){ if (ptr==varVal || val>= INT_MAX){
perror("strtol errd"); perror("strtol errd");
} else { } else {
conf->gridCols = val; conf->gridCols = val;
@ -317,11 +317,11 @@ int getConfig(CONFIG* conf){
/////LIB-SORTING -- WRAPPERS /////LIB-SORTING -- WRAPPERS
//comparing functions //comparing functions
static inline int cmp_idx_t(const void* a, const void*b){ static inline int cmp_idx_t(const void* a, const void*b){
idx_t aa=*((ulong*) a), bb = *((ulong*) b); idx_t aa=*((idx_t*) a), bb = *((idx_t*) b);
return aa==bb?0:aa>bb?1:-1; return aa==bb?0:aa>bb?1:-1;
} }
static inline int cmpulong(const void* a, const void*b){ static inline int cmpidx_t(const void* a, const void*b){
ulong aa=*((ulong*) a), bb = *((ulong*) b); idx_t aa=*((idx_t*) a), bb = *((idx_t*) b);
return aa==bb?0:aa>bb?1:-1; return aa==bb?0:aa>bb?1:-1;
} }
static inline int cmpuint(const void* a, const void*b){ static inline int cmpuint(const void* a, const void*b){
@ -336,8 +336,8 @@ static inline int cmpRbNode(const void* a, const void* b){
void sort_idx_t(idx_t* arr, idx_t len){ void sort_idx_t(idx_t* arr, idx_t len){
qsort(arr,len,sizeof(*arr),cmp_idx_t); qsort(arr,len,sizeof(*arr),cmp_idx_t);
} }
void sortulong(ulong* arr, ulong len){ void sortidx_t(idx_t* arr, idx_t len){
qsort(arr,len,sizeof(*arr),cmpulong); qsort(arr,len,sizeof(*arr),cmpidx_t);
} }
void sortuint(uint* arr, uint len){ void sortuint(uint* arr, uint len){
qsort(arr,len,sizeof(*arr),cmpuint); qsort(arr,len,sizeof(*arr),cmpuint);
@ -368,7 +368,7 @@ static inline int rndDouble_sinDecimal(double* d){
ERRPRINT("read_wrap failed to read holder for rnd double\n"); ERRPRINT("read_wrap failed to read holder for rnd double\n");
return EXIT_FAILURE; return EXIT_FAILURE;
} }
*d = (_rndHold % (ulong) ceil(MAXRND)) + sin(_rndHold); *d = (_rndHold % (idx_t) ceil(MAXRND)) + sin(_rndHold);
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
@ -383,8 +383,8 @@ void statsAvgVar(double* values,uint numVals, double* out){
} }
/// MATRIX - VECTOR COMPUTE UTILS /// MATRIX - VECTOR COMPUTE UTILS
int fillRndVector(ulong size, double* v){ int fillRndVector(idx_t size, double* v){
for( ulong x=0; x<size; ++x ){ for( idx_t x=0; x<size; ++x ){
if(rndDouble_sinAll( v+x )) return EXIT_FAILURE; if(rndDouble_sinAll( v+x )) return EXIT_FAILURE;
#ifdef RNDVECTMIN #ifdef RNDVECTMIN
v[x] += RNDVECTMIN; v[x] += RNDVECTMIN;
@ -394,18 +394,18 @@ int fillRndVector(ulong size, double* v){
} }
//convention true result in @a, toCheck in @b //convention true result in @a, toCheck in @b
int doubleVectorsDiff(double* a, double* b, ulong n,double* diffMax){ int doubleVectorsDiff(double* a, double* b, idx_t n,double* diffMax){
int out = EXIT_SUCCESS; int out = EXIT_SUCCESS;
double diff,diffAbs,_diffMax=0; double diff,diffAbs,_diffMax=0;
if (diffMax) *diffMax = 0; if (diffMax) *diffMax = 0;
else diffMax = &_diffMax; else diffMax = &_diffMax;
for (ulong i=0; i<n; i++){ for (idx_t i=0; i<n; i++){
diff = a[i] - b[i]; diff = a[i] - b[i];
diffAbs = ABS( diff ); diffAbs = ABS( diff );
if( diffAbs > DOUBLE_DIFF_THREASH ){ if( diffAbs > DOUBLE_DIFF_THREASH ){
out = EXIT_FAILURE; out = EXIT_FAILURE;
ERRPRINTS("DOUBLE VECTORS DIFF: DOUBLE_DIFF_THREASH=%lf\t<\t" ERRPRINTS("DOUBLE VECTORS DIFF: DOUBLE_DIFF_THREASH=%lf\t<\t"
"|%13lg| = %lf %% of @a[%lu]\n", "|%13lg| = %lf %% of @a[%d]\n",
DOUBLE_DIFF_THREASH,diff,100*diffAbs/ABS(a[i]),i); DOUBLE_DIFF_THREASH,diff,100*diffAbs/ABS(a[i]),i);
#ifdef DOUBLE_VECT_DIFF_EARLY_EXIT #ifdef DOUBLE_VECT_DIFF_EARLY_EXIT
/*#pragma message("DOUBLE_VECT_DIFF_EARLY_EXIT: only first diff double reported")*/ /*#pragma message("DOUBLE_VECT_DIFF_EARLY_EXIT: only first diff double reported")*/
@ -415,7 +415,7 @@ int doubleVectorsDiff(double* a, double* b, ulong n,double* diffMax){
if ( ABS(*diffMax) < diffAbs ) *diffMax = diff; if ( ABS(*diffMax) < diffAbs ) *diffMax = diff;
} }
DEBUG{ DEBUG{
printf("\nchecked diff %s"CEND" between 2 double vector of %lu elements" printf("\nchecked diff %s"CEND" between 2 double vector of %d elements"
"\tmax diff: %le %s threash: %le\n", !out?CCC"OK":CCCERR"ERR!", "\tmax diff: %le %s threash: %le\n", !out?CCC"OK":CCCERR"ERR!",
n,*diffMax,*diffMax<DOUBLE_DIFF_THREASH?"<":">=", n,*diffMax,*diffMax<DOUBLE_DIFF_THREASH?"<":">=",
DOUBLE_DIFF_THREASH); DOUBLE_DIFF_THREASH);
@ -427,9 +427,9 @@ int doubleVectorsDiff(double* a, double* b, ulong n,double* diffMax){
return out; return out;
} }
void printMatrix(double* mat,ulong m,ulong n,char justNZMarkers){ void printMatrix(double* mat,idx_t m,idx_t n,char justNZMarkers){
printf("printing matrix: %lu x %lu\n",m,n); printf("printing matrix: %d x %d\n",m,n);
ulong i,j; idx_t i,j;
for (i=0;i<m;i++){ for (i=0;i<m;i++){
for (j=0;j<n;j++){ for (j=0;j<n;j++){
if (justNZMarkers) printf("%s",mat[IDX2D(i,j,n)]?".":" "); if (justNZMarkers) printf("%s",mat[IDX2D(i,j,n)]?".":" ");
@ -441,8 +441,8 @@ void printMatrix(double* mat,ulong m,ulong n,char justNZMarkers){
} }
void printVector(double* v,ulong size){ void printVector(double* v,idx_t size){
for( ulong i=0;i<size;i++) printf("%1.1lf ",v[i]); for( idx_t i=0;i<size;i++) printf("%1.1lf ",v[i]);
printf("\n"); printf("\n");
} }
@ -464,7 +464,7 @@ static inline char* searchPatternInStrs(char* pattern,char** strings){
return out; return out;
} }
inline int appendArr(ulong val,APPENDARRAY* list){ inline int appendArr(idx_t val,APPENDARRAY* list){
return 0; //TODO return 0; //TODO
} }

@ -9,13 +9,23 @@ enum impl_types
}; };
CHUNKS_DISTR chunksFair; CHUNKS_DISTR chunksFair;
CHUNKS_DISTR chunksFairFolded;
CHUNKS_DISTR chunksNOOP;
CHUNKS_DISTR_INTERF chunkDistrbFunc=&chunksFairFolded;
static CONFIG Conf = {
.gridRows = 20,
.gridCols = 2,
.symbMMRowImplID = RBTREE,
};
void psb_f_spmm_build_spacc(idx_t a_m, idx_t a_n, idx_t a_nz, void psb_f_spmm_build_spacc(idx_t a_m, idx_t a_n, idx_t a_nz,
double **a_as_ptr, idx_t **a_ja_ptr, double **a_as_ptr, idx_t **a_ja_ptr,
idx_t **a_irp_ptr, idx_t a_max_row_nz, idx_t **a_irp_ptr,
idx_t b_m, idx_t b_n, idx_t b_nz, idx_t b_m, idx_t b_n, idx_t b_nz,
double **b_as_ptr, idx_t **b_ja_ptr, double **b_as_ptr, idx_t **b_ja_ptr,
idx_t **b_irp_ptr, idx_t b_max_row_nz, idx_t **b_irp_ptr,
enum impl_types impl_choice, enum impl_types impl_choice,
void **accumul, void **accumul,
void **rows_sizes, void **rows_sizes,
@ -24,7 +34,6 @@ void psb_f_spmm_build_spacc(idx_t a_m, idx_t a_n, idx_t a_nz,
{ {
int rc; int rc;
spmat a, b; spmat a, b;
CONFIG cfg;
double *a_as = *a_as_ptr; double *a_as = *a_as_ptr;
idx_t *a_ja = *a_ja_ptr; idx_t *a_ja = *a_ja_ptr;
@ -39,15 +48,32 @@ void psb_f_spmm_build_spacc(idx_t a_m, idx_t a_n, idx_t a_nz,
b->RL = b_rl; b->RL = b_rl;
#endif // ROWLENS #endif // ROWLENS
// setting up cfg if (!getConfig(&Conf)){
// TODO : CHECK THAT THIS IS COMPATIBLE WITH PSB VERBOSE printf("configuration changed from env");
rc = getConfig(&cfg); }
//save exported parallelization grid configured ... see Config.md in the top folder
//const ushort GRIDROWS = Conf.gridRows;
//const ushort GRIDCOLS = Conf.gridCols;
const ushort ORIG_GRID_CONF[2] = {Conf.gridRows,Conf.gridCols};
int maxThreads = omp_get_max_threads();
Conf.threadNum = (uint) maxThreads;
DEBUG printf("omp_get_max_threads:\t%d\n",maxThreads);
/*
* get exported schedule configuration,
* if schedule != static -> dyn like -> set a chunk division function before omp for
*/
int schedKind_chunk_monotonic[3];
ompGetRuntimeSchedule(schedKind_chunk_monotonic);
Conf.chunkDistrbFunc = &chunksNOOP;
if (schedKind_chunk_monotonic[0] != omp_sched_static)
Conf.chunkDistrbFunc = chunkDistrbFunc;
VERBOSE
printf("%s",Conf.chunkDistrbFunc == &chunksNOOP?"static schedule =>chunkDistrbFunc NOOP\n":"");
// TODO : change chunk distribution with a choice ? int i, row_nz;
cfg.chunkDistrbFunc = &chunksFair;
cfg.threadNum = 1;
printf("irp %d %d %d %d %d\n", a_irp[0], a_irp[1], a_irp[2], a_irp[3], a_irp[4]);
// setting up spmat type matrices // setting up spmat type matrices
a.M = a_m; a.M = a_m;
a.N = a_n; a.N = a_n;
@ -55,7 +81,13 @@ void psb_f_spmm_build_spacc(idx_t a_m, idx_t a_n, idx_t a_nz,
a.AS = a_as; a.AS = a_as;
a.JA = a_ja; a.JA = a_ja;
a.IRP = a_irp; a.IRP = a_irp;
a.MAX_ROW_NZ = a_max_row_nz; a.MAX_ROW_NZ = a.IRP[1] - a.IRP[0];
for (i = 1; i < a.M; i ++){
row_nz = a.IRP[i+1] - a.IRP[i];
if (row_nz > a.MAX_ROW_NZ){
a.MAX_ROW_NZ = row_nz;
}
}
b.M = b_m; b.M = b_m;
b.N = b_n; b.N = b_n;
@ -63,13 +95,20 @@ void psb_f_spmm_build_spacc(idx_t a_m, idx_t a_n, idx_t a_nz,
b.AS = b_as; b.AS = b_as;
b.JA = b_ja; b.JA = b_ja;
b.IRP = b_irp; b.IRP = b_irp;
b.MAX_ROW_NZ = b_max_row_nz; b.MAX_ROW_NZ = b.IRP[1] - b.IRP[0];
for (i = 1; i < b.M; i ++){
row_nz = b.IRP[i+1] - b.IRP[i];
if (row_nz > b.MAX_ROW_NZ){
b.MAX_ROW_NZ = row_nz;
}
}
// computing the size // computing the size
switch (impl_choice) switch (impl_choice)
{ {
case ROW_BY_ROW_UB: case ROW_BY_ROW_UB:
*nnz = spmmRowByRowCalculateSize_1(&a, &b, &cfg, accumul, rows_sizes, tmp_matrix); *nnz = spmmRowByRowCalculateSize_1(&a, &b, &Conf, accumul, rows_sizes, tmp_matrix);
default: default:
break; break;
} }
@ -79,11 +118,14 @@ void psb_f_spmm_merge_spacc(void **accumul,
void **rows_sizes, void **rows_sizes,
void **tmp_matrix, void **tmp_matrix,
enum impl_types impl_choice, enum impl_types impl_choice,
double **c_as, double **c_as_ptr,
idx_t **c_ja, idx_t **c_ja_ptr,
idx_t **c_irp, idx_t **c_irp_ptr,
int *info) int *info)
{ {
double *c_as = *c_as_ptr;
idx_t *c_ja = *c_ja_ptr;
idx_t *c_irp = *c_irp_ptr;
// merging the rows into the correct arrays // merging the rows into the correct arrays
switch (impl_choice) switch (impl_choice)
{ {

@ -39,10 +39,10 @@
//hold SPMM result over a unpartitionated space among threads-row[s' blocks] //hold SPMM result over a unpartitionated space among threads-row[s' blocks]
typedef struct{ typedef struct{
//space to hold SPMM output //space to hold SPMM output
ulong* JA; idx_t* JA;
double* AS; double* AS;
ulong size; //num of entries allocated -> only dbg checks idx_t size; //num of entries allocated -> only dbg checks
ulong lastAssigned; //last JA&AS assigned index to an accumulator(atom) idx_t lastAssigned; //last JA&AS assigned index to an accumulator(atom)
SPACC* accs; //SPARSIFIED ACC POINTERS SPACC* accs; //SPARSIFIED ACC POINTERS
uint accsNum; uint accsNum;
} SPMM_ACC; //accumulator for SPMM } SPMM_ACC; //accumulator for SPMM

@ -61,7 +61,7 @@ SPMM CAT(spmmRowByRow_,OFF_F);
idx_t CAT(spmmRowByRowCalculateSize_,OFF_F) (spmat* A, spmat*B, CONFIG* cfg, void** accumul, void **rows_sizes, void** tmp_matrix); idx_t CAT(spmmRowByRowCalculateSize_,OFF_F) (spmat* A, spmat*B, CONFIG* cfg, void** accumul, void **rows_sizes, void** tmp_matrix);
void CAT(spmmRowByRowPopulate_,OFF_F)(void** accumul, void** rows_sizes, void** tmp_matrix, double** AS, idx_t** JA, idx_t** IRP); void CAT(spmmRowByRowPopulate_,OFF_F)(void** accumul, void** rows_sizes, void** tmp_matrix, double* AS, idx_t* JA, idx_t* IRP);
/* /*
* sparse parallel implementation of @A * @B parallelizing Gustavson * sparse parallel implementation of @A * @B parallelizing Gustavson

@ -37,12 +37,12 @@
* both of accumulator's dense array and nnzIdx in @aux and has to be big @vectLen * both of accumulator's dense array and nnzIdx in @aux and has to be big @vectLen
*/ */
inline void CAT(scSparseVectMul_,OFF_F) inline void CAT(scSparseVectMul_,OFF_F)
(double scalar,double* vectVals,ulong* vectIdxs,ulong vectLen, ACC_DENSE* aux){ (double scalar,double* vectVals,idx_t* vectIdxs,idx_t vectLen, ACC_DENSE* aux){
for (ulong i=0,j; i<vectLen; i++){ for (idx_t i=0,j; i<vectLen; i++){
j = vectIdxs[i]-OFF_F; j = vectIdxs[i]-OFF_F;
DEBUGCHECKS{ DEBUGCHECKS{
if (j>=aux->vLen){ if (j>=aux->vLen){
fprintf(stderr,"index %lu outside vLen %lu\n",j,aux->vLen); fprintf(stderr,"index %d outside vLen %d\n",j,aux->vLen);
assert(j < aux->vLen); assert(j < aux->vLen);
} }
} }
@ -65,12 +65,12 @@ inline void CAT(scSparseVectMul_,OFF_F)
* both of accumulator's dense array and nnzIdx in @aux and has to be big @vectLen * both of accumulator's dense array and nnzIdx in @aux and has to be big @vectLen
*/ */
inline void CAT(scSparseVectMulPart_,OFF_F)(double scalar,double* vectVals, inline void CAT(scSparseVectMulPart_,OFF_F)(double scalar,double* vectVals,
ulong* vectIdxs,ulong vectLen,ulong startIdx,ACC_DENSE* aux){ idx_t* vectIdxs,idx_t vectLen,idx_t startIdx,ACC_DENSE* aux){
for (ulong i=0,j; i<vectLen; i++){ for (idx_t i=0,j; i<vectLen; i++){
j = vectIdxs[i]-OFF_F - startIdx; j = vectIdxs[i]-OFF_F - startIdx;
DEBUGCHECKS{ DEBUGCHECKS{
if (j>=aux->vLen){ if (j>=aux->vLen){
fprintf(stderr,"index %lu outside vLen %lu\n",j,aux->vLen); fprintf(stderr,"index %d outside vLen %d\n",j,aux->vLen);
assert(j < aux->vLen); assert(j < aux->vLen);
} }
} }
@ -90,9 +90,9 @@ inline void CAT(scSparseVectMulPart_,OFF_F)(double scalar,double* vectVals,
#endif #endif
///TODO IMPLEMENT SCALAR<->ROW MUL AS GENERIC SPARSE VECTOR<->SCALAR MUL ///TODO IMPLEMENT SCALAR<->ROW MUL AS GENERIC SPARSE VECTOR<->SCALAR MUL
//inline void scSparseRowMul(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux){ //inline void scSparseRowMul(double scalar,spmat* mat,idx_t trgtR, ACC_DENSE* aux){
inline void CAT(scSparseRowMul_,OFF_F)(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux){ inline void CAT(scSparseRowMul_,OFF_F)(double scalar,spmat* mat,idx_t trgtR, ACC_DENSE* aux){
ulong rowStartIdx = mat->IRP[trgtR]-OFF_F,rowLen; idx_t rowStartIdx = mat->IRP[trgtR]-OFF_F,rowLen;
#ifdef ROWLENS #ifdef ROWLENS
rowLen = mat->RL[trgtR]; rowLen = mat->RL[trgtR];
#else #else
@ -116,7 +116,6 @@ inline idx_t* CAT(spMMSizeUpperbound_,OFF_F)(spmat* A,spmat* B){
ERRPRINT("spMMSizeUpperbound: rowSizes calloc errd\n"); ERRPRINT("spMMSizeUpperbound: rowSizes calloc errd\n");
return NULL; return NULL;
} }
printf("A->IRP %ld %ld %ld %ld %ld\n", A->IRP[0], A->IRP[1], A->IRP[2], A->IRP[3], A->IRP[4]);
idx_t fullMatBound = 0; idx_t fullMatBound = 0;
#pragma omp parallel for schedule(static) reduction(+:fullMatBound) #pragma omp parallel for schedule(static) reduction(+:fullMatBound)
for (idx_t r=0; r<A->M; r++){ for (idx_t r=0; r<A->M; r++){
@ -135,7 +134,7 @@ inline idx_t* CAT(spMMSizeUpperbound_,OFF_F)(spmat* A,spmat* B){
rowSizes[A->M] = fullMatBound; rowSizes[A->M] = fullMatBound;
AUDIT_INTERNAL_TIMES End= omp_get_wtime(); AUDIT_INTERNAL_TIMES End= omp_get_wtime();
VERBOSE VERBOSE
printf("spMMSizeUpperbound:%lu\t%le s\n",rowSizes[A->M],End-Start); printf("spMMSizeUpperbound:%d\t%le s\n",rowSizes[A->M],End-Start);
return rowSizes; return rowSizes;
} }
@ -170,7 +169,7 @@ inline idx_t* CAT(spMMSizeUpperboundColParts_,OFF_F)
rowPartsSizes[ A->M*gridCols ] = fullMatBound; rowPartsSizes[ A->M*gridCols ] = fullMatBound;
AUDIT_INTERNAL_TIMES End= omp_get_wtime(); AUDIT_INTERNAL_TIMES End= omp_get_wtime();
VERBOSE VERBOSE
printf("spMMSizeUpperboundColParts_:%lu\t%le s\n",rowPartsSizes[A->M],End-Start); printf("spMMSizeUpperboundColParts_:%d\t%le s\n",rowPartsSizes[A->M],End-Start);
return rowPartsSizes; return rowPartsSizes;
} }
@ -182,7 +181,7 @@ inline idx_t* CAT(spMMSizeUpperboundColParts_,OFF_F)
#include "utils.h" #include "utils.h"
///Allocs - Free ///Allocs - Free
//SpMM holder of accumulats //SpMM holder of accumulats
inline SPMM_ACC* initSpMMAcc(ulong entriesNum, ulong accumulatorsNum){ inline SPMM_ACC* initSpMMAcc(idx_t entriesNum, idx_t accumulatorsNum){
SPMM_ACC* out = calloc(1,sizeof(*out)); SPMM_ACC* out = calloc(1,sizeof(*out));
if (!out){ if (!out){
ERRPRINT("initSpMMAcc: out calloc errd\n"); ERRPRINT("initSpMMAcc: out calloc errd\n");
@ -275,7 +274,7 @@ static inline void _sparsifyUB(ACC_DENSE* accV,SPACC* accSparse,idx_t startColAc
//row[Part] sparsified in a thread safe (exactly long) reserved area using atomics //row[Part] sparsified in a thread safe (exactly long) reserved area using atomics
static inline void sparsifyUBNoPartsBounds static inline void sparsifyUBNoPartsBounds
(SPMM_ACC* acc,ACC_DENSE* accV,SPACC* accSparse, ulong startColAcc){ (SPMM_ACC* acc,ACC_DENSE* accV,SPACC* accSparse, idx_t startColAcc){
//sort nnz indexes of dense accumulator //sort nnz indexes of dense accumulator
idx_t nnz = accV->nnzIdxMap.len; idx_t nnz = accV->nnzIdxMap.len;
idx_t sparsifyStartV; //start index(inside @accSparse) of @accV to sparsify idx_t sparsifyStartV; //start index(inside @accSparse) of @accV to sparsify
@ -305,11 +304,11 @@ static inline void sparsifyUBNoPartsBounds
*/ */
inline int mergeRowsPartitions(SPACC* rowsParts,spmat* mat, inline int mergeRowsPartitions(SPACC* rowsParts,spmat* mat,
CONFIG* conf){ CONFIG* conf){
ulong nzNum=0,j,rLen,idx,partsNum = mat->M * conf->gridCols; idx_t nzNum=0,j,rLen,idx,partsNum = mat->M * conf->gridCols;
//TODO PARALLEL MERGE - SAVE STARTING offset OF EACH PARTITION IN THE OUT MATRIX //TODO PARALLEL MERGE - SAVE STARTING offset OF EACH PARTITION IN THE OUT MATRIX
ulong* rowsPartsOffsets=alloca(partsNum*sizeof(*rowsPartsOffsets)); idx_t* rowsPartsOffsets=alloca(partsNum*sizeof(*rowsPartsOffsets));
///count nnz entries and alloc arrays for them ///count nnz entries and alloc arrays for them
for (ulong r=0; r<mat->M; r++){ for (idx_t r=0; r<mat->M; r++){
//for each partition ->get len -> outMat.IRP and aux offsets //for each partition ->get len -> outMat.IRP and aux offsets
for (j=0,rLen=0; j<conf->gridCols; j++){ for (j=0,rLen=0; j<conf->gridCols; j++){
idx = IDX2D(r,j,conf->gridCols); idx = IDX2D(r,j,conf->gridCols);
@ -332,20 +331,20 @@ inline int mergeRowsPartitions(SPACC* rowsParts,spmat* mat,
return EXIT_FAILURE; return EXIT_FAILURE;
} }
///popolate with rows nnz values and indexes ///popolate with rows nnz values and indexes
ulong pLen; //omp for aux vars idx_t pLen; //omp for aux vars
#pragma omp parallel for schedule(static) private(pLen) #pragma omp parallel for schedule(static) private(pLen)
for (ulong i=0; i<partsNum; i++){ for (idx_t i=0; i<partsNum; i++){
pLen = rowsParts[i].len; pLen = rowsParts[i].len;
memcpy(mat->AS + rowsPartsOffsets[i],rowsParts[i].AS,pLen*sizeof(*(mat->AS))); memcpy(mat->AS + rowsPartsOffsets[i],rowsParts[i].AS,pLen*sizeof(*(mat->AS)));
memcpy(mat->JA + rowsPartsOffsets[i],rowsParts[i].JA,pLen*sizeof(*(mat->JA))); memcpy(mat->JA + rowsPartsOffsets[i],rowsParts[i].JA,pLen*sizeof(*(mat->JA)));
} }
CONSISTENCY_CHECKS{ //TODO REMOVE written nnz check manually CONSISTENCY_CHECKS{ //TODO REMOVE written nnz check manually
for (ulong i=0,w=0; i<mat->M; i++){ for (idx_t i=0,w=0; i<mat->M; i++){
if (mat->IRP[i] != w) if (mat->IRP[i] != w)
{ERRPRINT("MERGE ROW ERR IRP\n");return -1;} {ERRPRINT("MERGE ROW ERR IRP\n");return -1;}
for (j=0; j<conf->gridCols; j++){ for (j=0; j<conf->gridCols; j++){
SPACC r = rowsParts[IDX2D(i,j,conf->gridCols)]; SPACC r = rowsParts[IDX2D(i,j,conf->gridCols)];
for (ulong jj=0; jj<r.len; jj++,w++){ for (idx_t jj=0; jj<r.len; jj++,w++){
if (mat->AS[w]!= r.AS[jj]){ if (mat->AS[w]!= r.AS[jj]){
ERRPRINT("MERGE ROW ERR AS\n"); return -1;} ERRPRINT("MERGE ROW ERR AS\n"); return -1;}
if (mat->JA[w]!= r.JA[jj]){ if (mat->JA[w]!= r.JA[jj]){
@ -363,9 +362,9 @@ inline int mergeRowsPartitions(SPACC* rowsParts,spmat* mat,
* allocd arrays to hold non zero values and indexes into @mat * allocd arrays to hold non zero values and indexes into @mat
*/ */
inline int mergeRows(SPACC* rows,spmat* mat){ inline int mergeRows(SPACC* rows,spmat* mat){
ulong nzNum=0; idx_t nzNum=0;
//count nnz entries and alloc arrays for them //count nnz entries and alloc arrays for them
for (ulong r=0; r<mat->M; ++r){ for (idx_t r=0; r<mat->M; ++r){
nzNum += rows[r].len; nzNum += rows[r].len;
mat->IRP[r+1] = nzNum; mat->IRP[r+1] = nzNum;
#ifdef ROWLENS #ifdef ROWLENS
@ -384,15 +383,15 @@ inline int mergeRows(SPACC* rows,spmat* mat){
///POPOLATE WITH ROWS NNZ VALUES AND INDEXES ///POPOLATE WITH ROWS NNZ VALUES AND INDEXES
//TODO PARALLEL COPY //TODO PARALLEL COPY
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (ulong r=0; r<mat->M; r++){ for (idx_t r=0; r<mat->M; r++){
memcpy(mat->AS+mat->IRP[r], rows[r].AS, rows[r].len*sizeof(*(mat->AS))); memcpy(mat->AS+mat->IRP[r], rows[r].AS, rows[r].len*sizeof(*(mat->AS)));
memcpy(mat->JA+mat->IRP[r], rows[r].JA, rows[r].len*sizeof(*(mat->JA))); memcpy(mat->JA+mat->IRP[r], rows[r].JA, rows[r].len*sizeof(*(mat->JA)));
} }
CONSISTENCY_CHECKS{ //TODO REMOVE written nnz check manually CONSISTENCY_CHECKS{ //TODO REMOVE written nnz check manually
for (ulong r=0,i=0; r<mat->M; r++){ for (idx_t r=0,i=0; r<mat->M; r++){
if (i != mat->IRP[r]) if (i != mat->IRP[r])
{ERRPRINT("MERGE ROW ERR IRP\n");return -1;} {ERRPRINT("MERGE ROW ERR IRP\n");return -1;}
for (ulong j=0; j<rows[r].len; j++,i++){ for (idx_t j=0; j<rows[r].len; j++,i++){
if (mat->AS[i]!= rows[r].AS[j]){ if (mat->AS[i]!= rows[r].AS[j]){
ERRPRINT("MERGE ROW ERR AS\n"); return -1;} ERRPRINT("MERGE ROW ERR AS\n"); return -1;}
if (mat->JA[i] != rows[r].JA[j]){ if (mat->JA[i] != rows[r].JA[j]){
@ -415,9 +414,9 @@ inline int mergeRows(SPACC* rows,spmat* mat){
*/ */
inline idx_t calculateSize(SPACC* rows, spmat *mat){ inline idx_t calculateSize(SPACC* rows, spmat *mat){
ulong nzNum=0; idx_t nzNum=0;
//count nnz entries and alloc arrays for them //count nnz entries and alloc arrays for them
for (ulong r=0; r<mat->M; ++r){ for (idx_t r=0; r<mat->M; ++r){
nzNum += rows[r].len; nzNum += rows[r].len;
mat->IRP[r+1] = nzNum; mat->IRP[r+1] = nzNum;
#ifdef ROWLENS #ifdef ROWLENS
@ -429,16 +428,17 @@ inline idx_t calculateSize(SPACC* rows, spmat *mat){
return nzNum; return nzNum;
} }
inline int mergeRowsPopulate(SPACC* rows, spmat* mat, double** AS, idx_t** JA, idx_t** IRP) { inline int mergeRowsPopulate(SPACC* rows, spmat* mat, double* AS, idx_t* JA, idx_t* IRP) {
// Populate with rows' non-zero values and indexes // Populate with rows' non-zero values and indexes
// TODO PARALLEL COPY // TODO PARALLEL COPY
#pragma omp parallel for schedule(static) // #pragma omp parallel for schedule(static)
for (ulong r = 0; r < mat->M; r++) { for (idx_t r = 0; r < mat->M; r++) {
memcpy(*AS + mat->IRP[r], rows[r].AS, rows[r].len * sizeof(double)); fprintf(stderr, "r %d, AS %p, mat->IRP[r] %d, rows[r].AS %p, rows[r].len %d\n", r,AS,mat->IRP[r],rows[r].AS, rows[r].len);
memcpy(*JA + mat->IRP[r], rows[r].JA, rows[r].len * sizeof(idx_t)); memcpy(AS + mat->IRP[r], rows[r].AS, rows[r].len * sizeof(double));
memcpy(JA + mat->IRP[r], rows[r].JA, rows[r].len * sizeof(idx_t));
} }
memcpy(*IRP, mat->IRP, (mat->M + 1) * sizeof(idx_t)); memcpy(IRP, mat->IRP, (mat->M + 1) * sizeof(idx_t));
// Consistency checks // Consistency checks
// TODO: Add your consistency checks here // TODO: Add your consistency checks here

@ -40,11 +40,11 @@ typedef enum{
// //
typedef struct{ typedef struct{
ushort gridRows; int gridRows;
ushort gridCols; int gridCols;
//TODO FULL CONFIG DOCCED HERE //TODO FULL CONFIG DOCCED HERE
ROW_MMSYM_IMPL_MODE symbMMRowImplID; //how to compute symb mul (if required) ROW_MMSYM_IMPL_MODE symbMMRowImplID; //how to compute symb mul (if required)
uint threadNum; //thread num to use in an OMP parallel region ... int threadNum; //thread num to use in an OMP parallel region ...
void* chunkDistrbFunc; //CHUNKS_DISTR_INTERF func pntr void* chunkDistrbFunc; //CHUNKS_DISTR_INTERF func pntr
} CONFIG; } CONFIG;
///Smart controls ///Smart controls

@ -33,17 +33,17 @@ void cleanRbNodes(rbRoot* root,rbNode* nodes,idx_t nodesNum);
int rbInsertNewKey(rbRoot *root,rbNode *node, idx_t key); int rbInsertNewKey(rbRoot *root,rbNode *node, idx_t key);
//void C_FortranShiftIdxs(spmat* outMat); //void C_FortranShiftIdxs(spmat* outMat);
//void Fortran_C_ShiftIdxs(spmat* m); //void Fortran_C_ShiftIdxs(spmat* m);
ACC_DENSE* _initAccVectors(ulong num,ulong size); ACC_DENSE* _initAccVectors(idx_t num,idx_t size);
ACC_DENSE* _initAccVectors_monoalloc(ulong num,ulong size); //TODO PERF WITH NEXT ACC_DENSE* _initAccVectors_monoalloc(idx_t num,idx_t size); //TODO PERF WITH NEXT
SPMM_ACC* initSpMMAcc(ulong entriesNum, ulong accumulatorsNum); SPMM_ACC* initSpMMAcc(idx_t entriesNum, idx_t accumulatorsNum);
idx_t reductionMaxSeq(idx_t* arr,idx_t arrLen); idx_t reductionMaxSeq(idx_t* arr,idx_t arrLen);
int _allocAccDense(ACC_DENSE* v,ulong size); int _allocAccDense(ACC_DENSE* v,idx_t size);
int mergeRows(SPACC* rows,spmat* mat); int mergeRows(SPACC* rows,spmat* mat);
int mergeRowsPartitions(SPACC* rowsParts,spmat* mat,CONFIG* conf); int mergeRowsPartitions(SPACC* rowsParts,spmat* mat,CONFIG* conf);
void _freeAccsDenseChecks(ACC_DENSE* vectors,ulong num); void _freeAccsDenseChecks(ACC_DENSE* vectors,idx_t num);
void _resetAccVect(ACC_DENSE* acc); void _resetAccVect(ACC_DENSE* acc);
void _resetIdxMap(SPVECT_IDX_DENSE_MAP* acc); void _resetIdxMap(SPVECT_IDX_DENSE_MAP* acc);
void assertArrNoRepetitions(idx_t* arrSorted, idx_t arrLen); void assertArrNoRepetitions(idx_t* arrSorted, idx_t arrLen);
void freeAccsDense(ACC_DENSE* vectors,ulong num); void freeAccsDense(ACC_DENSE* vectors,idx_t num);
void freeSpMMAcc(SPMM_ACC* acc); void freeSpMMAcc(SPMM_ACC* acc);
void sparsifyDenseVect(SPMM_ACC* acc,ACC_DENSE* accV,SPACC* accSparse, ulong startColAcc); void sparsifyDenseVect(SPMM_ACC* acc,ACC_DENSE* accV,SPACC* accSparse, idx_t startColAcc);

@ -35,9 +35,9 @@
////inline exports ////inline exports
//multi implmentation functions //multi implmentation functions
void CAT(scSparseVectMul_,OFF_F)(double scalar,double* vectVals,ulong* vectIdxs,ulong vectLen, ACC_DENSE* aux); void CAT(scSparseVectMul_,OFF_F)(double scalar,double* vectVals,idx_t* vectIdxs,idx_t vectLen, ACC_DENSE* aux);
void CAT(scSparseVectMulPart_,OFF_F)(double scalar,double* vectVals,ulong* vectIdxs,ulong vectLen,ulong startIdx,ACC_DENSE* aux); void CAT(scSparseVectMulPart_,OFF_F)(double scalar,double* vectVals,idx_t* vectIdxs,idx_t vectLen,idx_t startIdx,ACC_DENSE* aux);
void CAT(_scRowMul_,OFF_F)(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux); void CAT(_scRowMul_,OFF_F)(double scalar,spmat* mat,idx_t trgtR, ACC_DENSE* aux);
void CAT(scSparseRowMul_,OFF_F)(double scalar,spmat* mat,ulong trgtR, ACC_DENSE* aux); void CAT(scSparseRowMul_,OFF_F)(double scalar,spmat* mat,idx_t trgtR, ACC_DENSE* aux);
idx_t* CAT(spMMSizeUpperbound_,OFF_F)(spmat* A,spmat* B); idx_t* CAT(spMMSizeUpperbound_,OFF_F)(spmat* A,spmat* B);
idx_t* CAT(spMMSizeUpperboundColParts_,OFF_F)(spmat* A,spmat* B,ushort gridCols,idx_t* bColPartOffsets); idx_t* CAT(spMMSizeUpperboundColParts_,OFF_F)(spmat* A,spmat* B,ushort gridCols,idx_t* bColPartOffsets);

@ -40,14 +40,14 @@
#include "config.h" #include "config.h"
#include "omp.h" #include "omp.h"
//distribution of @rows|blocks of @matrix, exploiting @config //distribution of @rows|blocks of @matrix, exploiting @config
typedef void (CHUNKS_DISTR ) (ulong,spmat*,CONFIG*); typedef void (CHUNKS_DISTR ) (idx_t,spmat*,CONFIG*);
typedef void (*CHUNKS_DISTR_INTERF ) (ulong,spmat*,CONFIG*); typedef void (*CHUNKS_DISTR_INTERF ) (idx_t,spmat*,CONFIG*);
//NOOP chunks division for manual configuration via export OMP_SCHEDULE //NOOP chunks division for manual configuration via export OMP_SCHEDULE
inline void chunksNOOP(ulong r,spmat* mat,CONFIG* cfg){ return; } inline void chunksNOOP(idx_t r,spmat* mat,CONFIG* cfg){ return; }
#include "ompGetICV.h" #include "ompGetICV.h"
//fair division of @r elements from matrix @mat with threads num in @cfg //fair division of @r elements from matrix @mat with threads num in @cfg
inline void chunksFair(ulong r,spmat* mat,CONFIG* cfg){ inline void chunksFair(idx_t r,spmat* mat,CONFIG* cfg){
assert(cfg->threadNum > 0); //configured target thread num assert(cfg->threadNum > 0); //configured target thread num
omp_sched_t k,kind; int chunk_size,chunk_size_new=0,monotonic; omp_sched_t k,kind; int chunk_size,chunk_size_new=0,monotonic;
omp_get_schedule(&kind,&chunk_size); omp_get_schedule(&kind,&chunk_size);
@ -75,7 +75,7 @@ inline void chunksFair(ulong r,spmat* mat,CONFIG* cfg){
} }
//fair division of @r elements from matrix @mat of threads in @cfg //fair division of @r elements from matrix @mat of threads in @cfg
//subdividing the fair share with a factor of @FAIR_CHUNKS_FOLDING //subdividing the fair share with a factor of @FAIR_CHUNKS_FOLDING
inline void chunksFairFolded(ulong r,spmat* mat,CONFIG* cfg){ inline void chunksFairFolded(idx_t r,spmat* mat,CONFIG* cfg){
assert(cfg->threadNum > 0); //configured target thread num assert(cfg->threadNum > 0); //configured target thread num
omp_sched_t k,kind; int chunk_size,chunk_size_new=0,monotonic; omp_sched_t k,kind; int chunk_size,chunk_size_new=0,monotonic;
omp_get_schedule(&kind,&chunk_size); omp_get_schedule(&kind,&chunk_size);

@ -35,16 +35,16 @@
#include "sparseMatrix.h" #include "sparseMatrix.h"
typedef struct{ typedef struct{
ulong row; idx_t row;
ulong col; idx_t col;
double val; double val;
} entry; //MatrixMarket COO entry } entry; //MatrixMarket COO entry
typedef struct{ typedef struct{
MM_typecode mcode; MM_typecode mcode;
entry* entries; entry* entries;
ulong* rowLens; idx_t* rowLens;
ulong M,N,NZ; //spmat sizes idx_t M,N,NZ; //spmat sizes
} MatrixMarket; } MatrixMarket;
////COO PARSE ////COO PARSE
@ -61,7 +61,7 @@ int MMCheck(MM_typecode typecode);
* return allocated and filled COO entries with the NNZ number into * return allocated and filled COO entries with the NNZ number into
* NO SORT CHECKING HERE * NO SORT CHECKING HERE
*/ */
entry* MMtoCOO(ulong* NZ, FILE *fp, MM_typecode mcode,ulong* rowLens); entry* MMtoCOO(idx_t* NZ, FILE *fp, MM_typecode mcode,idx_t* rowLens);
////COO -> ANYTHING ELSE CONVERSION ////COO -> ANYTHING ELSE CONVERSION
/* /*
@ -69,13 +69,13 @@ entry* MMtoCOO(ulong* NZ, FILE *fp, MM_typecode mcode,ulong* rowLens);
* EXPECTED: CSR arrays allocated, @entries col sorted in (not madatory consecut) rows * EXPECTED: CSR arrays allocated, @entries col sorted in (not madatory consecut) rows
* [simmetrical parts explicitly rappresented --> not important here] * [simmetrical parts explicitly rappresented --> not important here]
*/ */
int COOtoCSR(entry* entries, spmat* mat,ulong* rowLens); int COOtoCSR(entry* entries, spmat* mat,idx_t* rowLens);
/* /*
* write COO entries in @entries inside sparse matrix @mat in ELL format * write COO entries in @entries inside sparse matrix @mat in ELL format
* EXPECTED: @entries col sorted in (not madatory consecut) rows * EXPECTED: @entries col sorted in (not madatory consecut) rows
* ELL internal array allocated in this function, not freed in case of error * ELL internal array allocated in this function, not freed in case of error
*/ */
int COOtoELL(entry* entries, spmat* mat, ulong* rowLens); int COOtoELL(entry* entries, spmat* mat, idx_t* rowLens);
////wrapper MM -> specialized target ////wrapper MM -> specialized target
/* /*
* Parse MatrixMarket matrix stored in file at @matPath * Parse MatrixMarket matrix stored in file at @matPath

@ -75,7 +75,7 @@ typedef struct{
idx_t vLen; //size of the aux dense vector //TODO USELESS? idx_t vLen; //size of the aux dense vector //TODO USELESS?
SPVECT_IDX_DENSE_MAP nnzIdxMap; // SPVECT_IDX_DENSE_MAP nnzIdxMap; //
} ACC_DENSE; } ACC_DENSE;
int allocAccDense(ACC_DENSE* v, ulong size); int allocAccDense(ACC_DENSE* v, idx_t size);
/* /*
**** NNZ IDXS PRESENCE FLAGS ACCESS INTERFACE: *** **** NNZ IDXS PRESENCE FLAGS ACCESS INTERFACE: ***

@ -40,19 +40,19 @@
* returning an offsets matrix out[i][j] = start of jth colPartition of row i * returning an offsets matrix out[i][j] = start of jth colPartition of row i
* subdivide @A columns in uniform cols ranges in the output * subdivide @A columns in uniform cols ranges in the output
*/ */
idx_t* CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols); idx_t* CAT(colsOffsetsPartitioningUnifRanges_,OFF_F)(spmat* A,int gridCols);
/* /*
* partition CSR sparse matrix @A in @gridCols columns partitions as * partition CSR sparse matrix @A in @gridCols columns partitions as
* indipended and allocated sparse matrixes and return them * indipended and allocated sparse matrixes and return them
* subdivide @A columns in uniform cols ranges in the output * subdivide @A columns in uniform cols ranges in the output
*/ */
spmat* CAT(colsPartitioningUnifRanges_,OFF_F)(spmat* A,uint gridCols); spmat* CAT(colsPartitioningUnifRanges_,OFF_F)(spmat* A,int gridCols);
//same as above but with (aux) use of offsets partitoning (also returned if colOffsets!=NULL //same as above but with (aux) use of offsets partitoning (also returned if colOffsets!=NULL
spmat* CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)(spmat* A,uint gridCols,idx_t** colPartsOffsets); spmat* CAT(colsPartitioningUnifRangesOffsetsAux_,OFF_F)(spmat* A,int gridCols,idx_t** colPartsOffsets);
//same as checkOverallocPercent but with 2D partitioning - CSR col partitioning //same as checkOverallocPercent but with 2D partitioning - CSR col partitioning
void CAT(checkOverallocRowPartsPercent_,OFF_F)(ulong* forecastedSizes,spmat* AB, void CAT(checkOverallocRowPartsPercent_,OFF_F)(idx_t* forecastedSizes,spmat* AB,
idx_t gridCols,idx_t* bColOffsets); idx_t gridCols,idx_t* bColOffsets);
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
@ -62,13 +62,13 @@ void CAT(checkOverallocRowPartsPercent_,OFF_F)(ulong* forecastedSizes,spmat* AB,
//shift every index about the sparse data for use the matrix in a fortran app //shift every index about the sparse data for use the matrix in a fortran app
inline void C_FortranShiftIdxs(spmat* m){ inline void C_FortranShiftIdxs(spmat* m){
for(ulong r=0; r<m->M+1; m -> IRP[r]++,r++); for(idx_t r=0; r<m->M+1; m -> IRP[r]++,r++);
for(ulong i=0; i<m->NZ; m -> JA[i]++, i++); for(idx_t i=0; i<m->NZ; m -> JA[i]++, i++);
} }
//shift every index about the sparse data for use the matric in a C app //shift every index about the sparse data for use the matric in a C app
inline void Fortran_C_ShiftIdxs(spmat* m){ //TODO DBG ONLY and compleatness inline void Fortran_C_ShiftIdxs(spmat* m){ //TODO DBG ONLY and compleatness
for(ulong r=0; r<m->M+1; m -> IRP[r]--,r++); for(idx_t r=0; r<m->M+1; m -> IRP[r]--,r++);
for(ulong i=0; i<m->NZ; m -> JA[i]--, i++); for(idx_t i=0; i<m->NZ; m -> JA[i]--, i++);
} }
/* /*
@ -77,7 +77,7 @@ inline void Fortran_C_ShiftIdxs(spmat* m){ //TODO DBG ONLY and compleatness
* in @forecastedSizes there's for each row -> forecasted size * in @forecastedSizes there's for each row -> forecasted size
* and in the last entry the cumulative of the whole matrix * and in the last entry the cumulative of the whole matrix
*/ */
void checkOverallocPercent(ulong* forecastedSizes,spmat* AB); void checkOverallocPercent(idx_t* forecastedSizes,spmat* AB);
/* /*
check if sparse matrixes A<->B differ up to check if sparse matrixes A<->B differ up to
DOUBLE_DIFF_THREASH per element DOUBLE_DIFF_THREASH per element
@ -86,7 +86,7 @@ int spmatDiff(spmat* A, spmat* B);
////dyn alloc of spMM output matrix ////dyn alloc of spMM output matrix
/* /*
///size prediction of AB = @A * @B ///size prediction of AB = @A * @B
inline ulong SpMMPreAlloc(spmat* A,spmat* B){ inline idx_t SpMMPreAlloc(spmat* A,spmat* B){
//TODO BETTER PREALLOC HEURISTICS HERE //TODO BETTER PREALLOC HEURISTICS HERE
return MAX(A->NZ,B->NZ); return MAX(A->NZ,B->NZ);
} }
@ -110,7 +110,7 @@ inline spmat* initSpMatrixSpMM(spmat* A, spmat* B){
#define REALLOC_FACTOR 1.5 #define REALLOC_FACTOR 1.5
//realloc sparse matrix NZ arrays //realloc sparse matrix NZ arrays
inline int reallocSpMatrix(spmat* mat,ulong newSize){ inline int reallocSpMatrix(spmat* mat,idx_t newSize){
mat->NZ *= newSize; mat->NZ *= newSize;
void* tmp; void* tmp;
if (!(tmp = realloc(mat->AS,mat->NZ * sizeof(*(mat->AS))))){ if (!(tmp = realloc(mat->AS,mat->NZ * sizeof(*(mat->AS))))){
@ -133,6 +133,6 @@ void printSparseMatrix(spmat* sparseMat,char justNZMarkers);
/*convert @sparseMat sparse matrix in dense matrix returned*/ /*convert @sparseMat sparse matrix in dense matrix returned*/
double* CSRToDense(spmat* sparseMat); double* CSRToDense(spmat* sparseMat);
void freeAccsDense(ACC_DENSE* vectors,ulong num); void freeAccsDense(ACC_DENSE* vectors,idx_t num);
#endif //SPARSEUTILS_H_COMMON_IDX_IMPLS #endif //SPARSEUTILS_H_COMMON_IDX_IMPLS

@ -58,18 +58,18 @@ int createNewFile(char* const outFpath);
#define DOUBLE_STR_FORMAT "%25le\n" #define DOUBLE_STR_FORMAT "%25le\n"
//write double vector @v as row sequence of double at @fpath //write double vector @v as row sequence of double at @fpath
//e.g. read with od -tf8 -w8 fpath : OCTALOFFSET: DOUBLE FULL DIGITS //e.g. read with od -tf8 -w8 fpath : OCTALOFFSET: DOUBLE FULL DIGITS
int writeDoubleVector(char* fpath,double* v,ulong size); int writeDoubleVector(char* fpath,double* v,idx_t size);
/* /*
* read vector of double [Str] of arbitrary size from @fpath, true lenght in *size * read vector of double [Str] of arbitrary size from @fpath, true lenght in *size
* if size point to a nnz value, the initial allocation will be of *size * if size point to a nnz value, the initial allocation will be of *size
* eventual successive reallocation done multipling *size with VECTOR_STEP_REALLOC * eventual successive reallocation done multipling *size with VECTOR_STEP_REALLOC
*/ */
double* readDoubleVector(char* fpath,ulong* size); double* readDoubleVector(char* fpath,idx_t* size);
double* readDoubleVectorStr(char* fpath,ulong* size); double* readDoubleVectorStr(char* fpath,idx_t* size);
///STRUCTURED DATA IO -- BUFFERED: FSCANF - FPRINTF ///STRUCTURED DATA IO -- BUFFERED: FSCANF - FPRINTF
//dual of readDoubleVectorVector //dual of readDoubleVectorVector
int writeDoubleVectorAsStr(char* fpath,double* v,ulong size); int writeDoubleVectorAsStr(char* fpath,double* v,idx_t size);
int MPI_Dims_create(int nnodes, int ndims, int dims[]); //commons/ompi_dims_create/ompi_dims_create.c int MPI_Dims_create(int nnodes, int ndims, int dims[]); //commons/ompi_dims_create/ompi_dims_create.c
#include "config.h" #include "config.h"
@ -81,16 +81,16 @@ int getConfig(CONFIG* conf);
//append only list implemented with a reallocated array //append only list implemented with a reallocated array
typedef struct{ typedef struct{
ulong* a; idx_t* a;
ulong size; idx_t size;
ulong lastIdx; idx_t lastIdx;
} APPENDARRAY; } APPENDARRAY;
//append @val to @list, reallocating if reached end //append @val to @list, reallocating if reached end
//TODO inline int appendArr(ulong val,APPENDARRAY* list); //TODO inline int appendArr(idx_t val,APPENDARRAY* list);
void sortuint(uint* arr, uint len); //sort uint array @arr of @len elements void sortuint(uint* arr, uint len); //sort uint array @arr of @len elements
void sort_idx_t(idx_t* arr, idx_t len); void sort_idx_t(idx_t* arr, idx_t len);
void sortulong(ulong* arr, ulong len); void sortidx_t(idx_t* arr, idx_t len);
void sortRbNode(rbNode* arr,idx_t len); void sortRbNode(rbNode* arr,idx_t len);
///ranges functions ///ranges functions
@ -157,9 +157,9 @@ inline idx_t reductionMaxOmp(idx_t* arr,idx_t arrLen){
* of the 2 entries of the input vectors, signed as a[i] - b[i] (as well as dump prints) * of the 2 entries of the input vectors, signed as a[i] - b[i] (as well as dump prints)
* CONVENTION: @a = true result, @b vector to check with the true result * CONVENTION: @a = true result, @b vector to check with the true result
*/ */
int doubleVectorsDiff(double* a, double* b, ulong n,double* diffMax); int doubleVectorsDiff(double* a, double* b, idx_t n,double* diffMax);
//fill a random vector in @v long @size doubles //fill a random vector in @v long @size doubles
int fillRndVector(ulong size, double* v); int fillRndVector(idx_t size, double* v);
//read vector as a sequence of space separated double from file at @fpath //read vector as a sequence of space separated double from file at @fpath
#define VECTOR_STEP_MALLOC 100 #define VECTOR_STEP_MALLOC 100
@ -174,8 +174,8 @@ int fillRndVector(ulong size, double* v);
int extractInTmpFS(char* path, char* tmpFsDecompressPath); int extractInTmpFS(char* path, char* tmpFsDecompressPath);
//compute E[@values] in @out[0] and VAR[@values] in @out[1] of @numVals values //compute E[@values] in @out[0] and VAR[@values] in @out[1] of @numVals values
void statsAvgVar(double* values,uint numVals, double* out); void statsAvgVar(double* values,uint numVals, double* out);
void printMatrix(double* mat,ulong m,ulong n,char justNZMarkers); void printMatrix(double* mat,idx_t m,idx_t n,char justNZMarkers);
void printVector(double* v,ulong size); void printVector(double* v,idx_t size);
void assertArrNoRepetitions(idx_t* arrSorted, idx_t arrLen); void assertArrNoRepetitions(idx_t* arrSorted, idx_t arrLen);
#endif #endif

@ -52,10 +52,10 @@ int MMCheck(MM_typecode mcode) {
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
entry* MMtoCOO(ulong* NZ, FILE *fp, MM_typecode mcode,ulong* rowLens){ entry* MMtoCOO(idx_t* NZ, FILE *fp, MM_typecode mcode,idx_t* rowLens){
int scanndRet=0; int scanndRet=0;
ulong nzTrgt=*NZ,nzIdx=0; //expanded num of nz (in case of sym matrix) idx_t nzTrgt=*NZ,nzIdx=0; //expanded num of nz (in case of sym matrix)
ulong diagEntries=0, row = 0, col = 0;//current entry's row,col from MM -> 1 based idx_t diagEntries=0, row = 0, col = 0;//current entry's row,col from MM -> 1 based
double val = 0; double val = 0;
entry* entries = NULL; //COO parsed entries entry* entries = NULL; //COO parsed entries
///init ///init
@ -70,10 +70,10 @@ entry* MMtoCOO(ulong* NZ, FILE *fp, MM_typecode mcode,ulong* rowLens){
///parse MM fp lines into COOordinate entries ///parse MM fp lines into COOordinate entries
while (1) { // Reading the fp until EOF while (1) { // Reading the fp until EOF
if (mm_is_pattern(mcode)){ if (mm_is_pattern(mcode)){
scanndRet = fscanf(fp, "%lu %lu\n", &row, &col); scanndRet = fscanf(fp, "%d %d\n", &row, &col);
val = 1.0; val = 1.0;
} else if (mm_is_real(mcode) || (mm_is_integer(mcode))){ } else if (mm_is_real(mcode) || (mm_is_integer(mcode))){
scanndRet = fscanf(fp, "%lu %lu %lf\n", &row, &col, &val); scanndRet = fscanf(fp, "%d %d %lf\n", &row, &col, &val);
} }
@ -168,11 +168,11 @@ MatrixMarket* MMRead(char* matPath){
////COO -> ANYTHING ELSE CONVERSION ////COO -> ANYTHING ELSE CONVERSION
int COOtoCSR(entry* entries, spmat* mat,ulong* rowLens){ int COOtoCSR(entry* entries, spmat* mat,idx_t* rowLens){
int out = EXIT_FAILURE; int out = EXIT_FAILURE;
ulong idx; idx_t idx;
long* _rowsLastCol = NULL; //for each row -> last added entry's columnIdx long* _rowsLastCol = NULL; //for each row -> last added entry's columnIdx
ulong* rowsNextIdx = NULL; //for each row -> next entry progressive idx idx_t* rowsNextIdx = NULL; //for each row -> next entry progressive idx
if (!(rowsNextIdx = calloc(mat->M,sizeof(*rowsNextIdx)))){ if (!(rowsNextIdx = calloc(mat->M,sizeof(*rowsNextIdx)))){
ERRPRINT("MMtoCOO: rowsNextIdx calloc errd\n"); ERRPRINT("MMtoCOO: rowsNextIdx calloc errd\n");
goto _end; goto _end;
@ -186,9 +186,9 @@ int COOtoCSR(entry* entries, spmat* mat,ulong* rowLens){
} }
/*TODO OLD /*TODO OLD
* //get rowLens->IRP (partial), TODO moved MMtoCOO to avoid FULL rescan entries * //get rowLens->IRP (partial), TODO moved MMtoCOO to avoid FULL rescan entries
* for (ulong i=0; i<mat->NZ; i++) mat->IRP[entries[i].row+1]++; * for (idx_t i=0; i<mat->NZ; i++) mat->IRP[entries[i].row+1]++;
* memcpy(mat->RL,mat->IRP + 1,sizeof(*mat->IRP) * mat->M); //TODO in next ifdef * memcpy(mat->RL,mat->IRP + 1,sizeof(*mat->IRP) * mat->M); //TODO in next ifdef
* for (ulong i=2; i<mat->M+1; i++) mat->IRP[i] += mat->IRP[i-1]; * for (idx_t i=2; i<mat->M+1; i++) mat->IRP[i] += mat->IRP[i-1];
* OLD2: rowLens memcpy ... no just moved the pointer * OLD2: rowLens memcpy ... no just moved the pointer
* #ifdef ROWLENS * #ifdef ROWLENS
* memcpy(mat->RL,rowLens,sizeof(*rowLens) * mat->M); //TODO in next ifdef * memcpy(mat->RL,rowLens,sizeof(*rowLens) * mat->M); //TODO in next ifdef
@ -197,19 +197,19 @@ int COOtoCSR(entry* entries, spmat* mat,ulong* rowLens){
//IRP: trasform rows lens as increments to build row index "pointer" //IRP: trasform rows lens as increments to build row index "pointer"
//0th -> 0 mandatory; 1th = 0th row len, ...., M+1th = end of Mth row //0th -> 0 mandatory; 1th = 0th row len, ...., M+1th = end of Mth row
memcpy(mat->IRP+1,rowLens,sizeof(*rowLens) * mat->M);//init IRP with rows lens memcpy(mat->IRP+1,rowLens,sizeof(*rowLens) * mat->M);//init IRP with rows lens
for (ulong i=2; i<mat->M+1; i++) mat->IRP[i] += mat->IRP[i-1]; for (idx_t i=2; i<mat->M+1; i++) mat->IRP[i] += mat->IRP[i-1];
CONSISTENCY_CHECKS assert(mat->IRP[mat->M] == mat->NZ); CONSISTENCY_CHECKS assert(mat->IRP[mat->M] == mat->NZ);
///FILL ///FILL
//TODO EXPECTED entries with .col entries -> CONSISTENCY_CHECKS //TODO EXPECTED entries with .col entries -> CONSISTENCY_CHECKS
//sorted for each row (even nn sequential in @entries) //sorted for each row (even nn sequential in @entries)
//entries write in CSR format //entries write in CSR format
entry* e; entry* e;
for (ulong i=0; i<mat->NZ; i++) { for (idx_t i=0; i<mat->NZ; i++) {
e = entries+i; e = entries+i;
CONSISTENCY_CHECKS{ //TODO CHECK IF COO ENTRIES ARE SORTED CONSISTENCY_CHECKS{ //TODO CHECK IF COO ENTRIES ARE SORTED
/*#pragma message("COO sorting check enabled")*/ /*#pragma message("COO sorting check enabled")*/
if (_rowsLastCol[e->row] >= (long) e->col){ if (_rowsLastCol[e->row] >= (long) e->col){
ERRPRINTS("not sorted entry:%ld,%ld,%lf",e->row,e->col,e->val); ERRPRINTS("not sorted entry:%d,%d,%lf",e->row,e->col,e->val);
goto _end; goto _end;
} }
_rowsLastCol[e->row] = e->col; _rowsLastCol[e->row] = e->col;
@ -228,16 +228,16 @@ int COOtoCSR(entry* entries, spmat* mat,ulong* rowLens){
return out; return out;
} }
int COOtoELL(entry* entries, spmat* mat, ulong* rowLens){ int COOtoELL(entry* entries, spmat* mat, idx_t* rowLens){
int out=EXIT_FAILURE; int out=EXIT_FAILURE;
ulong maxRow = 0, col, _ellEntriesTot, *rowsNextCol; idx_t maxRow = 0, col, _ellEntriesTot, *rowsNextCol;
long* _rowsLastCol=NULL; long* _rowsLastCol=NULL;
entry* e; entry* e;
for (ulong i=0; i<mat->M; i++) maxRow = MAX(maxRow,rowLens[i]); for (idx_t i=0; i<mat->M; i++) maxRow = MAX(maxRow,rowLens[i]);
_ellEntriesTot = 2*mat->M*maxRow; _ellEntriesTot = 2*mat->M*maxRow;
#ifdef LIMIT_ELL_SIZE #ifdef LIMIT_ELL_SIZE
if ( _ellEntriesTot > ELL_MAX_ENTRIES ){ if ( _ellEntriesTot > ELL_MAX_ENTRIES ){
ERRPRINTS("Required entries %lu -> %lu uMB for the matrix exceed the " ERRPRINTS("Required entries %d -> %lu uMB for the matrix exceed the "
"designated threashold of: %lu -> %lu MB for ellpack\n", "designated threashold of: %lu -> %lu MB for ellpack\n",
_ellEntriesTot,(sizeof(double)*_ellEntriesTot) >> 20, _ellEntriesTot,(sizeof(double)*_ellEntriesTot) >> 20,
ELL_MAX_ENTRIES,(sizeof(double)*ELL_MAX_ENTRIES) >> 20); ELL_MAX_ENTRIES,(sizeof(double)*ELL_MAX_ENTRIES) >> 20);
@ -274,12 +274,12 @@ int COOtoELL(entry* entries, spmat* mat, ulong* rowLens){
///FILL NZ ///FILL NZ
//TODO EXPECTED entries with .col entries -> CONSISTENCY_CHECKS //TODO EXPECTED entries with .col entries -> CONSISTENCY_CHECKS
//sorted for each row (even nn sequential in @entries) //sorted for each row (even nn sequential in @entries)
for (ulong i=0; i<mat->NZ; i++){ for (idx_t i=0; i<mat->NZ; i++){
e = entries + i; e = entries + i;
CONSISTENCY_CHECKS{ //TODO CHECK IF COO ENTRIES ARE COLS SORTED for righe successive CONSISTENCY_CHECKS{ //TODO CHECK IF COO ENTRIES ARE COLS SORTED for righe successive
/*#pragma message("COO sorting check enabled")*/ /*#pragma message("COO sorting check enabled")*/
if (_rowsLastCol[e->row] >= (long) e->col){ if (_rowsLastCol[e->row] >= (long) e->col){
ERRPRINTS("not sorted entry:%ld,%ld,%lf", ERRPRINTS("not sorted entry:%d,%d,%lf",
e->row,e->col,e->val); e->row,e->col,e->val);
goto _end; goto _end;
} }
@ -291,16 +291,16 @@ int COOtoELL(entry* entries, spmat* mat, ulong* rowLens){
} }
///FILL PAD ///FILL PAD
ulong padded = 0,paddedEntries = mat->M*mat->MAX_ROW_NZ; idx_t padded = 0,paddedEntries = mat->M*mat->MAX_ROW_NZ;
for (ulong r=0; r<mat->M; r++){ for (idx_t r=0; r<mat->M; r++){
for (ulong c=rowLens[r],j=IDX2D(r,c,maxRow); c<maxRow; c++,j++,padded++){ for (idx_t c=rowLens[r],j=IDX2D(r,c,maxRow); c<maxRow; c++,j++,padded++){
//mat->AS[j] = ELL_AS_FILLER; //TODO ALREADY DONE IN CALLOC //mat->AS[j] = ELL_AS_FILLER; //TODO ALREADY DONE IN CALLOC
//mat->JA[j] = mat->JA[rowLens[r]-1]; //ELL_JA_FILLER; //TODO calloc CUDA benefit? //mat->JA[j] = mat->JA[rowLens[r]-1]; //ELL_JA_FILLER; //TODO calloc CUDA benefit?
} }
} }
VERBOSE{ VERBOSE{
printf("padded %lu entries = %lf%% of NZ\n",padded,100*padded/(double) mat->NZ); printf("padded %d entries = %lf%% of NZ\n",padded,100*padded/(double) mat->NZ);
printf("ELL matrix of: %lu paddedEntries -> %lu MB of JA+AS\n", printf("ELL matrix of: %d paddedEntries -> %ld MB of JA+AS\n",
paddedEntries,(paddedEntries*sizeof(*(mat->JA))+paddedEntries*sizeof(*(mat->AS))) >> 20); paddedEntries,(paddedEntries*sizeof(*(mat->JA))+paddedEntries*sizeof(*(mat->AS))) >> 20);
} }
out = EXIT_SUCCESS; out = EXIT_SUCCESS;
@ -318,7 +318,7 @@ spmat* MMtoCSR(char* matPath){
return NULL; return NULL;
} }
if (!(mat = calloc(1,sizeof(*mat)))){ if (!(mat = calloc(1,sizeof(*mat)))){
ERRPRINT("MMtoCSR: mat struct alloc errd"); ERRPRINT("MMtoCSR: mat struct alloc err\n");
goto err; goto err;
} }
mat -> M = mm->M; mat -> M = mm->M;
@ -345,7 +345,7 @@ spmat* MMtoCSR(char* matPath){
#endif #endif
VERBOSE VERBOSE
printf("MMtoCSR: %lu NZ entries-> %lu MB of AS+JA+IRP\n",mat->NZ, printf("MMtoCSR: %d NZ entries-> %ld MB of AS+JA+IRP\n",mat->NZ,
(mat->NZ*sizeof(*(mat->AS))+mat->NZ*sizeof(*(mat->JA))+(1+mat->M*sizeof(*(mat->IRP))))>>20); (mat->NZ*sizeof(*(mat->AS))+mat->NZ*sizeof(*(mat->JA))+(1+mat->M*sizeof(*(mat->IRP))))>>20);
goto _free; goto _free;

@ -15,24 +15,20 @@ subroutine dspmm(a,b,c,info, impl_choice)
integer(psb_ipk_), intent(in) :: impl_choice integer(psb_ipk_), intent(in) :: impl_choice
! Internal variables ! Internal variables
integer(c_size_t) :: a_m,a_n,a_nz integer(c_int) :: a_m,a_n,a_nz
type(c_ptr) :: a_as,a_ja,a_irp type(c_ptr) :: a_as,a_ja,a_irp
integer(c_size_t) :: a_max_row_nz integer(c_int) :: b_m,b_n,b_nz
integer(c_size_t) :: b_m,b_n,b_nz
type(c_ptr) :: b_as,b_ja,b_irp type(c_ptr) :: b_as,b_ja,b_irp
integer(c_size_t) :: b_max_row_nz
integer(c_int) :: impl_choice_ integer(c_int) :: impl_choice_
type(c_ptr) :: accumul, rows_sizes, tmp_matrix type(c_ptr) :: accumul, rows_sizes, tmp_matrix
integer(c_size_t) :: nnz integer(c_int) :: nnz
type(c_ptr) :: c_as,c_ja,c_irp type(c_ptr) :: c_as,c_ja,c_irp
interface spmm_build_spacc interface spmm_build_spacc
subroutine psb_f_spmm_build_spacc(c_a_m,c_a_n,c_a_nz,& subroutine psb_f_spmm_build_spacc(c_a_m,c_a_n,c_a_nz,&
c_a_as,c_a_ja,c_a_irp,& c_a_as,c_a_ja,c_a_irp,&
c_a_max_row_nz,&
c_b_m,c_b_n,c_b_nz,& c_b_m,c_b_n,c_b_nz,&
c_b_as,c_b_ja,c_b_irp,& c_b_as,c_b_ja,c_b_irp,&
c_b_max_row_nz,&
c_impl_choice,& c_impl_choice,&
c_accumul,& c_accumul,&
c_rows_sizes,& c_rows_sizes,&
@ -41,16 +37,14 @@ subroutine dspmm(a,b,c,info, impl_choice)
c_nnz) bind(C) c_nnz) bind(C)
use iso_c_binding use iso_c_binding
use psb_base_mod use psb_base_mod
integer(c_size_t), intent(in), value :: c_a_m,c_a_n,c_a_nz integer(c_int), intent(in), value :: c_a_m,c_a_n,c_a_nz
type(c_ptr), intent(in) :: c_a_as,c_a_ja,c_a_irp type(c_ptr), intent(in) :: c_a_as,c_a_ja,c_a_irp
integer(c_size_t), intent(in), value :: c_a_max_row_nz integer(c_int), intent(in), value :: c_b_m,c_b_n,c_b_nz
integer(c_size_t), intent(in), value :: c_b_m,c_b_n,c_b_nz
type(c_ptr), intent(in) :: c_b_as,c_b_ja,c_b_irp type(c_ptr), intent(in) :: c_b_as,c_b_ja,c_b_irp
integer(c_size_t), intent(in), value :: c_b_max_row_nz
integer(c_int), intent(in), value :: c_impl_choice integer(c_int), intent(in), value :: c_impl_choice
type(c_ptr), intent(out) :: c_accumul,c_rows_sizes,c_tmp_matrix type(c_ptr), intent(out) :: c_accumul,c_rows_sizes,c_tmp_matrix
integer(psb_ipk_), intent(out) :: c_info integer(psb_ipk_), intent(out) :: c_info
integer(c_size_t), intent(out) :: c_nnz integer(c_int), intent(out) :: c_nnz
end subroutine psb_f_spmm_build_spacc end subroutine psb_f_spmm_build_spacc
end interface spmm_build_spacc end interface spmm_build_spacc
@ -74,7 +68,6 @@ subroutine dspmm(a,b,c,info, impl_choice)
a_m = a%get_nrows() a_m = a%get_nrows()
a_n = a%get_ncols() a_n = a%get_ncols()
a_nz = a%get_nzeros() a_nz = a%get_nzeros()
write(*,*) 'IRP(1:5) ',a%irp(1:5)
a_as = c_loc(a%val) a_as = c_loc(a%val)
a_ja = c_loc(a%ja) a_ja = c_loc(a%ja)
a_irp = c_loc(a%irp) a_irp = c_loc(a%irp)
@ -89,24 +82,25 @@ subroutine dspmm(a,b,c,info, impl_choice)
! call calculateSize ! call calculateSize
call psb_f_spmm_build_spacc(a_m,a_n,a_nz,a_as,& call psb_f_spmm_build_spacc(a_m,a_n,a_nz,a_as,&
a_ja,a_irp,a_max_row_nz,& a_ja,a_irp,&
b_m,b_n,b_nz,b_as,b_ja,& b_m,b_n,b_nz,b_as,b_ja,&
b_irp,b_max_row_nz,& b_irp,&
impl_choice_,accumul,& impl_choice_,accumul,&
rows_sizes,tmp_matrix,& rows_sizes,tmp_matrix,&
info,nnz) info,nnz)
! allocate c%val, c%ja and c%irp ! allocate c%val, c%ja and c%irp
allocate(c%val(nnz)) call psb_realloc(nnz,c%val,info)
allocate(c%ja(nnz)) if (info /= 0) write(*,*) "reallocation failed"
allocate(c%irp(a_m + 1)) call psb_realloc(nnz,c%ja,info)
call psb_realloc(a_m+1,c%irp,info)
! c_as = c_loc(c%val) c_as = c_loc(c%val)
! c_ja = c_loc(c%ja) c_ja = c_loc(c%ja)
! c_irp = c_loc(c%irp) c_irp = c_loc(c%irp)
! c%set_nrows(a_m) call c%set_nrows(a_m)
! c%set_ncols(b_n) call c%set_ncols(b_n)
! call spmmRowByRowPopulate ! call spmmRowByRowPopulate
call psb_f_spmm_merge_spacc(accumul,& call psb_f_spmm_merge_spacc(accumul,&

Loading…
Cancel
Save