finished interface for spmm with uperbound method

sp3mm-interface
wlthr 2 years ago
parent 250163f1bc
commit bfd88ca2c1

2
.gitignore vendored

@ -21,3 +21,5 @@ autom4te.cache
# the executable from tests
runs
# editor generated files
.vscode/*

@ -1,7 +0,0 @@
{
"fortran.fortls.path": "/usr/bin/fortls",
"files.associations": {
"sp3mm_csr_omp_multi.h": "c",
"sp3mm_csr_omp_ub_generic.h": "c"
}
}

@ -10,7 +10,7 @@ BOBJS=psb_base_mat_impl.o \
SOBJS=psb_s_csr_impl.o psb_s_coo_impl.o psb_s_csc_impl.o psb_s_mat_impl.o
#\
psb_s_lcoo_impl.o psb_s_lcsr_impl.o
DOBJS=psb_d_csr_impl.o psb_d_coo_impl.o psb_d_csc_impl.o psb_d_mat_impl.o
DOBJS=psb_d_csr_impl.o psb_d_coo_impl.o psb_d_csc_impl.o psb_d_mat_impl.o sp3mm_impl.o
#\
psb_d_lcoo_impl.o psb_d_lcsr_impl.o
COBJS=psb_c_csr_impl.o psb_c_coo_impl.o psb_c_csc_impl.o psb_c_mat_impl.o
@ -20,7 +20,7 @@ ZOBJS=psb_z_csr_impl.o psb_z_coo_impl.o psb_z_csc_impl.o psb_z_mat_impl.o
#\
psb_z_lcoo_impl.o psb_z_lcsr_impl.o
OBJS=$(BOBJS) $(SOBJS) $(DOBJS) $(COBJS) $(ZOBJS)
OBJS=$(BOBJS) $(SOBJS) $(DOBJS) $(COBJS) $(ZOBJS)
#
# Where the library should go, and how it is called.

@ -3395,7 +3395,6 @@ contains
& nzc,nnzre, isz, ipb, irwsz, nrc, nze
real(psb_dpk_) :: cfb
info = psb_success_
ma = a%get_nrows()
na = a%get_ncols()
@ -3411,26 +3410,18 @@ contains
! conversion
! available choices of implementation
enum, bind(C)
enumerator :: SPMM_ROW_BY_ROW_UB = 1
enumerator SPMM_ROW_BY_ROW_SYMB_NUM
enumerator SPMM_ROW_BY_ROW_1D_BLOCKS_SYMB_NUM
enumerator SPMM_ROW_BY_ROW_2D_BLOCKS_SYMB_NUM
end enum
select case (spmm_impl_id)
case (SPMM_ROW_BY_ROW_UB)
! call spmm_row_by_row_ub
case (SPMM_ROW_BY_ROW_SYMB_NUM)
! call spmm_row_by_row_symb_num
case (SPMM_ROW_BY_ROW_1D_BLOCKS_SYMB_NUM)
! call spmm_row_by_row_1d_blocks_symb_num
case (SPMM_ROW_BY_ROW_2D_BLOCKS_SYMB_NUM)
! call spmm_row_by_row_2d_blocks_symb_num
case default
! call default choice
end select
! select case (spmm_impl_id)
! case (SPMM_ROW_BY_ROW_UB)
! ! call spmm_row_by_row_ub
! case (SPMM_ROW_BY_ROW_SYMB_NUM)
! ! call spmm_row_by_row_symb_num
! case (SPMM_ROW_BY_ROW_1D_BLOCKS_SYMB_NUM)
! ! call spmm_row_by_row_1d_blocks_symb_num
! case (SPMM_ROW_BY_ROW_2D_BLOCKS_SYMB_NUM)
! ! call spmm_row_by_row_2d_blocks_symb_num
! case default
! ! call default choice
! end select
nze = min(size(c%val),size(c%ja))

@ -123,6 +123,98 @@ spmat* CAT(spmmRowByRow_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
return AB;
}
idx_t CAT(spmmRowByRowCalculateSize_,OFF_F)(spmat* A, spmat*B, CONFIG* cfg, void** accumul, void **rows_sizes, void** tmp_matrix){
DEBUG printf("spmm\trows of A,\tfull B\tM=%lu x N=%lu\n",A->M,B->N);
///thread aux
ACC_DENSE *accVects = NULL,*acc;
SPMM_ACC* outAccumul=NULL;
idx_t* rowsSizes = NULL;
///init AB matrix with SPMM heuristic preallocation
spmat* AB = allocSpMatrix(A->M,B->N);
if (!AB) goto _err;
if (!(rowsSizes = CAT(spMMSizeUpperbound_,OFF_F) (A,B))) goto _err;
///aux structures alloc
if (!(accVects = _initAccVectors(cfg->threadNum,AB->N))){
ERRPRINT("accVects init failed\n");
goto _err;
}
if (!(outAccumul = initSpMMAcc(rowsSizes[AB->M],AB->M))) goto _err;
#if SPARSIFY_PRE_PARTITIONING == T
//prepare sparse accumulators with U.Bounded rows[parts] starts
SPACC* accSp;
for( idx_t r=0,rSizeCumul=0; r<AB->M; rSizeCumul += rowsSizes[r++]){
accSp = outAccumul->accs+r;
accSp->JA = outAccumul->JA + rSizeCumul;
accSp->AS = outAccumul->AS + rSizeCumul;
//accSp->len = rowsSizes[r];
}
#endif
((CHUNKS_DISTR_INTERF) cfg->chunkDistrbFunc) (AB->M,AB,cfg);
AUDIT_INTERNAL_TIMES Start=omp_get_wtime();
#pragma omp parallel for schedule(runtime) private(acc)
for (ulong r=0; r<A->M; r++){ //row-by-row formulation
//iterate over nz entry index c inside current row r
acc = accVects + omp_get_thread_num();
/* direct use of sparse scalar vector multiplication
for (idx_t ja=A->IRP[r]-OFF_F,ca,jb,bRowLen; ja<A->IRP[r+1]-OFF_F; ja++){
ca = A->JA[ja] - OFF_F;
jb = B->IRP[ca] - OFF_F;
bRowLen = B->IRP[ca+1] - B->IRP[ca];
CAT(scSparseVectMul_,OFF_F)(A->AS[ja],B->AS+jb,B->JA+jb,bRowLen,acc);
}*/
for (ulong c=A->IRP[r]-OFF_F; c<A->IRP[r+1]-OFF_F; c++) //row-by-row formul
CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, acc);
//trasform accumulated dense vector to a CSR row
#if SPARSIFY_PRE_PARTITIONING == T
_sparsifyUB(acc,outAccumul->accs+r,0);
#else
sparsifyUBNoPartsBounds(outAccumul,acc,outAccumul->accs + r,0);
#endif
_resetAccVect(acc); //rezero for the next A row
}
///calculate exact number of non zero elements
idx_t nnz;
nnz = calculateSize(outAccumul->accs,AB);
/// put the sparse accumulator into the argument so that
/// it can be retrived in Fortran
*accumul = outAccumul;
*rows_sizes = rowsSizes;
*tmp_matrix = AB;
if(accVects) freeAccsDense(accVects,cfg->threadNum);
return nnz;
_err:
if(AB) freeSpmat(AB);
AB=NULL; //nothing'll be returned
_free:
if(rowsSizes) free(rowsSizes);
if(accVects) freeAccsDense(accVects,cfg->threadNum);
if(outAccumul) freeSpMMAcc(outAccumul);
}
void CAT(spmmRowByRowPopulate_,OFF_F)(void** accumul, void** rows_sizes, void** tmp_matrix, double** AS, idx_t** JA, idx_t** IRP){
SPMM_ACC* outAccumul= *accumul;
idx_t* rowsSizes = *rows_sizes;
spmat *AB = *tmp_matrix;
mergeRowsPopulate(outAccumul->accs, AB, AS, JA, IRP);
#if OFF_F != 0
C_FortranShiftIdxs(AB);
#endif
AUDIT_INTERNAL_TIMES End=omp_get_wtime();
DEBUG checkOverallocPercent(rowsSizes,AB);
if(AB) freeSpmat(AB);
if(rowsSizes) free(rowsSizes);
if(outAccumul) freeSpMMAcc(outAccumul);
}
spmat* CAT(spmmRowByRow1DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
DEBUG printf("spmm\trowBlocks of A,\tfull B\tM=%lu x N=%lu\n",A->M,B->N);
DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols);

@ -1,79 +1,31 @@
#include "../include/Sp3MM_CSR_OMP_Multi.h"
#include "../include/utils.h"
enum impl_types {ROW_BY_ROW_UB};
enum impl_types
{
ROW_BY_ROW_UB
};
/**
* @brief performs multiplication of two sparse matrices
* A and B stored in the CRS format. The resulting matrix is C
*
* @param[in] a_m number of columns of A
* @param[in] a_n number of rows of A
* @param[in] a_nz number of non zero elements of A
* @param[in] a_as array with the non zero coefficients of A
* @param[in] a_ja array with the column indices of the non
* zero coefficients of A
* @param[in] a_irp array with the indices of the beginning of
* each row in a_as and a_ja
* @param[in] a_rl array with the lengths of each rows of A
* @param[in] a_max_row_nz maximum number of coefficients in a row in A
* @param[in] b_m number of columns of B
* @param[in] b_n number of rows of B
* @param[in] b_nz number of non zero elements of B
* @param[in] b_as array with the non zero coefficients of B
* @param[in] b_ja array with the column indices of the non
* zero coefficients of B
* @param[in] b_irp array with the indices of the beginning of
* each row in b_as and b_ja
* @param[in] b_rl array with the lengths of each rows of B
* @param[in] b_max_row_nz maximum number of coefficients in a row in B
* @param[in] impl_choice implementation choice
* @param[out] c_m number of colums of C
* @param[out] c_n number of rows of C
* @param[out] c_nz number of non zero elements in C
* @param[out] c_as array with the non zero coefficients of C
* @param[out] c_ja array with the column indices of the non
* zero coefficients of C
* @param[out] c_irp array with the indices of the beginning of
* each row in c_as and c_ja
* @param[out] c_rl array with the lengths of each rows of C
* @param[out] c_max_row_nz maximum number of coefficients in a row in C
* @param[out] info return value to check if the operation was successful
*/
#ifdef ROWLENS
void psb_f_spmm(idx_t a_m, idx_t a_n, idx_t a_nz,
double *a_as, idx_t *a_ja,
idx_t *a_irp, idx_t *a_rl, idx_t a_max_row_nz,
idx_t b_m, idx_t b_n, idx_t b_nz,
double *b_as, idx_t *b_ja,
idx_t *b_irp, idx_t *b_rl, idx_t b_max_row_nz,
enum impl_types impl_choice,
idx_t *c_m, idx_t *c_n, idx_t *c_nz,
double **c_as, idx_t **c_ja,
idx_t **c_irp, idx_t **c_rl, idx_t *c_max_row_nz,
int info)
#else
void psb_f_spmm(idx_t a_m, idx_t a_n, idx_t a_nz,
double *a_as, idx_t *a_ja,
idx_t *a_irp, idx_t a_max_row_nz,
idx_t b_m, idx_t b_n, idx_t b_nz,
double *b_as, idx_t *b_ja,
idx_t *b_irp, idx_t b_max_row_nz,
enum impl_types impl_choice,
idx_t *c_m, idx_t *c_n, idx_t *c_nz,
double **c_as, idx_t **c_ja,
idx_t **c_irp, idx_t *c_max_row_nz,
int *info)
#endif
void psb_f_spmm_build_spacc(idx_t a_m, idx_t a_n, idx_t a_nz,
double *a_as, idx_t *a_ja,
idx_t *a_irp, idx_t a_max_row_nz,
idx_t b_m, idx_t b_n, idx_t b_nz,
double *b_as, idx_t *b_ja,
idx_t *b_irp, idx_t b_max_row_nz,
enum impl_types impl_choice,
void **accumul,
void **rows_sizes,
void **tmp_matrix,
idx_t *nnz)
{
int rc;
spmat *a, *b, *c;
CONFIG *cfg;
#ifdef ROWLENS
#ifdef ROWLENS
a->RL = a_rl;
b->RL = b_rl;
#endif//ROWLENS
#endif // ROWLENS
// setting up cfg
// TODO : CHECK THAT THIS IS COMPATIBLE WITH PSB
@ -96,24 +48,32 @@ void psb_f_spmm(idx_t a_m, idx_t a_n, idx_t a_nz,
b->IRP = b_irp;
b->MAX_ROW_NZ = b_max_row_nz;
// performing spmm
// computing the size
switch (impl_choice)
{
case ROW_BY_ROW_UB:
c = spmmRowByRow_0(a, b, cfg);
break;
*nnz = spmmRowByRowCalculateSize_0(a, b, cfg, accumul, rows_sizes, tmp_matrix);
default:
break;
}
}
// output result
*(c_m) = c->M;
*(c_n) = c->N;
*(c_nz)= c->NZ;
*(c_as)= c->AS;
*(c_ja)= c->JA;
*(c_irp)=c->IRP;
#ifdef ROWLENS
*(c_rl)= c->RL;
#endif
void psb_f_spmm_merge_spacc(void **accumul,
void **rows_sizes,
void **tmp_matrix,
enum impl_types impl_choice,
double **c_as,
idx_t **c_ja,
idx_t **c_irp,
int *info)
{
// merging the rows into the correct arrays
switch (impl_choice)
{
case ROW_BY_ROW_UB:
*info = spmmRowByRowPopulate_0(accumul, rows_sizes, tmp_matrix, c_as, c_ja, c_irp);
break;
default:
break;
}
}

@ -402,5 +402,48 @@ inline int mergeRows(SPACC* rows,spmat* mat){
return EXIT_SUCCESS;
}
/***************************************************************
* The Following code is a modified version of the mergeRows
* function.
* The function is split into two parts :
* - the first one calculates the sizes of the arrays
* and returns it
* - the second one fills them accordingly
* This allows for allocation of the arrays as allocatables in
* Fortran
*/
inline idx_t calculateSize(SPACC* rows, spmat *mat){
ulong nzNum=0;
//count nnz entries and alloc arrays for them
for (ulong r=0; r<mat->M; ++r){
nzNum += rows[r].len;
mat->IRP[r+1] = nzNum;
#ifdef ROWLENS
mat->RL[r] = rows[r].len
#endif
}
mat->NZ = nzNum;
return nzNum;
}
inline int mergeRowsPopulate(SPACC* rows, spmat* mat, double** AS, idx_t** JA, idx_t** IRP) {
// Populate with rows' non-zero values and indexes
// TODO PARALLEL COPY
#pragma omp parallel for schedule(static)
for (ulong r = 0; r < mat->M; r++) {
memcpy(*AS + mat->IRP[r], rows[r].AS, rows[r].len * sizeof(double));
memcpy(*JA + mat->IRP[r], rows[r].JA, rows[r].len * sizeof(idx_t));
}
memcpy(*IRP, mat->IRP, (mat->M + 1) * sizeof(idx_t));
// Consistency checks
// TODO: Add your consistency checks here
return EXIT_SUCCESS;
}
#endif //SPMMUTILS_H_SINGLE_IMPLEMENTATION

@ -1,41 +1,101 @@
subroutine dspmm(a,b,c,info, impl_choice)
! TODO :
! * Split the C function into two subroutines:
! - Memory estimation
! - Actual computation
! * Call estimation function
! * Allocate c matrix arrays
! * pass the arrays as c pointers to the computation routine
use psb_d_mat_mod
use iso_c_binding
implicit none
type(psb_d_csr_sparse_mat), intent(in) :: a,b
type(psb_d_csr_sparse_mat), intent(out):: c
type(psb_d_csr_sparse_mat), intent(inout):: c
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in), optional :: impl_choice
! Internal variables
integer(c_size_t), value :: a_m,a_n,a_nz
real(c_double) :: a_as
integer(c_size_t) :: a_ja,a_irp
integer(c_size_t):: a_m,a_n,a_nz
real(c_double), pointer :: a_as(:)
integer(c_size_t), pointer :: a_ja(:),a_irp(:)
type(c_ptr) :: a_as_ptr,a_ja_ptr,a_irp_ptr
integer(c_size_t) :: a_max_row_nz
integer(c_size_t) :: b_m,b_n,b_nz
real(c_double), pointer :: b_as(:)
integer(c_size_t), pointer :: b_ja(:),b_irp(:)
type(c_ptr) :: b_as_ptr,b_ja_ptr,b_irp_ptr
integer(c_size_t) :: b_max_row_nz
integer(c_int) :: impl_choice_
type(c_ptr) :: accumul, rows_sizes, tmp_matrix
integer(c_size_t) :: nnz
real(c_double), pointer :: c_as(:)
integer(c_size_t), pointer :: c_ja(:),c_irp(:)
type(c_ptr) :: c_as_ptr,c_ja_ptr,c_irp_ptr
integer(c_size_t), value :: a_max_row_nz
integer(c_size_t), value :: b_m,b_n,b_nz
real(c_double) :: b_as
integer(c_size_t) :: b_ja,b_irp
integer(c_size_t), value :: b_max_row_nz
integer(c_int), value :: impl_choice_
integer(c_size_t), value :: c_m,c_n,c_nz
real(c_double) :: c_as
integer(c_size_t) :: c_ja,c_irp
integer(c_size_t), value :: c_max_row_nz
interface spmm_build_spacc
subroutine psb_f_spmm_build_spacc(c_a_m,c_a_n,c_a_nz,&
c_a_as,c_a_ja,c_a_irp,&
c_a_max_row_nz,&
c_b_m,c_b_n,c_b_nz,&
c_b_as,c_b_ja,c_b_irp,&
c_b_max_row_nz,&
c_impl_choice,&
c_accumul,&
c_rows_sizes,&
c_tmp_matrix,&
c_info,&
c_nnz) bind(C)
use iso_c_binding
use psb_base_mod
integer(c_size_t), intent(in), value :: c_a_m,c_a_n,c_a_nz
type(c_ptr), intent(in) :: c_a_as,c_a_ja,c_a_irp
integer(c_size_t), intent(in), value :: c_a_max_row_nz
integer(c_size_t), intent(in), value :: c_b_m,c_b_n,c_b_nz
type(c_ptr), intent(in) :: c_b_as,c_b_ja,c_b_irp
integer(c_size_t), intent(in), value :: c_b_max_row_nz
integer(c_int), intent(in), value :: c_impl_choice
type(c_ptr), intent(out) :: c_accumul,c_rows_sizes,c_tmp_matrix
integer(psb_ipk_), intent(out) :: c_info
integer(c_size_t), intent(out) :: c_nnz
end subroutine psb_f_spmm_build_spacc
end interface spmm_build_spacc
interface spmm_row_by_row_populate
subroutine psb_f_spmm_merge_spacc(c_accumul,&
c_rows_sizes,&
c_tmp_matrix,&
c_impl_choice,&
c_as,c_ja,c_irp,&
c_info) bind(C)
use iso_c_binding
use psb_base_mod
type(c_ptr), intent(in) :: c_accumul,c_rows_sizes,c_tmp_matrix
integer(c_int), intent(in), value :: c_impl_choice
integer(psb_ipk_), intent(out) :: c_info
type(c_ptr), intent(out) :: c_as,c_ja,c_irp
end subroutine psb_f_spmm_merge_spacc
end interface spmm_row_by_row_populate
! Initializing internal variables
a_m = a%get_nrows()
a_n = a%get_ncols()
a_nz = a%get_nzeros()
a_as = a%val
a_as_ptr = c_loc(a_as)
a_ja = a%ja
a_ja_ptr = c_loc(a_ja)
a_irp = a%irp
! a_max_row_nz
a_irp_ptr = c_loc(a_irp)
! ! a_max_row_nz
b_m = b%get_nrows()
b_n = b%get_ncols()
b_nz = b%get_nzeros()
b_as = b%val
b_as_ptr = c_loc(b_as)
b_ja = b%ja
b_ja_ptr = c_loc(b_ja)
b_irp = b%irp
b_irp_ptr = c_loc(b_irp)
if (present(impl_choice)) then
impl_choice_ = impl_choice
@ -43,53 +103,38 @@ subroutine dspmm(a,b,c,info, impl_choice)
impl_choice_ = 0
end if
! Calling psb_f_spmm
psb_f_spmm(a_m,a_n,a_nz,&
& a_as,a_ja,a_irp,&
& a_max_row_nz,&
& b_m,b_n,b_nz,&
& b_as,b_ja,b_irp,&
& impl_choice_,&
& c_m,c_n,c_nz,&
& c_as,c_ja,c_irp,&
& c_max_row_nz,&
& info)
! call calculateSize
call psb_f_spmm_build_spacc(a_m,a_n,a_nz,a_as_ptr,&
a_ja_ptr,a_irp_ptr,a_max_row_nz,&
b_m,b_n,b_nz,b_as_ptr,b_ja_ptr,&
b_irp_ptr,b_max_row_nz,&
impl_choice_,accumul,&
rows_sizes,tmp_matrix,&
info,nnz)
! Putting results in c
c%m = c_m
c%n = c_n
c%val = c_as
c%ja = c_ja
c%irp = c_irp
! allocate c%val, c%ja and c%irp
allocate(c%val(nnz))
allocate(c%ja(nnz))
allocate(c%irp(a_m + 1))
c_as = c%val
c_as_ptr = c_loc(c_as)
c_ja = c%ja
c_ja_ptr = c_loc(c_ja)
c_irp = c%irp
c_irp_ptr = c_loc(c_irp)
! c%set_nrows(a_m)
! c%set_ncols(b_n)
! call spmmRowByRowPopulate
call psb_f_spmm_merge_spacc(accumul,&
rows_sizes,&
tmp_matrix,&
impl_choice_,&
c_as_ptr,&
c_ja_ptr,&
c_irp_ptr,&
info)
interface
subroutine psb_f_spmm(c_a_m,c_a_n,c_a_nz,&
& c_a_as,c_a_ja,c_a_irp,&
& c_a_max_row_nz,
& c_b_m,c_b_n,c_b_nz,&
& c_b_as,c_b_ja,c_b_irp,&
& c_b_max_row_nz,
& c_impl_choice,&
& c_c_m,c_c_n,c_c_nz,&
& c_c_as,c_c_ja,c_c_irp,
& c_c_max_row_nz,&
& c_info) bind(c)
use iso_c_binding
use psb_base_mod
integer(c_size_t), intent(in), value :: c_a_m,c_a_n,c_a_nz
real(c_double), intent(in) :: c_a_as
integer(c_size_t), intent(in) :: c_a_ja,c_a_irp
integer(c_size_t), intent(in), value :: c_a_max_row_nz
integer(c_size_t), intent(in), value :: c_b_m,c_b_n,c_b_nz
real(c_double), intent(in) :: c_b_as
integer(c_size_t), intent(in) :: c_b_ja,c_b_irp
integer(c_size_t), intent(in), value :: c_b_max_row_nz
integer(c_int), intent(in), value :: c_impl_choice
integer(c_size_t), intent(out), value :: c_c_m,c_c_n,c_c_nz
real(c_double), intent(out) :: c_c_as
integer(c_size_t), intent(out) :: c_c_ja,c_c_irp
integer(c_size_t), intent(out), value :: c_c_max_row_nz
integer(psb_ipk_), intent(out) :: c_info
end subroutine psb_f_spmm
end interface
end subroutine dspmm

@ -40,4 +40,6 @@ module psb_objhandle_mod
type, bind(c) :: psb_c_zspmat
type(c_ptr) :: item = c_null_ptr
end type psb_c_zspmat
end type psb_c_zspmat
end module psb_objhandle_mod
Loading…
Cancel
Save