finished interface for spmm with uperbound method

2 years ago · bfd88ca2c1
parent 250163f1bc
commit bfd88ca2c1
9 changed files with 299 additions and 171 deletions
--- a/.gitignore
+++ b/.gitignore
@ -21,3 +21,5 @@ autom4te.cache
 # the executable from tests
 runs

+# editor generated files
+.vscode/*
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -1,7 +0,0 @@
-{
-    "fortran.fortls.path": "/usr/bin/fortls",
-    "files.associations": {
-        "sp3mm_csr_omp_multi.h": "c",
-        "sp3mm_csr_omp_ub_generic.h": "c"
-    }
-}
--- a/base/serial/impl/Makefile
+++ b/base/serial/impl/Makefile
@ -10,7 +10,7 @@ BOBJS=psb_base_mat_impl.o \
 SOBJS=psb_s_csr_impl.o psb_s_coo_impl.o psb_s_csc_impl.o psb_s_mat_impl.o
 #\
 psb_s_lcoo_impl.o psb_s_lcsr_impl.o 
-DOBJS=psb_d_csr_impl.o psb_d_coo_impl.o psb_d_csc_impl.o psb_d_mat_impl.o
+DOBJS=psb_d_csr_impl.o psb_d_coo_impl.o psb_d_csc_impl.o psb_d_mat_impl.o sp3mm_impl.o
 #\
 psb_d_lcoo_impl.o  psb_d_lcsr_impl.o 
 COBJS=psb_c_csr_impl.o psb_c_coo_impl.o psb_c_csc_impl.o psb_c_mat_impl.o
@ -20,7 +20,7 @@ ZOBJS=psb_z_csr_impl.o psb_z_coo_impl.o psb_z_csc_impl.o psb_z_mat_impl.o
 #\
 psb_z_lcoo_impl.o  psb_z_lcsr_impl.o 

-OBJS=$(BOBJS) $(SOBJS)  $(DOBJS)  $(COBJS)  $(ZOBJS) 
+OBJS=$(BOBJS) $(SOBJS)  $(DOBJS)  $(COBJS)  $(ZOBJS)

 #
 # Where the library should go, and how it is called. 
--- a/base/serial/impl/psb_d_csr_impl.F90
+++ b/base/serial/impl/psb_d_csr_impl.F90
@ -3395,7 +3395,6 @@ contains
         & nzc,nnzre, isz, ipb, irwsz, nrc, nze
    real(psb_dpk_)                 :: cfb

-
    info = psb_success_
    ma = a%get_nrows()
    na = a%get_ncols()
@ -3411,26 +3410,18 @@ contains

    ! conversion

-    ! available choices of implementation
-    enum, bind(C)
-      enumerator :: SPMM_ROW_BY_ROW_UB = 1
-      enumerator SPMM_ROW_BY_ROW_SYMB_NUM
-      enumerator SPMM_ROW_BY_ROW_1D_BLOCKS_SYMB_NUM
-      enumerator SPMM_ROW_BY_ROW_2D_BLOCKS_SYMB_NUM
-    end enum
-
-    select case (spmm_impl_id)
-    case (SPMM_ROW_BY_ROW_UB)
-      ! call spmm_row_by_row_ub
-    case (SPMM_ROW_BY_ROW_SYMB_NUM)
-      ! call spmm_row_by_row_symb_num
-    case (SPMM_ROW_BY_ROW_1D_BLOCKS_SYMB_NUM)
-      ! call spmm_row_by_row_1d_blocks_symb_num
-    case (SPMM_ROW_BY_ROW_2D_BLOCKS_SYMB_NUM)
-      ! call spmm_row_by_row_2d_blocks_symb_num
-    case default
-      ! call default choice
-    end select
+    ! select case (spmm_impl_id)
+    ! case (SPMM_ROW_BY_ROW_UB)
+    !   ! call spmm_row_by_row_ub
+    ! case (SPMM_ROW_BY_ROW_SYMB_NUM)
+    !   ! call spmm_row_by_row_symb_num
+    ! case (SPMM_ROW_BY_ROW_1D_BLOCKS_SYMB_NUM)
+    !   ! call spmm_row_by_row_1d_blocks_symb_num
+    ! case (SPMM_ROW_BY_ROW_2D_BLOCKS_SYMB_NUM)
+    !   ! call spmm_row_by_row_2d_blocks_symb_num
+    ! case default
+    !   ! call default choice
+    ! end select

    
    nze = min(size(c%val),size(c%ja))
--- a/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_UB_Generic.c
+++ b/base/serial/impl/sp3mm4amg/Sp3MM_CSR_OMP_UB_Generic.c
@ -123,6 +123,98 @@ spmat* CAT(spmmRowByRow_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
 	return AB;
 }

+idx_t CAT(spmmRowByRowCalculateSize_,OFF_F)(spmat* A, spmat*B, CONFIG* cfg, void** accumul, void **rows_sizes, void** tmp_matrix){
+	DEBUG printf("spmm\trows of A,\tfull B\tM=%lu x N=%lu\n",A->M,B->N);
+	///thread aux
+	ACC_DENSE *accVects = NULL,*acc;
+	SPMM_ACC* outAccumul=NULL;
+	idx_t* rowsSizes = NULL;
+	///init AB matrix with SPMM heuristic preallocation
+	spmat* AB = allocSpMatrix(A->M,B->N);
+	if (!AB)	goto _err;
+	if (!(rowsSizes = CAT(spMMSizeUpperbound_,OFF_F) (A,B)))   goto _err;
+	///aux structures alloc 
+	if (!(accVects = _initAccVectors(cfg->threadNum,AB->N))){
+		ERRPRINT("accVects init failed\n");
+		goto _err;
+	}
+	if (!(outAccumul = initSpMMAcc(rowsSizes[AB->M],AB->M)))  goto _err;
+	#if SPARSIFY_PRE_PARTITIONING == T
+	//prepare sparse accumulators with U.Bounded rows[parts] starts
+	SPACC* accSp;
+	for( idx_t r=0,rSizeCumul=0; r<AB->M; rSizeCumul += rowsSizes[r++]){
+		accSp 		= outAccumul->accs+r;
+		accSp->JA 	= outAccumul->JA + rSizeCumul;
+		accSp->AS 	= outAccumul->AS + rSizeCumul;
+		//accSp->len	= rowsSizes[r];
+	}
+	#endif
+
+	((CHUNKS_DISTR_INTERF)	cfg->chunkDistrbFunc) (AB->M,AB,cfg);
+	AUDIT_INTERNAL_TIMES	Start=omp_get_wtime();
+	#pragma omp parallel for schedule(runtime) private(acc)
+	for (ulong r=0;  r<A->M; r++){	//row-by-row formulation
+		//iterate over nz entry index c inside current row r
+		acc = accVects + omp_get_thread_num();
+		/* direct use of sparse scalar vector multiplication
+		for (idx_t ja=A->IRP[r]-OFF_F,ca,jb,bRowLen; ja<A->IRP[r+1]-OFF_F; ja++){
+			ca = A->JA[ja]  - OFF_F;
+			jb = B->IRP[ca] - OFF_F;
+			bRowLen = B->IRP[ca+1] - B->IRP[ca];
+			CAT(scSparseVectMul_,OFF_F)(A->AS[ja],B->AS+jb,B->JA+jb,bRowLen,acc);
+		}*/
+		for (ulong c=A->IRP[r]-OFF_F; c<A->IRP[r+1]-OFF_F; c++) //row-by-row formul
+			CAT(scSparseRowMul_,OFF_F)(A->AS[c], B, A->JA[c]-OFF_F, acc);
+		//trasform accumulated dense vector to a CSR row
+		#if SPARSIFY_PRE_PARTITIONING == T
+		_sparsifyUB(acc,outAccumul->accs+r,0);
+		#else
+		sparsifyUBNoPartsBounds(outAccumul,acc,outAccumul->accs + r,0);
+		#endif
+		_resetAccVect(acc);   //rezero for the next A row
+	}
+
+	///calculate exact number of non zero elements
+	idx_t nnz;
+	nnz = calculateSize(outAccumul->accs,AB);
+
+	/// put the sparse accumulator into the argument so that
+	/// it can be retrived in Fortran
+	*accumul = outAccumul;
+	*rows_sizes = rowsSizes;
+	*tmp_matrix = AB;
+
+	if(accVects)	freeAccsDense(accVects,cfg->threadNum);
+
+	return nnz;
+
+	_err:
+	if(AB)  freeSpmat(AB);
+	AB=NULL;	//nothing'll be returned
+	_free:
+	if(rowsSizes)   free(rowsSizes);
+	if(accVects)	freeAccsDense(accVects,cfg->threadNum);
+	if(outAccumul)  freeSpMMAcc(outAccumul);
+}
+
+void CAT(spmmRowByRowPopulate_,OFF_F)(void** accumul, void** rows_sizes, void** tmp_matrix, double** AS, idx_t** JA, idx_t** IRP){
+	SPMM_ACC* outAccumul= *accumul;
+	idx_t* rowsSizes = *rows_sizes;
+	spmat *AB = *tmp_matrix;
+
+	mergeRowsPopulate(outAccumul->accs, AB, AS, JA, IRP);
+	
+	#if OFF_F != 0
+	C_FortranShiftIdxs(AB);
+	#endif
+	AUDIT_INTERNAL_TIMES	End=omp_get_wtime();
+	DEBUG				    checkOverallocPercent(rowsSizes,AB);
+
+	if(AB)  freeSpmat(AB);
+	if(rowsSizes)   free(rowsSizes);
+	if(outAccumul)  freeSpMMAcc(outAccumul);
+}
+
 spmat* CAT(spmmRowByRow1DBlocks_,OFF_F)(spmat* A,spmat* B, CONFIG* cfg){
 	DEBUG printf("spmm\trowBlocks of A,\tfull B\tM=%lu x N=%lu\n",A->M,B->N);
 	DEBUG printf("ompParallelizationGrid:\t%dx%d\n",cfg->gridRows,cfg->gridCols);
--- a/base/serial/impl/sp3mm4amg/fbind/psb_f_spmm_ub.c
+++ b/base/serial/impl/sp3mm4amg/fbind/psb_f_spmm_ub.c
@ -1,79 +1,31 @@
 #include "../include/Sp3MM_CSR_OMP_Multi.h"
 #include "../include/utils.h"

-enum impl_types {ROW_BY_ROW_UB};
+enum impl_types
+{
+    ROW_BY_ROW_UB
+};

-/**
- * @brief performs multiplication of two sparse matrices 
- * A and B stored in the CRS format. The resulting matrix is C
- * 
- * @param[in] a_m           number of columns of A
- * @param[in] a_n           number of rows of A
- * @param[in] a_nz          number of non zero elements of A
- * @param[in] a_as          array with the non zero coefficients of A
- * @param[in] a_ja          array with the column indices of the non 
- *                      zero coefficients of A
- * @param[in] a_irp         array with the indices of the beginning of 
- *                      each row in a_as and a_ja
- * @param[in] a_rl          array with the lengths of each rows of A
- * @param[in] a_max_row_nz  maximum number of coefficients in a row in A
- * @param[in] b_m           number of columns of B
- * @param[in] b_n           number of rows of B
- * @param[in] b_nz          number of non zero elements of B
- * @param[in] b_as          array with the non zero coefficients of B
- * @param[in] b_ja          array with the column indices of the non
- *                      zero coefficients of B
- * @param[in] b_irp         array with the indices of the beginning of
- *                      each row in b_as and b_ja
- * @param[in] b_rl          array with the lengths of each rows of B
- * @param[in] b_max_row_nz  maximum number of coefficients in a row in B
- * @param[in] impl_choice   implementation choice
- * @param[out] c_m          number of colums of C
- * @param[out] c_n          number of rows of C
- * @param[out] c_nz         number of non zero elements in C
- * @param[out] c_as         array with the non zero coefficients of C
- * @param[out] c_ja         array with the column indices of the non
- *                          zero coefficients of C
- * @param[out] c_irp        array with the indices of the beginning of
- *                          each row in c_as and c_ja
- * @param[out] c_rl         array with the lengths of each rows of C
- * @param[out] c_max_row_nz maximum number of coefficients in a row in C
- * @param[out] info         return value to check if the operation was successful
- */
-#ifdef ROWLENS
-void psb_f_spmm(idx_t a_m, idx_t a_n, idx_t a_nz, 
-                                double *a_as, idx_t *a_ja, 
-                                idx_t *a_irp, idx_t *a_rl, idx_t a_max_row_nz, 
-                                idx_t b_m, idx_t b_n, idx_t b_nz, 
-                                double *b_as, idx_t *b_ja, 
-                                idx_t *b_irp, idx_t *b_rl, idx_t b_max_row_nz,
-                                enum impl_types impl_choice,
-                                idx_t *c_m, idx_t *c_n, idx_t *c_nz, 
-                                double **c_as, idx_t **c_ja, 
-                                idx_t **c_irp, idx_t **c_rl, idx_t *c_max_row_nz,
-                                int info)
-#else
-void psb_f_spmm(idx_t a_m, idx_t a_n, idx_t a_nz, 
-                                double *a_as, idx_t *a_ja, 
-                                idx_t *a_irp, idx_t a_max_row_nz, 
-                                idx_t b_m, idx_t b_n, idx_t b_nz, 
-                                double *b_as, idx_t *b_ja, 
-                                idx_t *b_irp, idx_t b_max_row_nz,
-                                enum impl_types impl_choice,
-                                idx_t *c_m, idx_t *c_n, idx_t *c_nz, 
-                                double **c_as, idx_t **c_ja, 
-                                idx_t **c_irp,  idx_t *c_max_row_nz,
-                                int *info)
-#endif                            
+void psb_f_spmm_build_spacc(idx_t a_m, idx_t a_n, idx_t a_nz,
+                             double *a_as, idx_t *a_ja,
+                             idx_t *a_irp, idx_t a_max_row_nz,
+                             idx_t b_m, idx_t b_n, idx_t b_nz,
+                             double *b_as, idx_t *b_ja,
+                             idx_t *b_irp, idx_t b_max_row_nz,
+                             enum impl_types impl_choice,
+                             void **accumul,
+                             void **rows_sizes,
+                             void **tmp_matrix,
+                             idx_t *nnz)
 {
    int rc;
    spmat *a, *b, *c;
    CONFIG *cfg;

-    #ifdef ROWLENS
+#ifdef ROWLENS
    a->RL = a_rl;
    b->RL = b_rl;
-    #endif//ROWLENS
+#endif // ROWLENS

    // setting up cfg
    // TODO : CHECK THAT THIS IS COMPATIBLE WITH PSB
@ -96,24 +48,32 @@ void psb_f_spmm(idx_t a_m, idx_t a_n, idx_t a_nz,
    b->IRP = b_irp;
    b->MAX_ROW_NZ = b_max_row_nz;

-    // performing spmm
+    // computing the size
    switch (impl_choice)
    {
    case ROW_BY_ROW_UB:
-        c = spmmRowByRow_0(a, b, cfg);
-        break;
+        *nnz = spmmRowByRowCalculateSize_0(a, b, cfg, accumul, rows_sizes, tmp_matrix);
    default:
        break;
    }
+}

-    // output result
-    *(c_m) = c->M;
-    *(c_n) = c->N;
-    *(c_nz)= c->NZ;
-    *(c_as)= c->AS;
-    *(c_ja)= c->JA;
-    *(c_irp)=c->IRP;
-    #ifdef ROWLENS
-    *(c_rl)= c->RL;
-    #endif
+void psb_f_spmm_merge_spacc(void **accumul,
+                            void **rows_sizes,
+                            void **tmp_matrix,
+                            enum impl_types impl_choice,
+                            double **c_as,
+                            idx_t **c_ja,
+                            idx_t **c_irp,
+                            int *info)
+{
+    // merging the rows into the correct arrays
+    switch (impl_choice)
+    {
+    case ROW_BY_ROW_UB:
+        *info = spmmRowByRowPopulate_0(accumul, rows_sizes, tmp_matrix, c_as, c_ja, c_irp);
+        break;
+    default:
+        break;
+    }
 }
--- a/base/serial/impl/sp3mm4amg/include/SpMMUtilsGeneric.h
+++ b/base/serial/impl/sp3mm4amg/include/SpMMUtilsGeneric.h
@ -402,5 +402,48 @@ inline int mergeRows(SPACC* rows,spmat* mat){
 	return EXIT_SUCCESS;
 } 

+/***************************************************************
+ * The Following code is a modified version of the mergeRows
+ * function.
+ * The function is split into two parts : 
+ * 		- the first one calculates the sizes of the arrays 
+ * 		  and returns it
+ * 		- the second one fills them accordingly
+ * This allows for allocation of the arrays as allocatables in
+ * Fortran
+*/
+
+inline idx_t calculateSize(SPACC* rows, spmat *mat){
+	ulong nzNum=0;
+	//count nnz entries and alloc arrays for them
+	for (ulong r=0;   r<mat->M;   ++r){
+		nzNum += rows[r].len;
+		mat->IRP[r+1] = nzNum;
+		#ifdef ROWLENS
+		mat->RL[r]  = rows[r].len
+		#endif
+	}
+	mat->NZ = nzNum;
+
+	return nzNum;
+}
+
+inline int mergeRowsPopulate(SPACC* rows, spmat* mat, double** AS, idx_t** JA, idx_t** IRP) {
+    // Populate with rows' non-zero values and indexes
+    // TODO PARALLEL COPY
+    #pragma omp parallel for schedule(static)
+    for (ulong r = 0; r < mat->M; r++) {
+        memcpy(*AS + mat->IRP[r], rows[r].AS, rows[r].len * sizeof(double));
+        memcpy(*JA + mat->IRP[r], rows[r].JA, rows[r].len * sizeof(idx_t));
+    }
+    
+	memcpy(*IRP, mat->IRP, (mat->M + 1) * sizeof(idx_t));
+	
+    // Consistency checks
+    // TODO: Add your consistency checks here
+    
+    return EXIT_SUCCESS;
+}
+
 #endif	//SPMMUTILS_H_SINGLE_IMPLEMENTATION

--- a/base/serial/impl/sp3mm_impl.f90
+++ b/base/serial/impl/sp3mm_impl.f90
@ -1,41 +1,101 @@
 subroutine dspmm(a,b,c,info, impl_choice)
+    ! TODO :
+    ! * Split the C function into two subroutines:
+    !    - Memory estimation
+    !    - Actual computation
+    ! * Call estimation function
+    ! * Allocate c matrix arrays
+    ! * pass the arrays as c pointers to the computation routine
    use psb_d_mat_mod
+    use iso_c_binding
    implicit none
    type(psb_d_csr_sparse_mat), intent(in) :: a,b
-    type(psb_d_csr_sparse_mat), intent(out):: c
+    type(psb_d_csr_sparse_mat), intent(inout):: c
    integer(psb_ipk_), intent(out)    :: info
    integer(psb_ipk_), intent(in), optional :: impl_choice

    ! Internal variables
-    integer(c_size_t), value :: a_m,a_n,a_nz
-    real(c_double) :: a_as
-    integer(c_size_t) :: a_ja,a_irp
+    integer(c_size_t):: a_m,a_n,a_nz
+    real(c_double), pointer :: a_as(:)
+    integer(c_size_t), pointer :: a_ja(:),a_irp(:)
+    type(c_ptr) :: a_as_ptr,a_ja_ptr,a_irp_ptr
+    integer(c_size_t) :: a_max_row_nz
+    integer(c_size_t) :: b_m,b_n,b_nz
+    real(c_double), pointer :: b_as(:)
+    integer(c_size_t), pointer :: b_ja(:),b_irp(:)
+    type(c_ptr) :: b_as_ptr,b_ja_ptr,b_irp_ptr
+    integer(c_size_t) :: b_max_row_nz
+    integer(c_int) :: impl_choice_
+    type(c_ptr) :: accumul, rows_sizes, tmp_matrix
+    integer(c_size_t) :: nnz
+    real(c_double), pointer :: c_as(:)
+    integer(c_size_t), pointer :: c_ja(:),c_irp(:)
+    type(c_ptr) :: c_as_ptr,c_ja_ptr,c_irp_ptr

-    integer(c_size_t), value :: a_max_row_nz
-    integer(c_size_t), value :: b_m,b_n,b_nz
-    real(c_double) :: b_as
-    integer(c_size_t) :: b_ja,b_irp
-    integer(c_size_t), value :: b_max_row_nz
-    integer(c_int), value :: impl_choice_
-    integer(c_size_t), value :: c_m,c_n,c_nz
-    real(c_double) :: c_as
-    integer(c_size_t) :: c_ja,c_irp
-    integer(c_size_t), value :: c_max_row_nz
+    interface spmm_build_spacc
+        subroutine psb_f_spmm_build_spacc(c_a_m,c_a_n,c_a_nz,&
+                            c_a_as,c_a_ja,c_a_irp,&
+                            c_a_max_row_nz,&
+                            c_b_m,c_b_n,c_b_nz,&
+                            c_b_as,c_b_ja,c_b_irp,&
+                            c_b_max_row_nz,&
+                            c_impl_choice,&
+                            c_accumul,&
+                            c_rows_sizes,&
+                            c_tmp_matrix,&
+                            c_info,&
+                            c_nnz) bind(C)
+            use iso_c_binding
+            use psb_base_mod
+            integer(c_size_t), intent(in), value :: c_a_m,c_a_n,c_a_nz
+            type(c_ptr), intent(in) :: c_a_as,c_a_ja,c_a_irp
+            integer(c_size_t), intent(in), value :: c_a_max_row_nz
+            integer(c_size_t), intent(in), value :: c_b_m,c_b_n,c_b_nz
+            type(c_ptr), intent(in) :: c_b_as,c_b_ja,c_b_irp
+            integer(c_size_t), intent(in), value :: c_b_max_row_nz
+            integer(c_int), intent(in), value :: c_impl_choice
+            type(c_ptr), intent(out) :: c_accumul,c_rows_sizes,c_tmp_matrix
+            integer(psb_ipk_), intent(out) :: c_info
+            integer(c_size_t), intent(out) :: c_nnz
+        end subroutine psb_f_spmm_build_spacc
+    end interface spmm_build_spacc
+
+    interface spmm_row_by_row_populate
+        subroutine psb_f_spmm_merge_spacc(c_accumul,&
+                                        c_rows_sizes,&
+                                        c_tmp_matrix,&
+                                        c_impl_choice,&
+                                        c_as,c_ja,c_irp,&
+                                        c_info) bind(C)
+            use iso_c_binding
+            use psb_base_mod
+            type(c_ptr), intent(in) :: c_accumul,c_rows_sizes,c_tmp_matrix
+            integer(c_int), intent(in), value :: c_impl_choice
+            integer(psb_ipk_), intent(out) :: c_info
+            type(c_ptr), intent(out) :: c_as,c_ja,c_irp
+        end subroutine psb_f_spmm_merge_spacc
+    end interface spmm_row_by_row_populate

    ! Initializing internal variables
    a_m = a%get_nrows()
    a_n = a%get_ncols()
    a_nz = a%get_nzeros()
    a_as = a%val
+    a_as_ptr = c_loc(a_as)
    a_ja = a%ja
+    a_ja_ptr = c_loc(a_ja)
    a_irp = a%irp
-    ! a_max_row_nz
+    a_irp_ptr = c_loc(a_irp)
+    ! ! a_max_row_nz
    b_m = b%get_nrows()
    b_n = b%get_ncols()
    b_nz = b%get_nzeros()
    b_as = b%val
+    b_as_ptr = c_loc(b_as)
    b_ja = b%ja
+    b_ja_ptr = c_loc(b_ja)
    b_irp = b%irp
+    b_irp_ptr = c_loc(b_irp)

    if (present(impl_choice)) then
        impl_choice_ = impl_choice
@ -43,53 +103,38 @@ subroutine dspmm(a,b,c,info, impl_choice)
        impl_choice_ = 0
    end if

-    ! Calling psb_f_spmm
-    psb_f_spmm(a_m,a_n,a_nz,&
-            & a_as,a_ja,a_irp,&
-            & a_max_row_nz,&
-            & b_m,b_n,b_nz,&
-            & b_as,b_ja,b_irp,&
-            & impl_choice_,&
-            & c_m,c_n,c_nz,&
-            & c_as,c_ja,c_irp,&
-            & c_max_row_nz,&
-            & info)
+    ! call calculateSize
+    call psb_f_spmm_build_spacc(a_m,a_n,a_nz,a_as_ptr,&
+                                a_ja_ptr,a_irp_ptr,a_max_row_nz,&
+                                b_m,b_n,b_nz,b_as_ptr,b_ja_ptr,&
+                                b_irp_ptr,b_max_row_nz,&
+                                impl_choice_,accumul,&
+                                rows_sizes,tmp_matrix,&
+                                info,nnz)

-    ! Putting results in c
-    c%m = c_m
-    c%n = c_n
-    c%val = c_as
-    c%ja = c_ja
-    c%irp = c_irp
+    ! allocate c%val, c%ja and c%irp
+    allocate(c%val(nnz))
+    allocate(c%ja(nnz))
+    allocate(c%irp(a_m + 1))
+    
+    c_as = c%val
+    c_as_ptr = c_loc(c_as)
+    c_ja = c%ja
+    c_ja_ptr = c_loc(c_ja)
+    c_irp = c%irp
+    c_irp_ptr = c_loc(c_irp)
+
+    ! c%set_nrows(a_m)
+    ! c%set_ncols(b_n)
+
+    ! call spmmRowByRowPopulate
+    call psb_f_spmm_merge_spacc(accumul,&
+                                rows_sizes,&
+                                tmp_matrix,&
+                                impl_choice_,&
+                                c_as_ptr,&
+                                c_ja_ptr,&
+                                c_irp_ptr,&
+                                info)

-    interface
-        subroutine psb_f_spmm(c_a_m,c_a_n,c_a_nz,&
-                            & c_a_as,c_a_ja,c_a_irp,&
-                            & c_a_max_row_nz,
-                            & c_b_m,c_b_n,c_b_nz,&
-                            & c_b_as,c_b_ja,c_b_irp,&
-                            & c_b_max_row_nz,
-                            & c_impl_choice,&
-                            & c_c_m,c_c_n,c_c_nz,&
-                            & c_c_as,c_c_ja,c_c_irp,
-                            & c_c_max_row_nz,&
-                            & c_info) bind(c)
-            use iso_c_binding
-            use psb_base_mod
-            integer(c_size_t), intent(in), value :: c_a_m,c_a_n,c_a_nz
-            real(c_double), intent(in) :: c_a_as
-            integer(c_size_t), intent(in) :: c_a_ja,c_a_irp
-            integer(c_size_t), intent(in), value :: c_a_max_row_nz
-            integer(c_size_t), intent(in), value :: c_b_m,c_b_n,c_b_nz
-            real(c_double), intent(in) :: c_b_as
-            integer(c_size_t), intent(in) :: c_b_ja,c_b_irp
-            integer(c_size_t), intent(in), value :: c_b_max_row_nz
-            integer(c_int), intent(in), value :: c_impl_choice
-            integer(c_size_t), intent(out), value :: c_c_m,c_c_n,c_c_nz
-            real(c_double), intent(out) :: c_c_as
-            integer(c_size_t), intent(out) :: c_c_ja,c_c_irp
-            integer(c_size_t), intent(out), value :: c_c_max_row_nz
-            integer(psb_ipk_), intent(out) :: c_info
-        end subroutine psb_f_spmm
-    end interface
 end subroutine dspmm
--- a/cbind/base/psb_objhandle_mod.F90
+++ b/cbind/base/psb_objhandle_mod.F90
@ -40,4 +40,6 @@ module psb_objhandle_mod

  type, bind(c) :: psb_c_zspmat
    type(c_ptr) :: item = c_null_ptr
-  end type psb_c_zspmat
+  end type psb_c_zspmat
+
+end module psb_objhandle_mod