psblas3-dev:

opt/Makefile
 opt/elldev.c
 opt/elldev.h
 opt/elldev_mod.F90
 opt/psb_d_elg_impl.F90
 opt/psb_d_elg_mat_mod.F90

Moved work on GPU to psblas-gpu.
psblas3-type-indexed
Salvatore Filippone 14 years ago
parent 7c678efd29
commit e04fa17a1c

@ -15,18 +15,14 @@ FINCLUDES=$(FMFLAG)$(LIBDIR) $(FMFLAG).
EXEDIR=./runs
# Object files archived into libopt.a.
# NOTE(review): OBJS is assigned twice below (once with the ELG/elldev
# objects, once without). This looks like the pre- and post-change
# definitions shown together; as written, the later assignment is the
# one make will use. Confirm which one is intended.
OBJS=psb_d_ell_impl.o psb_d_ell_mat_mod.o rsb_mod.o psb_d_rsb_mat_mod.o \
psb_d_elg_mat_mod.o psb_d_elg_impl.o elldev.o elldev_mod.o
OBJS=psb_d_ell_impl.o psb_d_ell_mat_mod.o rsb_mod.o psb_d_rsb_mat_mod.o
# "lib" is an alias for building the static archive.
lib: libopt.a
# Build the archive from every object listed in OBJS.
libopt.a: $(OBJS)
ar cur libopt.a $(OBJS)
# Extra prerequisites: each implementation/derived object is rebuilt
# after the module object it is listed against.
psb_d_ell_impl.o: psb_d_ell_mat_mod.o
psb_d_rsb_mat_mod.o: rsb_mod.o
psb_d_elg_impl.o: psb_d_elg_mat_mod.o
psb_d_elg_mat_mod.o: elldev_mod.o
elldev.o: elldev.c
elldev.c: elldev.h
# Remove the objects in OBJS and files matching *$(.mod)
# (presumably the compiler-generated module files; $(.mod) is defined
# outside this view).
clean:
/bin/rm -f $(OBJS) *$(.mod)

@ -1,114 +0,0 @@
#include "elldev.h"
// sparse Ell matrix-vector product
int spmvEllDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY);
int spmvEllDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY);
int writeVecDeviceFloat(void* deviceVec, float* hostVec);
int writeVecDeviceDouble(void* deviceVec, double* hostVec);
int readVecDeviceFloat(void* deviceVec, float* hostVec);
int readVecDeviceDouble(void* deviceVec, double* hostVec);
int dotVecDeviceFloat(float* y_res, void* devVecA, void* devVecB);
int dotVecDeviceDouble(double* y_res, void* devVecA, void* devVecB);
int axpbyVecDeviceFloat(float* alpha, void* devVecX, float* beta, void* devVecY);
int axpbyVecDeviceDouble(double* alpha, void* devVecX, double* beta, void* devVecY);
// Single-precision (float) entry point for the ELL sparse matrix-vector
// product. When HAVE_ELL_GPU is defined the call is forwarded unchanged to
// spmvEllDevice(); otherwise CINTRF_UNSUPPORTED is returned.
int spmvEllDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY)
{
#ifndef HAVE_ELL_GPU
    return CINTRF_UNSUPPORTED;
#else
    // float* converts implicitly to the void* the generic routine expects
    return spmvEllDevice(deviceMat, alpha, deviceX, beta, deviceY);
#endif
}
// Double-precision entry point for the ELL sparse matrix-vector product.
// When HAVE_ELL_GPU is defined the call is forwarded unchanged to
// spmvEllDevice(); otherwise CINTRF_UNSUPPORTED is returned.
int spmvEllDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY)
{
#ifndef HAVE_ELL_GPU
    return CINTRF_UNSUPPORTED;
#else
    // double* converts implicitly to the void* the generic routine expects
    return spmvEllDevice(deviceMat, alpha, deviceX, beta, deviceY);
#endif
}
// Float entry point for the host-to-device vector write.
// When HAVE_ELL_GPU is defined the call is forwarded to writeVecDevice();
// otherwise CINTRF_UNSUPPORTED is returned.
int writeVecDeviceFloat(void* deviceVec, float* hostVec)
{
#ifndef HAVE_ELL_GPU
    return CINTRF_UNSUPPORTED;
#else
    return writeVecDevice(deviceVec, hostVec);
#endif
}
// Double entry point for the host-to-device vector write.
// When HAVE_ELL_GPU is defined the call is forwarded to writeVecDevice();
// otherwise CINTRF_UNSUPPORTED is returned.
int writeVecDeviceDouble(void* deviceVec, double* hostVec)
{
#ifndef HAVE_ELL_GPU
    return CINTRF_UNSUPPORTED;
#else
    return writeVecDevice(deviceVec, hostVec);
#endif
}
// Float entry point for the device-to-host vector read.
// When HAVE_ELL_GPU is defined the call is forwarded to readVecDevice();
// otherwise CINTRF_UNSUPPORTED is returned.
int readVecDeviceFloat(void* deviceVec, float* hostVec)
{
#ifndef HAVE_ELL_GPU
    return CINTRF_UNSUPPORTED;
#else
    return readVecDevice(deviceVec, hostVec);
#endif
}
// Double entry point for the device-to-host vector read.
// When HAVE_ELL_GPU is defined the call is forwarded to readVecDevice();
// otherwise CINTRF_UNSUPPORTED is returned.
int readVecDeviceDouble(void* deviceVec, double* hostVec)
{
#ifndef HAVE_ELL_GPU
    return CINTRF_UNSUPPORTED;
#else
    return readVecDevice(deviceVec, hostVec);
#endif
}
// Float entry point for the device vector dot product; the result is
// written through y_res by the underlying routine.
// When HAVE_ELL_GPU is defined the call is forwarded to dotVecDevice();
// otherwise CINTRF_UNSUPPORTED is returned.
int dotVecDeviceFloat(float* y_res, void* devVecA, void* devVecB)
{
#ifndef HAVE_ELL_GPU
    return CINTRF_UNSUPPORTED;
#else
    return dotVecDevice(y_res, devVecA, devVecB);
#endif
}
// Double entry point for the device vector dot product; the result is
// written through y_res by the underlying routine.
// When HAVE_ELL_GPU is defined the call is forwarded to dotVecDevice();
// otherwise CINTRF_UNSUPPORTED is returned.
int dotVecDeviceDouble(double* y_res, void* devVecA, void* devVecB)
{
#ifndef HAVE_ELL_GPU
    return CINTRF_UNSUPPORTED;
#else
    return dotVecDevice(y_res, devVecA, devVecB);
#endif
}
// Float entry point for the device axpby operation on devVecX / devVecY
// with scalars passed by pointer.
// When HAVE_ELL_GPU is defined the call is forwarded to axpbyVecDevice();
// otherwise CINTRF_UNSUPPORTED is returned.
int axpbyVecDeviceFloat(float* alpha, void* devVecX, float* beta, void* devVecY)
{
#ifndef HAVE_ELL_GPU
    return CINTRF_UNSUPPORTED;
#else
    return axpbyVecDevice(alpha, devVecX, beta, devVecY);
#endif
}
// Double entry point for the device axpby operation on devVecX / devVecY
// with scalars passed by pointer.
// When HAVE_ELL_GPU is defined the call is forwarded to axpbyVecDevice();
// otherwise CINTRF_UNSUPPORTED is returned.
int axpbyVecDeviceDouble(double* alpha, void* devVecX, double* beta, void* devVecY)
{
#ifndef HAVE_ELL_GPU
    return CINTRF_UNSUPPORTED;
#else
    return axpbyVecDevice(alpha, devVecX, beta, devVecY);
#endif
}

@ -1,88 +0,0 @@
#ifndef SPGPU_INTERFACE_H
#define SPGPU_INTERFACE_H

// ---------------------------------------------------------------------
// C interface to the ELLPACK device (GPU) matrix and vector routines.
//
// Legend for the abbreviations used in the prototypes below:
//   cM : compressed Matrix
//   rP : row pointers
//   rS : row size
// ---------------------------------------------------------------------

// Element types
#define TYPE_FLOAT 0
#define TYPE_DOUBLE 1
// TYPE_COMPLEX?
// TYPE_INT?
// TYPE_BOOLEAN?

// Return codes
#define CINTRF_SUCCESS 0
#define CINTRF_NOMEMORY -1
#define CINTRF_UNSUPPORTED -2

// Parameters describing an ELLPACK matrix to be allocated on the device.
typedef struct EllDeviceParams
{
    // Element type (presumably one of the TYPE_* codes above).
    // The resulting allocation for cM and rP will be
    // pitch*maxRowSize*elementSize
    unsigned int elementType;

    // Pitch (in number of elements)
    unsigned int pitch;

    // Number of rows.
    // Used to allocate rS array
    unsigned int rows;

    // Largest row size
    unsigned int maxRowSize;

    // First index (e.g 0 or 1)
    unsigned int firstIndex;
} EllDeviceParams;

// The device routines are only declared when GPU support is compiled in.
#if defined(HAVE_ELL_GPU)

//// Matrix management

// Generate ELLPACK format matrix parameters
EllDeviceParams getEllDeviceParams(unsigned int rows,
                                   unsigned int maxRowSize,
                                   unsigned int elementType,
                                   unsigned int firstIndex);

// Allocate/Free matrix on device
// remote pointer returned in *remoteMatrix
// (device struct type is hidden, use device pointer as id)
// can return CINTRF_SUCCESS or CINTRF_NOMEMORY or CINTRF_UNSUPPORTED
// return the matrix pointer (use the pointer just as an id) in *deviceMat
int allocEllDevice(void** deviceMat, EllDeviceParams* params);
void freeEllDevice(void* deviceMat);

// Update device copy with host copy
int writeEllDevice(void *deviceMat, void* cM, int* rP, int* rS);

// Update host copy with device copy
int readEllDevice(void *deviceMat, void* cM, int* rP, int* rS);

// sparse Ell matrix-vector product
int spmvEllDevice(void *deviceMat, void* alpha, void* deviceX, void* beta, void* deviceY);

//// Vector management

// can return CINTRF_SUCCESS or CINTRF_NOMEMORY or CINTRF_UNSUPPORTED
// return the vector pointer (use the pointer just as an id) in *deviceVec
int allocVecDevice(void** deviceVec, int size, unsigned int elementType);
void freeVecDevice(void* deviceVec);

// Host Vector -> Device Vector
// if deviceVec is a float device vector, hostVec will be a float array
// if deviceVec is a double device vector, hostVec will be a double array
int writeVecDevice(void* deviceVec, void* hostVec);

// Device Vector -> Host Vector
int readVecDevice(void* deviceVec, void* hostVec);

// dot product (y_res = a * b)
// y_res: pointer to result (e.g. float* or double*)
// devVecA, devVecB: device vectors
// if devVecA and devVecB are float prec. device vectors, y_res should be a pointer to a float value
// if devVecA and devVecB are double prec. device vectors, y_res should be a pointer to a double value
int dotVecDevice(void* y_res, void* devVecA, void* devVecB);

// if devVecX and devVecY are float prec. device vectors, alpha and beta should be pointers to a float value
// if devVecX and devVecY are double prec. device vectors, alpha and beta should be pointers to a double value
int axpbyVecDevice(void* alpha, void* devVecX, void* beta, void* devVecY);

#endif /* HAVE_ELL_GPU */

#endif /* SPGPU_INTERFACE_H */

@ -1,156 +0,0 @@
module elldev_mod
  !
  ! Fortran bindings (via ISO_C_BINDING) for the C interface declared
  ! in elldev.h: ELLPACK matrices and dense vectors stored on a device.
  !
  ! The constants and the parameter type are always available; the
  ! procedure interfaces are only compiled when HAVE_ELL_GPU is defined.
  !
  use iso_c_binding
  implicit none

  ! Element type selectors; same values as TYPE_FLOAT and TYPE_DOUBLE
  ! in elldev.h.
  integer(c_int), parameter :: elldev_float  = 0
  integer(c_int), parameter :: elldev_double = 1

  ! Return codes; same values as CINTRF_SUCCESS, CINTRF_NOMEMORY and
  ! CINTRF_UNSUPPORTED in elldev.h.
  integer(c_int), parameter :: elldev_success     = 0
  integer(c_int), parameter :: elldev_nomem       = -1
  integer(c_int), parameter :: elldev_unsupported = -2

  ! Interoperable image of the C struct EllDeviceParams.
  ! Component order must match the C declaration.
  type, bind(c) :: elldev_parms
    integer(c_int) :: element_type
    integer(c_int) :: pitch
    integer(c_int) :: rows
    integer(c_int) :: maxRowSize
    integer(c_int) :: firstIndex
  end type elldev_parms

#ifdef HAVE_ELL_GPU

  ! ------------------------------------------------------------------
  ! Matrix and vector allocation / release
  ! ------------------------------------------------------------------
  interface
    ! Build the parameter record for an ELLPACK matrix.
    function getEllDeviceParams(rows,maxRowSize,elementType,firstIndex) &
         & result(val) bind(c,name='getEllDeviceParams')
      use iso_c_binding
      import :: elldev_parms
      type(elldev_parms)    :: val
      integer(c_int), value :: rows,maxRowSize,elementType,firstIndex
    end function getEllDeviceParams

    ! Allocate a device matrix; the handle is returned in deviceMat.
    function allocEllDevice(deviceMat,parms) &
         & result(val) bind(c,name='allocEllDevice')
      use iso_c_binding
      import :: elldev_parms
      integer(c_int)     :: val
      ! No VALUE attribute: the C side receives void**.
      type(c_ptr)        :: deviceMat
      ! Passed by reference (no VALUE attribute) because the C
      ! prototype in elldev.h takes a pointer to EllDeviceParams.
      ! Declaring it with VALUE would pass the struct itself where
      ! the C routine expects an address.
      type(elldev_parms) :: parms
    end function allocEllDevice

    ! Release a device matrix given its handle.
    subroutine freeEllDevice(deviceMat) &
         & bind(c,name='freeEllDevice')
      use iso_c_binding
      type(c_ptr), value :: deviceMat
    end subroutine freeEllDevice

    ! Allocate a device vector; the handle is returned in deviceVec.
    function allocVecDevice(deviceVec, size, datatype) &
         & result(val) bind(c,name='allocVecDevice')
      use iso_c_binding
      integer(c_int)        :: val
      integer(c_int), value :: size, datatype
      ! No VALUE attribute: the C side receives void**.
      type(c_ptr)           :: deviceVec
    end function allocVecDevice

    ! Release a device vector given its handle.
    subroutine freeVecDevice(deviceVec) &
         & bind(c,name='freeVecDevice')
      use iso_c_binding
      type(c_ptr), value :: deviceVec
    end subroutine freeVecDevice
  end interface

  ! ------------------------------------------------------------------
  ! Host vector -> device vector
  ! ------------------------------------------------------------------
  interface writeVecDevice
    function writeVecDeviceFloat(deviceVec,hostVec) &
         & result(val) bind(c,name='writeVecDeviceFloat')
      use iso_c_binding
      integer(c_int)     :: val
      type(c_ptr), value :: deviceVec
      real(c_float)      :: hostVec(*)
    end function writeVecDeviceFloat
    function writeVecDeviceDouble(deviceVec,hostVec) &
         & result(val) bind(c,name='writeVecDeviceDouble')
      use iso_c_binding
      integer(c_int)     :: val
      type(c_ptr), value :: deviceVec
      real(c_double)     :: hostVec(*)
    end function writeVecDeviceDouble
  end interface writeVecDevice

  ! ------------------------------------------------------------------
  ! Device vector -> host vector
  ! ------------------------------------------------------------------
  interface readVecDevice
    function readVecDeviceFloat(deviceVec,hostVec) &
         & result(val) bind(c,name='readVecDeviceFloat')
      use iso_c_binding
      integer(c_int)     :: val
      type(c_ptr), value :: deviceVec
      real(c_float)      :: hostVec(*)
    end function readVecDeviceFloat
    function readVecDeviceDouble(deviceVec,hostVec) &
         & result(val) bind(c,name='readVecDeviceDouble')
      use iso_c_binding
      integer(c_int)     :: val
      type(c_ptr), value :: deviceVec
      real(c_double)     :: hostVec(*)
    end function readVecDeviceDouble
  end interface readVecDevice

  ! ------------------------------------------------------------------
  ! Sparse ELL matrix-vector product; scalars are passed by reference
  ! ------------------------------------------------------------------
  interface spmvEllDevice
    function spmvEllDeviceFloat(deviceMat,alpha,x,beta,y) &
         & result(val) bind(c,name='spmvEllDeviceFloat')
      use iso_c_binding
      integer(c_int)     :: val
      type(c_ptr), value :: deviceMat, x, y
      real(c_float)      :: alpha, beta
    end function spmvEllDeviceFloat
    function spmvEllDeviceDouble(deviceMat,alpha,x,beta,y) &
         & result(val) bind(c,name='spmvEllDeviceDouble')
      use iso_c_binding
      integer(c_int)     :: val
      type(c_ptr), value :: deviceMat, x, y
      real(c_double)     :: alpha, beta
    end function spmvEllDeviceDouble
  end interface spmvEllDevice

  ! ------------------------------------------------------------------
  ! Dot product of two device vectors; result returned through res
  ! ------------------------------------------------------------------
  interface dotVecDevice
    function dotVecDeviceFloat(res,deviceVecA,deviceVecB) &
         & result(val) bind(c,name='dotVecDeviceFloat')
      use iso_c_binding
      integer(c_int)     :: val
      real(c_float)      :: res
      type(c_ptr), value :: deviceVecA, deviceVecB
    end function dotVecDeviceFloat
    function dotVecDeviceDouble(res,deviceVecA,deviceVecB) &
         & result(val) bind(c,name='dotVecDeviceDouble')
      use iso_c_binding
      integer(c_int)     :: val
      real(c_double)     :: res
      type(c_ptr), value :: deviceVecA, deviceVecB
    end function dotVecDeviceDouble
  end interface dotVecDevice

  ! ------------------------------------------------------------------
  ! axpby on two device vectors; scalars are passed by reference
  ! ------------------------------------------------------------------
  interface axpbyVecDevice
    function axpbyVecDeviceFloat(alpha,deviceVecA,beta,deviceVecB) &
         & result(val) bind(c,name='axpbyVecDeviceFloat')
      use iso_c_binding
      integer(c_int)     :: val
      real(c_float)      :: alpha, beta
      type(c_ptr), value :: deviceVecA, deviceVecB
    end function axpbyVecDeviceFloat
    function axpbyVecDeviceDouble(alpha,deviceVecA,beta,deviceVecB) &
         & result(val) bind(c,name='axpbyVecDeviceDouble')
      use iso_c_binding
      integer(c_int)     :: val
      real(c_double)     :: alpha,beta
      type(c_ptr), value :: deviceVecA, deviceVecB
    end function axpbyVecDeviceDouble
  end interface axpbyVecDevice

#endif
end module elldev_mod

File diff suppressed because it is too large Load Diff

@ -1,289 +0,0 @@
module psb_d_elg_mat_mod
use iso_c_binding
use psb_d_base_mat_mod
use psb_d_ell_mat_mod
type, extends(psb_d_ell_sparse_mat) :: psb_d_elg_sparse_mat
  !
  ! ELG: the ITPACK/ELL storage format, extended with the routines
  ! needed to keep a copy of the matrix data on the GPU.
  ! When HAVE_ELL_GPU is undefined this type adds nothing to its
  ! ELL parent and is indistinguishable from it.
  !
#ifdef HAVE_ELL_GPU
  ! Handle to the device-side matrix; starts out as a null pointer
  type(c_ptr) :: deviceMat = c_null_ptr
contains
  ! Queries
  procedure, pass(a) :: sizeof        => d_elg_sizeof
  procedure, pass(a) :: get_fmt       => d_elg_get_fmt
  ! Storage management
  procedure, pass(a) :: allocate_mnnz => psb_d_elg_allocate_mnnz
  procedure, pass(a) :: reallocate_nz => psb_d_elg_reallocate_nz
  procedure, pass(a) :: free          => d_elg_free
  procedure, pass(a) :: mold          => psb_d_elg_mold
  ! Computational kernels
  procedure, pass(a) :: d_csmv        => psb_d_elg_csmv
  procedure, pass(a) :: d_csmm        => psb_d_elg_csmm
  procedure, pass(a) :: d_scal        => psb_d_elg_scal
  procedure, pass(a) :: d_scals       => psb_d_elg_scals
  ! Conversions from other storage formats
  procedure, pass(a) :: cp_from_coo   => psb_d_cp_elg_from_coo
  procedure, pass(a) :: mv_from_coo   => psb_d_mv_elg_from_coo
  procedure, pass(a) :: cp_from_fmt   => psb_d_cp_elg_from_fmt
  procedure, pass(a) :: mv_from_fmt   => psb_d_mv_elg_from_fmt
  ! Copy / move between two ELG matrices, exposed through generics
  procedure, pass(a) :: psb_d_elg_cp_from
  procedure, pass(a) :: psb_d_elg_mv_from
  generic, public    :: cp_from => psb_d_elg_cp_from
  generic, public    :: mv_from => psb_d_elg_mv_from
#endif
end type psb_d_elg_sparse_mat
#ifdef HAVE_ELL_GPU
private :: d_elg_get_nzeros, d_elg_get_size, d_elg_get_nz_row, &
     & d_elg_get_fmt, d_elg_sizeof, d_elg_free

!
! Explicit interfaces for the procedures bound to psb_d_elg_sparse_mat
! whose bodies are not contained in this module.
!
interface

  ! ---- storage management ------------------------------------------
  subroutine psb_d_elg_reallocate_nz(nz,a)
    import :: psb_d_elg_sparse_mat
    integer, intent(in)                        :: nz
    class(psb_d_elg_sparse_mat), intent(inout) :: a
  end subroutine psb_d_elg_reallocate_nz

  subroutine psb_d_elg_allocate_mnnz(m,n,a,nz)
    import :: psb_d_elg_sparse_mat
    integer, intent(in)                        :: m,n
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    integer, intent(in), optional              :: nz
  end subroutine psb_d_elg_allocate_mnnz

  subroutine psb_d_elg_mold(a,b,info)
    import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_long_int_k_
    class(psb_d_elg_sparse_mat), intent(in)                 :: a
    class(psb_d_base_sparse_mat), intent(out), allocatable  :: b
    integer, intent(out)                                    :: info
  end subroutine psb_d_elg_mold

  ! ---- conversions: copy variants leave b intent(in) ----------------
  subroutine psb_d_cp_elg_from_coo(a,b,info)
    import :: psb_d_elg_sparse_mat, psb_d_coo_sparse_mat
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    class(psb_d_coo_sparse_mat), intent(in)    :: b
    integer, intent(out)                       :: info
  end subroutine psb_d_cp_elg_from_coo

  subroutine psb_d_cp_elg_from_fmt(a,b,info)
    import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    class(psb_d_base_sparse_mat), intent(in)   :: b
    integer, intent(out)                       :: info
  end subroutine psb_d_cp_elg_from_fmt

  ! ---- conversions: move variants take b as intent(inout) -----------
  subroutine psb_d_mv_elg_from_coo(a,b,info)
    import :: psb_d_elg_sparse_mat, psb_d_coo_sparse_mat
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    class(psb_d_coo_sparse_mat), intent(inout) :: b
    integer, intent(out)                       :: info
  end subroutine psb_d_mv_elg_from_coo

  subroutine psb_d_mv_elg_from_fmt(a,b,info)
    import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat
    class(psb_d_elg_sparse_mat), intent(inout)  :: a
    class(psb_d_base_sparse_mat), intent(inout) :: b
    integer, intent(out)                        :: info
  end subroutine psb_d_mv_elg_from_fmt

  ! ---- ELG to ELG copy / move ---------------------------------------
  subroutine psb_d_elg_cp_from(a,b)
    import :: psb_d_elg_sparse_mat, psb_dpk_
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    type(psb_d_elg_sparse_mat), intent(in)     :: b
  end subroutine psb_d_elg_cp_from

  subroutine psb_d_elg_mv_from(a,b)
    import :: psb_d_elg_sparse_mat, psb_dpk_
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    type(psb_d_elg_sparse_mat), intent(inout)  :: b
  end subroutine psb_d_elg_mv_from

  ! ---- products: rank-1 (csmv) and rank-2 (csmm) operands -----------
  subroutine psb_d_elg_csmv(alpha,a,x,beta,y,info,trans)
    import :: psb_d_elg_sparse_mat, psb_dpk_
    class(psb_d_elg_sparse_mat), intent(in) :: a
    real(psb_dpk_), intent(in)              :: alpha, beta, x(:)
    real(psb_dpk_), intent(inout)           :: y(:)
    integer, intent(out)                    :: info
    character, optional, intent(in)         :: trans
  end subroutine psb_d_elg_csmv

  subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans)
    import :: psb_d_elg_sparse_mat, psb_dpk_
    class(psb_d_elg_sparse_mat), intent(in) :: a
    real(psb_dpk_), intent(in)              :: alpha, beta, x(:,:)
    real(psb_dpk_), intent(inout)           :: y(:,:)
    integer, intent(out)                    :: info
    character, optional, intent(in)         :: trans
  end subroutine psb_d_elg_csmm

  ! ---- scaling: by a vector (scal) or by a scalar (scals) -----------
  subroutine psb_d_elg_scal(d,a,info)
    import :: psb_d_elg_sparse_mat, psb_dpk_
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    real(psb_dpk_), intent(in)                 :: d(:)
    integer, intent(out)                       :: info
  end subroutine psb_d_elg_scal

  subroutine psb_d_elg_scals(d,a,info)
    import :: psb_d_elg_sparse_mat, psb_dpk_
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    real(psb_dpk_), intent(in)                 :: d
    integer, intent(out)                       :: info
  end subroutine psb_d_elg_scals

end interface
contains
! == ===================================
!
!
!
! Getters
!
!
!
!
!
! == ===================================
function d_elg_sizeof(a) result(nbytes)
  !
  ! Size estimate for the host-side storage of the matrix: a constant
  ! term of 8 plus the element size times the extent of each of the
  ! val, ja, irn and idiag arrays.
  ! (Presumably the unit is bytes; psb_sizeof_dp and psb_sizeof_int
  ! are defined outside this module.)
  !
  implicit none
  class(psb_d_elg_sparse_mat), intent(in) :: a
  integer(psb_long_int_k_)                :: nbytes

  ! Accumulate term by term so every addition is carried out in the
  ! long integer kind of the result.
  nbytes = 8
  nbytes = nbytes + psb_sizeof_int * size(a%irn)
  nbytes = nbytes + psb_sizeof_int * size(a%idiag)
  nbytes = nbytes + psb_sizeof_int * size(a%ja)
  nbytes = nbytes + psb_sizeof_dp  * size(a%val)

  ! Open question: should the shadow data structure on the GPU
  ! device side be accounted for as well?
  ! nbytes = 2*nbytes
end function d_elg_sizeof
function d_elg_get_fmt(a) result(fmt_name)
  !
  ! Return the name of this storage format as a 5-character string;
  ! the assignment blank-pads the three-letter name.
  ! The argument is only used for dispatch.
  !
  implicit none
  class(psb_d_elg_sparse_mat), intent(in) :: a
  character(len=5)                        :: fmt_name

  fmt_name = 'ELG'
end function d_elg_get_fmt
!!$ function d_elg_get_nzeros(a) result(res)
!!$ implicit none
!!$ class(psb_d_elg_sparse_mat), intent(in) :: a
!!$ integer :: res
!!$ res = sum(a%irn(1:a%get_nrows()))
!!$ end function d_elg_get_nzeros
!!$
!!$ function d_elg_get_size(a) result(res)
!!$ implicit none
!!$ class(psb_d_elg_sparse_mat), intent(in) :: a
!!$ integer :: res
!!$
!!$ res = -1
!!$
!!$ if (allocated(a%ja)) then
!!$ if (res >= 0) then
!!$ res = min(res,size(a%ja))
!!$ else
!!$ res = size(a%ja)
!!$ end if
!!$ end if
!!$ if (allocated(a%val)) then
!!$ if (res >= 0) then
!!$ res = min(res,size(a%val))
!!$ else
!!$ res = size(a%val)
!!$ end if
!!$ end if
!!$
!!$ end function d_elg_get_size
!!$
!!$
!!$
!!$ function d_elg_get_nz_row(idx,a) result(res)
!!$
!!$ implicit none
!!$
!!$ class(psb_d_elg_sparse_mat), intent(in) :: a
!!$ integer, intent(in) :: idx
!!$ integer :: res
!!$
!!$ res = 0
!!$
!!$ if ((1<=idx).and.(idx<=a%get_nrows())) then
!!$ res = a%irn(idx)
!!$ end if
!!$
!!$ end function d_elg_get_nz_row
!!$
! == ===================================
!
!
!
! Data management
!
!
!
!
!
! == ===================================
subroutine d_elg_free(a)
  !
  ! Release all storage owned by the matrix: the host arrays
  ! (idiag, irn, ja, val) and, when present, the device-side copy.
  ! On return the matrix is set to the null state with zero rows
  ! and columns.
  !
  ! The device handle is released only if it is associated, and is
  ! reset to a null pointer afterwards, so that calling free again
  ! on the same object does not hand a stale handle to freeEllDevice.
  !
#ifdef HAVE_ELL_GPU
  use elldev_mod
#endif
  implicit none
  class(psb_d_elg_sparse_mat), intent(inout) :: a

  ! Host-side arrays
  if (allocated(a%idiag)) deallocate(a%idiag)
  if (allocated(a%irn))   deallocate(a%irn)
  if (allocated(a%ja))    deallocate(a%ja)
  if (allocated(a%val))   deallocate(a%val)

  call a%set_null()
  call a%set_nrows(0)
  call a%set_ncols(0)

#ifdef HAVE_ELL_GPU
  ! Device-side copy
  if (c_associated(a%deviceMat)) then
    call freeEllDevice(a%deviceMat)
    a%deviceMat = c_null_ptr
  end if
#endif
  return
end subroutine d_elg_free
#endif
end module psb_d_elg_mat_mod

@ -5,7 +5,7 @@ CSR Storage format for matrix A: CSR COO JAD
040 Domain size (actual system is this**3)
2 Stopping criterion
0100 MAXIT
-1 ITRACE
01 ITRACE
20 IRST restart for RGMRES and BiCGSTABL

Loading…
Cancel
Save