psblas3-dev:

opt/Makefile
 opt/elldev.c
 opt/elldev.h
 opt/elldev_mod.F90
 opt/psb_d_elg_impl.F90
 opt/psb_d_elg_mat_mod.F90

Moved work on GPU to psblas-gpu.
psblas3-type-indexed
Salvatore Filippone 14 years ago
parent 7c678efd29
commit e04fa17a1c

@ -15,18 +15,14 @@ FINCLUDES=$(FMFLAG)$(LIBDIR) $(FMFLAG).
EXEDIR=./runs
OBJS=psb_d_ell_impl.o psb_d_ell_mat_mod.o rsb_mod.o psb_d_rsb_mat_mod.o \
psb_d_elg_mat_mod.o psb_d_elg_impl.o elldev.o elldev_mod.o
OBJS=psb_d_ell_impl.o psb_d_ell_mat_mod.o rsb_mod.o psb_d_rsb_mat_mod.o
lib: libopt.a
libopt.a: $(OBJS)
ar cur libopt.a $(OBJS)
psb_d_ell_impl.o: psb_d_ell_mat_mod.o
psb_d_rsb_mat_mod.o: rsb_mod.o
psb_d_elg_impl.o: psb_d_elg_mat_mod.o
psb_d_elg_mat_mod.o: elldev_mod.o
elldev.o: elldev.c
elldev.c: elldev.h
clean:
/bin/rm -f $(OBJS) *$(.mod)

@ -1,114 +0,0 @@
#include "elldev.h"
// Forward declarations of the precision-specific wrappers defined below.
// Each one forwards to the untyped routine of the same base name declared
// in elldev.h when HAVE_ELL_GPU is defined.
// sparse Ell matrix-vector product
int spmvEllDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY);
int spmvEllDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY);
// Host Vector -> Device Vector
int writeVecDeviceFloat(void* deviceVec, float* hostVec);
int writeVecDeviceDouble(void* deviceVec, double* hostVec);
// Device Vector -> Host Vector
int readVecDeviceFloat(void* deviceVec, float* hostVec);
int readVecDeviceDouble(void* deviceVec, double* hostVec);
// dot product of two device vectors, result stored through y_res
int dotVecDeviceFloat(float* y_res, void* devVecA, void* devVecB);
int dotVecDeviceDouble(double* y_res, void* devVecA, void* devVecB);
// axpby on two device vectors with host-side scalars alpha and beta
int axpbyVecDeviceFloat(float* alpha, void* devVecX, float* beta, void* devVecY);
int axpbyVecDeviceDouble(double* alpha, void* devVecX, double* beta, void* devVecY);
// Single-precision entry point for spmvEllDevice(): the scalars alpha and
// beta are handed over as untyped pointers, everything else is forwarded
// unchanged. Returns CINTRF_UNSUPPORTED when built without HAVE_ELL_GPU.
int spmvEllDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY)
{
#ifndef HAVE_ELL_GPU
  return CINTRF_UNSUPPORTED;
#else
  void *alphaPtr = alpha;
  void *betaPtr = beta;
  return spmvEllDevice(deviceMat, alphaPtr, deviceX, betaPtr, deviceY);
#endif
}
// Double-precision entry point for spmvEllDevice(): the scalars alpha and
// beta are handed over as untyped pointers, everything else is forwarded
// unchanged. Returns CINTRF_UNSUPPORTED when built without HAVE_ELL_GPU.
int spmvEllDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY)
{
#ifndef HAVE_ELL_GPU
  return CINTRF_UNSUPPORTED;
#else
  void *alphaPtr = alpha;
  void *betaPtr = beta;
  return spmvEllDevice(deviceMat, alphaPtr, deviceX, betaPtr, deviceY);
#endif
}
// Single-precision entry point for writeVecDevice(): hostVec is passed on as
// an untyped pointer. Returns CINTRF_UNSUPPORTED when built without
// HAVE_ELL_GPU.
int writeVecDeviceFloat(void* deviceVec, float* hostVec)
{
#ifndef HAVE_ELL_GPU
  return CINTRF_UNSUPPORTED;
#else
  void *hostData = hostVec;
  return writeVecDevice(deviceVec, hostData);
#endif
}
// Double-precision entry point for writeVecDevice(): hostVec is passed on as
// an untyped pointer. Returns CINTRF_UNSUPPORTED when built without
// HAVE_ELL_GPU.
int writeVecDeviceDouble(void* deviceVec, double* hostVec)
{
#ifndef HAVE_ELL_GPU
  return CINTRF_UNSUPPORTED;
#else
  void *hostData = hostVec;
  return writeVecDevice(deviceVec, hostData);
#endif
}
// Single-precision entry point for readVecDevice(): hostVec is passed on as
// an untyped pointer. Returns CINTRF_UNSUPPORTED when built without
// HAVE_ELL_GPU.
int readVecDeviceFloat(void* deviceVec, float* hostVec)
{
#ifndef HAVE_ELL_GPU
  return CINTRF_UNSUPPORTED;
#else
  void *hostData = hostVec;
  return readVecDevice(deviceVec, hostData);
#endif
}
// Double-precision entry point for readVecDevice(): hostVec is passed on as
// an untyped pointer. Returns CINTRF_UNSUPPORTED when built without
// HAVE_ELL_GPU.
int readVecDeviceDouble(void* deviceVec, double* hostVec)
{
#ifndef HAVE_ELL_GPU
  return CINTRF_UNSUPPORTED;
#else
  void *hostData = hostVec;
  return readVecDevice(deviceVec, hostData);
#endif
}
// Single-precision entry point for dotVecDevice(): the result location y_res
// is passed on as an untyped pointer. Returns CINTRF_UNSUPPORTED when built
// without HAVE_ELL_GPU.
int dotVecDeviceFloat(float* y_res, void* devVecA, void* devVecB)
{
#ifndef HAVE_ELL_GPU
  return CINTRF_UNSUPPORTED;
#else
  void *resultPtr = y_res;
  return dotVecDevice(resultPtr, devVecA, devVecB);
#endif
}
// Double-precision entry point for dotVecDevice(): the result location y_res
// is passed on as an untyped pointer. Returns CINTRF_UNSUPPORTED when built
// without HAVE_ELL_GPU.
int dotVecDeviceDouble(double* y_res, void* devVecA, void* devVecB)
{
#ifndef HAVE_ELL_GPU
  return CINTRF_UNSUPPORTED;
#else
  void *resultPtr = y_res;
  return dotVecDevice(resultPtr, devVecA, devVecB);
#endif
}
// Single-precision entry point for axpbyVecDevice(): alpha and beta are
// passed on as untyped pointers. Returns CINTRF_UNSUPPORTED when built
// without HAVE_ELL_GPU.
int axpbyVecDeviceFloat(float* alpha, void* devVecX, float* beta, void* devVecY)
{
#ifndef HAVE_ELL_GPU
  return CINTRF_UNSUPPORTED;
#else
  void *alphaPtr = alpha;
  void *betaPtr = beta;
  return axpbyVecDevice(alphaPtr, devVecX, betaPtr, devVecY);
#endif
}
// Double-precision entry point for axpbyVecDevice(): alpha and beta are
// passed on as untyped pointers. Returns CINTRF_UNSUPPORTED when built
// without HAVE_ELL_GPU.
int axpbyVecDeviceDouble(double* alpha, void* devVecX, double* beta, void* devVecY)
{
#ifndef HAVE_ELL_GPU
  return CINTRF_UNSUPPORTED;
#else
  void *alphaPtr = alpha;
  void *betaPtr = beta;
  return axpbyVecDevice(alphaPtr, devVecX, betaPtr, devVecY);
#endif
}

@ -1,88 +0,0 @@
#ifndef SPGPU_INTERFACE_H
#define SPGPU_INTERFACE_H
//////////////
// legend:
// cM : compressed Matrix
// rP : row pointers
// rS : row size
/////////////
// element types
#define TYPE_FLOAT 0
#define TYPE_DOUBLE 1
// TYPE_COMPLEX?
// TYPE_INT?
// TYPE_BOOLEAN?
// return codes
#define CINTRF_SUCCESS 0
#define CINTRF_NOMEMORY -1
#define CINTRF_UNSUPPORTED -2
// Parameters describing an ELLPACK-format matrix on the device.
// An instance is produced by getEllDeviceParams() and consumed by
// allocEllDevice(). All fields are unsigned int.
typedef struct EllDeviceParams
{
// Element type of the stored values (TYPE_FLOAT or TYPE_DOUBLE).
// The resulting allocation for cM and rP will be pitch*maxRowSize*elementSize
unsigned int elementType;
// Pitch (in number of elements)
unsigned int pitch;
// Number of rows.
// Used to allocate rS array
unsigned int rows;
// Largest row size
unsigned int maxRowSize;
// First index (e.g. 0 or 1)
unsigned int firstIndex;
} EllDeviceParams;
// Device-side API; only declared when the GPU back end is enabled.
#ifdef HAVE_ELL_GPU
// Generate ELLPACK format matrix parameters
EllDeviceParams getEllDeviceParams(unsigned int rows, unsigned int maxRowSize, unsigned int elementType, unsigned int firstIndex);
// Allocate/Free matrix on device.
// The device matrix pointer is returned in *deviceMat
// (the device struct type is hidden; use the pointer just as an id).
// allocEllDevice can return CINTRF_SUCCESS, CINTRF_NOMEMORY or
// CINTRF_UNSUPPORTED. Note that params is passed by address.
int allocEllDevice(void** deviceMat, EllDeviceParams* params);
void freeEllDevice(void* deviceMat);
// Update device copy with host copy
// (cM: compressed matrix, rP: row pointers, rS: row sizes)
int writeEllDevice(void *deviceMat, void* cM, int* rP, int* rS);
// Update host copy with device copy
// (cM: compressed matrix, rP: row pointers, rS: row sizes)
int readEllDevice(void *deviceMat, void* cM, int* rP, int* rS);
// sparse Ell matrix-vector product
// alpha and beta point to host scalars; deviceX and deviceY are device vectors
int spmvEllDevice(void *deviceMat, void* alpha, void* deviceX, void* beta, void* deviceY);
//// Vector management
// allocVecDevice can return CINTRF_SUCCESS, CINTRF_NOMEMORY or CINTRF_UNSUPPORTED
// and returns the vector pointer (use the pointer just as an id) in *deviceVec
int allocVecDevice(void** deviceVec, int size, unsigned int elementType);
void freeVecDevice(void* deviceVec);
// Host Vector -> Device Vector
// if deviceVec is a float device vector, hostVec must be a float array
// if deviceVec is a double device vector, hostVec must be a double array
int writeVecDevice(void* deviceVec, void* hostVec);
// Device Vector -> Host Vector
// (same element-type rule for hostVec as in writeVecDevice)
int readVecDevice(void* deviceVec, void* hostVec);
// dot product (y_res = devVecA . devVecB)
// y_res: pointer to result (e.g. float*/double*)
// devVecA, devVecB: device vectors
// if devVecA and devVecB are float prec. device vectors, y_res should be a pointer to a float value
// if devVecA and devVecB are double prec. device vectors, y_res should be a pointer to a double value
int dotVecDevice(void* y_res, void* devVecA, void* devVecB);
// axpby on device vectors
// NOTE(review): presumably devVecY = alpha*devVecX + beta*devVecY -- confirm against the implementation
// if devVecX and devVecY are float prec. device vectors, alpha and beta should be pointers to a float value
// if devVecX and devVecY are double prec. device vectors, alpha and beta should be pointers to a double value
int axpbyVecDevice(void* alpha, void* devVecX, void* beta, void* devVecY);
#endif
#endif

@ -1,156 +0,0 @@
module elldev_mod
use iso_c_binding
! Element type codes; values match TYPE_FLOAT and TYPE_DOUBLE in elldev.h
integer(c_int), parameter :: elldev_float = 0
integer(c_int), parameter :: elldev_double = 1
! Return codes; values match CINTRF_SUCCESS, CINTRF_NOMEMORY and
! CINTRF_UNSUPPORTED in elldev.h
integer(c_int), parameter :: elldev_success = 0
integer(c_int), parameter :: elldev_nomem = -1
integer(c_int), parameter :: elldev_unsupported = -2
! Interoperable counterpart of the C struct EllDeviceParams.
! The component order follows the C declaration and must not be changed.
! NOTE(review): the C fields are unsigned int while c_int is signed; this
! relies on both having the same size -- confirm values stay non-negative.
type, bind(c) :: elldev_parms
! element type code (elldev_float or elldev_double)
integer(c_int) :: element_type
! pitch, in number of elements
integer(c_int) :: pitch
! number of rows
integer(c_int) :: rows
! largest row size
integer(c_int) :: maxRowSize
! first index (e.g. 0 or 1)
integer(c_int) :: firstIndex
end type elldev_parms
#ifdef HAVE_ELL_GPU
! Binding for the C routine getEllDeviceParams.
! The four integer arguments are passed by value and the parameter
! structure is returned by value.
interface
  function getEllDeviceParams(rows,maxRowSize,elementType,firstIndex) &
       & result(val) bind(c,name='getEllDeviceParams')
    use iso_c_binding, only : c_int
    import :: elldev_parms
    integer(c_int), value :: rows
    integer(c_int), value :: maxRowSize
    integer(c_int), value :: elementType
    integer(c_int), value :: firstIndex
    type(elldev_parms)    :: val
  end function getEllDeviceParams
end interface
! Binding for the C routine
!   int allocEllDevice(void** deviceMat, EllDeviceParams* params)
!
! deviceMat : receives the device matrix handle (passed by reference, so the
!             C side sees a void**).
! parms     : matrix parameters. The C prototype takes a POINTER to the
!             structure, so this dummy must NOT carry the VALUE attribute;
!             passing the structure by value would make the C side read
!             structure contents as if they were an address.
! val       : C return code (elldev_success, elldev_nomem or
!             elldev_unsupported).
interface
  function allocEllDevice(deviceMat,parms) &
       & result(val) bind(c,name='allocEllDevice')
    use iso_c_binding
    import :: elldev_parms
    integer(c_int)     :: val
    type(c_ptr)        :: deviceMat
    type(elldev_parms) :: parms
  end function allocEllDevice
end interface
! Binding for the C routine freeEllDevice.
! The device matrix handle is passed by value (C side: void*).
interface
  subroutine freeEllDevice(deviceMat) bind(c,name='freeEllDevice')
    use iso_c_binding, only : c_ptr
    type(c_ptr), value :: deviceMat
  end subroutine freeEllDevice
end interface
! Binding for the C routine allocVecDevice.
! deviceVec is passed by reference and receives the device vector handle;
! size and datatype are passed by value. The result is the C return code.
interface
  function allocVecDevice(deviceVec, size, datatype) &
       & result(val) bind(c,name='allocVecDevice')
    use iso_c_binding, only : c_int, c_ptr
    import :: elldev_parms
    type(c_ptr)           :: deviceVec
    integer(c_int), value :: size
    integer(c_int), value :: datatype
    integer(c_int)        :: val
  end function allocVecDevice
end interface
! Binding for the C routine freeVecDevice.
! The device vector handle is passed by value (C side: void*).
interface
  subroutine freeVecDevice(deviceVec) bind(c,name='freeVecDevice')
    use iso_c_binding, only : c_ptr
    type(c_ptr), value :: deviceVec
  end subroutine freeVecDevice
end interface
! Generic writeVecDevice: host vector -> device vector.
! Resolves on the real kind of hostVec to the matching C wrapper.
! The device handle is passed by value, the host array by reference.
interface writeVecDevice
  ! single precision host data
  function writeVecDeviceFloat(deviceVec,hostVec) &
       & result(val) bind(c,name='writeVecDeviceFloat')
    use iso_c_binding, only : c_int, c_ptr, c_float
    type(c_ptr), value :: deviceVec
    real(c_float)      :: hostVec(*)
    integer(c_int)     :: val
  end function writeVecDeviceFloat
  ! double precision host data
  function writeVecDeviceDouble(deviceVec,hostVec) &
       & result(val) bind(c,name='writeVecDeviceDouble')
    use iso_c_binding, only : c_int, c_ptr, c_double
    type(c_ptr), value :: deviceVec
    real(c_double)     :: hostVec(*)
    integer(c_int)     :: val
  end function writeVecDeviceDouble
end interface writeVecDevice
! Generic readVecDevice: device vector -> host vector.
! Resolves on the real kind of hostVec to the matching C wrapper.
! The device handle is passed by value, the host array by reference.
interface readVecDevice
  ! single precision host data
  function readVecDeviceFloat(deviceVec,hostVec) &
       & result(val) bind(c,name='readVecDeviceFloat')
    use iso_c_binding, only : c_int, c_ptr, c_float
    type(c_ptr), value :: deviceVec
    real(c_float)      :: hostVec(*)
    integer(c_int)     :: val
  end function readVecDeviceFloat
  ! double precision host data
  function readVecDeviceDouble(deviceVec,hostVec) &
       & result(val) bind(c,name='readVecDeviceDouble')
    use iso_c_binding, only : c_int, c_ptr, c_double
    type(c_ptr), value :: deviceVec
    real(c_double)     :: hostVec(*)
    integer(c_int)     :: val
  end function readVecDeviceDouble
end interface readVecDevice
! Generic spmvEllDevice: sparse ELL matrix-vector product on the device.
! Resolves on the real kind of alpha and beta to the matching C wrapper.
! The device handles (matrix, x, y) are passed by value; the scalars are
! passed by reference.
interface spmvEllDevice
  ! single precision scalars
  function spmvEllDeviceFloat(deviceMat,alpha,x,beta,y) &
       & result(val) bind(c,name='spmvEllDeviceFloat')
    use iso_c_binding, only : c_int, c_ptr, c_float
    type(c_ptr), value :: deviceMat
    real(c_float)      :: alpha
    type(c_ptr), value :: x
    real(c_float)      :: beta
    type(c_ptr), value :: y
    integer(c_int)     :: val
  end function spmvEllDeviceFloat
  ! double precision scalars
  function spmvEllDeviceDouble(deviceMat,alpha,x,beta,y) &
       & result(val) bind(c,name='spmvEllDeviceDouble')
    use iso_c_binding, only : c_int, c_ptr, c_double
    type(c_ptr), value :: deviceMat
    real(c_double)     :: alpha
    type(c_ptr), value :: x
    real(c_double)     :: beta
    type(c_ptr), value :: y
    integer(c_int)     :: val
  end function spmvEllDeviceDouble
end interface spmvEllDevice
! Generic dotVecDevice: dot product of two device vectors.
! Resolves on the real kind of res to the matching C wrapper.
! res is passed by reference and receives the result; the two device
! handles are passed by value.
interface dotVecDevice
  ! single precision result
  function dotVecDeviceFloat(res,deviceVecA,deviceVecB) &
       & result(val) bind(c,name='dotVecDeviceFloat')
    use iso_c_binding, only : c_int, c_ptr, c_float
    real(c_float)      :: res
    type(c_ptr), value :: deviceVecA
    type(c_ptr), value :: deviceVecB
    integer(c_int)     :: val
  end function dotVecDeviceFloat
  ! double precision result
  function dotVecDeviceDouble(res,deviceVecA,deviceVecB) &
       & result(val) bind(c,name='dotVecDeviceDouble')
    use iso_c_binding, only : c_int, c_ptr, c_double
    real(c_double)     :: res
    type(c_ptr), value :: deviceVecA
    type(c_ptr), value :: deviceVecB
    integer(c_int)     :: val
  end function dotVecDeviceDouble
end interface dotVecDevice
! Generic axpbyVecDevice: axpby on two device vectors.
! Resolves on the real kind of alpha and beta to the matching C wrapper.
! The scalars are passed by reference; the two device handles are passed
! by value.
interface axpbyVecDevice
  ! single precision scalars
  function axpbyVecDeviceFloat(alpha,deviceVecA,beta,deviceVecB) &
       & result(val) bind(c,name='axpbyVecDeviceFloat')
    use iso_c_binding, only : c_int, c_ptr, c_float
    real(c_float)      :: alpha
    type(c_ptr), value :: deviceVecA
    real(c_float)      :: beta
    type(c_ptr), value :: deviceVecB
    integer(c_int)     :: val
  end function axpbyVecDeviceFloat
  ! double precision scalars
  function axpbyVecDeviceDouble(alpha,deviceVecA,beta,deviceVecB) &
       & result(val) bind(c,name='axpbyVecDeviceDouble')
    use iso_c_binding, only : c_int, c_ptr, c_double
    real(c_double)     :: alpha
    type(c_ptr), value :: deviceVecA
    real(c_double)     :: beta
    type(c_ptr), value :: deviceVecB
    integer(c_int)     :: val
  end function axpbyVecDeviceDouble
end interface axpbyVecDevice
#endif
end module elldev_mod

File diff suppressed because it is too large Load Diff

@ -1,289 +0,0 @@
module psb_d_elg_mat_mod
use iso_c_binding
use psb_d_base_mat_mod
use psb_d_ell_mat_mod
type, extends(psb_d_ell_sparse_mat) :: psb_d_elg_sparse_mat
  !
  ! Extended ITPACK/ELL storage.
  ! On top of the ELL parent this type carries a handle to a copy of the
  ! matrix data held on the GPU, plus the bindings that keep it in use.
  ! When HAVE_ELL_GPU is not defined nothing is added, and the type is
  ! just a copy of ELL, indistinguishable from it.
  !
#ifdef HAVE_ELL_GPU
  ! handle to the device-side matrix; null until one is allocated
  type(c_ptr) :: deviceMat = c_null_ptr
contains
  ! inquiry
  procedure, pass(a) :: get_fmt       => d_elg_get_fmt
  procedure, pass(a) :: sizeof        => d_elg_sizeof
  ! computational kernels
  procedure, pass(a) :: d_csmm        => psb_d_elg_csmm
  procedure, pass(a) :: d_csmv        => psb_d_elg_csmv
  procedure, pass(a) :: d_scals       => psb_d_elg_scals
  procedure, pass(a) :: d_scal        => psb_d_elg_scal
  ! storage management
  procedure, pass(a) :: reallocate_nz => psb_d_elg_reallocate_nz
  procedure, pass(a) :: allocate_mnnz => psb_d_elg_allocate_mnnz
  ! conversions from other storage formats
  procedure, pass(a) :: cp_from_coo   => psb_d_cp_elg_from_coo
  procedure, pass(a) :: cp_from_fmt   => psb_d_cp_elg_from_fmt
  procedure, pass(a) :: mv_from_coo   => psb_d_mv_elg_from_coo
  procedure, pass(a) :: mv_from_fmt   => psb_d_mv_elg_from_fmt
  procedure, pass(a) :: free          => d_elg_free
  procedure, pass(a) :: mold          => psb_d_elg_mold
  ! same-type copy and move, exposed through the generics cp_from / mv_from
  procedure, pass(a) :: psb_d_elg_cp_from
  generic, public    :: cp_from       => psb_d_elg_cp_from
  procedure, pass(a) :: psb_d_elg_mv_from
  generic, public    :: mv_from       => psb_d_elg_mv_from
#endif
end type psb_d_elg_sparse_mat
#ifdef HAVE_ELL_GPU
private :: d_elg_get_nzeros, d_elg_free, d_elg_get_fmt, &
& d_elg_get_size, d_elg_sizeof, d_elg_get_nz_row
! Explicit interface for the routine bound to reallocate_nz;
! the body is defined outside this module.
interface
  subroutine psb_d_elg_reallocate_nz(nz,a)
    import :: psb_d_elg_sparse_mat
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    integer, intent(in)                        :: nz
  end subroutine psb_d_elg_reallocate_nz
end interface
! Explicit interface for the routine bound to allocate_mnnz;
! the body is defined outside this module. nz is optional.
interface
  subroutine psb_d_elg_allocate_mnnz(m,n,a,nz)
    import :: psb_d_elg_sparse_mat
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    integer, intent(in)                        :: m
    integer, intent(in)                        :: n
    integer, optional, intent(in)              :: nz
  end subroutine psb_d_elg_allocate_mnnz
end interface
! Explicit interface for the routine bound to mold;
! the body is defined outside this module.
! b is an allocatable polymorphic output, info is the status code.
interface
  subroutine psb_d_elg_mold(a,b,info)
    import :: psb_d_elg_sparse_mat
    import :: psb_d_base_sparse_mat
    import :: psb_long_int_k_
    class(psb_d_elg_sparse_mat), intent(in)                :: a
    class(psb_d_base_sparse_mat), allocatable, intent(out) :: b
    integer, intent(out)                                   :: info
  end subroutine psb_d_elg_mold
end interface
! Explicit interface for the routine bound to cp_from_coo;
! the body is defined outside this module.
! The COO source b is read-only, info is the status code.
interface
  subroutine psb_d_cp_elg_from_coo(a,b,info)
    import :: psb_d_elg_sparse_mat
    import :: psb_d_coo_sparse_mat
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    class(psb_d_coo_sparse_mat), intent(in)    :: b
    integer, intent(out)                       :: info
  end subroutine psb_d_cp_elg_from_coo
end interface
! Explicit interface for the routine bound to cp_from_fmt;
! the body is defined outside this module.
! The source b may be any base-class matrix and is read-only.
interface
  subroutine psb_d_cp_elg_from_fmt(a,b,info)
    import :: psb_d_elg_sparse_mat
    import :: psb_d_base_sparse_mat
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    class(psb_d_base_sparse_mat), intent(in)   :: b
    integer, intent(out)                       :: info
  end subroutine psb_d_cp_elg_from_fmt
end interface
! Explicit interface for the routine bound to mv_from_coo;
! the body is defined outside this module.
! Unlike the copy variant, the COO source b is intent(inout).
interface
  subroutine psb_d_mv_elg_from_coo(a,b,info)
    import :: psb_d_elg_sparse_mat
    import :: psb_d_coo_sparse_mat
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    class(psb_d_coo_sparse_mat), intent(inout) :: b
    integer, intent(out)                       :: info
  end subroutine psb_d_mv_elg_from_coo
end interface
! Explicit interface for the routine bound to mv_from_fmt;
! the body is defined outside this module.
! Unlike the copy variant, the base-class source b is intent(inout).
interface
  subroutine psb_d_mv_elg_from_fmt(a,b,info)
    import :: psb_d_elg_sparse_mat
    import :: psb_d_base_sparse_mat
    class(psb_d_elg_sparse_mat), intent(inout)  :: a
    class(psb_d_base_sparse_mat), intent(inout) :: b
    integer, intent(out)                        :: info
  end subroutine psb_d_mv_elg_from_fmt
end interface
! Explicit interface for the specific behind the generic cp_from;
! the body is defined outside this module.
! The source b is a non-polymorphic ELG matrix and is read-only.
interface
  subroutine psb_d_elg_cp_from(a,b)
    import :: psb_d_elg_sparse_mat
    import :: psb_dpk_
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    type(psb_d_elg_sparse_mat), intent(in)     :: b
  end subroutine psb_d_elg_cp_from
end interface
! Explicit interface for the specific behind the generic mv_from;
! the body is defined outside this module.
! The source b is a non-polymorphic ELG matrix and is intent(inout).
interface
  subroutine psb_d_elg_mv_from(a,b)
    import :: psb_d_elg_sparse_mat
    import :: psb_dpk_
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    type(psb_d_elg_sparse_mat), intent(inout)  :: b
  end subroutine psb_d_elg_mv_from
end interface
! Explicit interface for the routine bound to d_csmv;
! the body is defined outside this module.
! x and y are rank-1 arrays, y is updated in place, trans is optional.
interface
  subroutine psb_d_elg_csmv(alpha,a,x,beta,y,info,trans)
    import :: psb_d_elg_sparse_mat
    import :: psb_dpk_
    class(psb_d_elg_sparse_mat), intent(in) :: a
    real(psb_dpk_), intent(in)              :: alpha
    real(psb_dpk_), intent(in)              :: beta
    real(psb_dpk_), intent(in)              :: x(:)
    real(psb_dpk_), intent(inout)           :: y(:)
    integer, intent(out)                    :: info
    character, intent(in), optional         :: trans
  end subroutine psb_d_elg_csmv
end interface
! Explicit interface for the routine bound to d_csmm;
! the body is defined outside this module.
! x and y are rank-2 arrays, y is updated in place, trans is optional.
interface
  subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans)
    import :: psb_d_elg_sparse_mat
    import :: psb_dpk_
    class(psb_d_elg_sparse_mat), intent(in) :: a
    real(psb_dpk_), intent(in)              :: alpha
    real(psb_dpk_), intent(in)              :: beta
    real(psb_dpk_), intent(in)              :: x(:,:)
    real(psb_dpk_), intent(inout)           :: y(:,:)
    integer, intent(out)                    :: info
    character, intent(in), optional         :: trans
  end subroutine psb_d_elg_csmm
end interface
! Explicit interface for the routine bound to d_scal;
! the body is defined outside this module.
! The scaling factors d form a rank-1 array.
interface
  subroutine psb_d_elg_scal(d,a,info)
    import :: psb_d_elg_sparse_mat
    import :: psb_dpk_
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    real(psb_dpk_), intent(in)                 :: d(:)
    integer, intent(out)                       :: info
  end subroutine psb_d_elg_scal
end interface
! Explicit interface for the routine bound to d_scals;
! the body is defined outside this module.
! The scaling factor d is a scalar.
interface
  subroutine psb_d_elg_scals(d,a,info)
    import :: psb_d_elg_sparse_mat
    import :: psb_dpk_
    class(psb_d_elg_sparse_mat), intent(inout) :: a
    real(psb_dpk_), intent(in)                 :: d
    integer, intent(out)                       :: info
  end subroutine psb_d_elg_scals
end interface
contains
! == ===================================
!
!
!
! Getters
!
!
!
!
!
! == ===================================
! Host-side memory footprint of the matrix, in bytes.
!
! Starts from a fixed 8 bytes and adds the storage of the val, irn, idiag
! and ja arrays. Each component is counted only if it is currently
! allocated: calling size() on an unallocated allocatable array is not
! permitted, and that state does occur (for instance after free).
! Element counts are taken in the result kind so that the products are
! formed in long integer arithmetic rather than default integer.
!
! a   : the matrix being measured
! res : size in bytes
function d_elg_sizeof(a) result(res)
  implicit none
  class(psb_d_elg_sparse_mat), intent(in) :: a
  integer(psb_long_int_k_) :: res

  res = 8
  if (allocated(a%val)) &
       & res = res + psb_sizeof_dp  * size(a%val,   kind=psb_long_int_k_)
  if (allocated(a%irn)) &
       & res = res + psb_sizeof_int * size(a%irn,   kind=psb_long_int_k_)
  if (allocated(a%idiag)) &
       & res = res + psb_sizeof_int * size(a%idiag, kind=psb_long_int_k_)
  if (allocated(a%ja)) &
       & res = res + psb_sizeof_int * size(a%ja,    kind=psb_long_int_k_)
  ! Should we account for the shadow data structure
  ! on the GPU device side?
  ! res = 2*res
end function d_elg_sizeof
! Storage format tag of this matrix type.
! The result is the name ELG, blank padded to five characters.
function d_elg_get_fmt(a) result(res)
  implicit none
  class(psb_d_elg_sparse_mat), intent(in) :: a
  character(len=5)                        :: res
  character(len=*), parameter :: fmt_name = 'ELG'

  res = fmt_name
end function d_elg_get_fmt
!!$ function d_elg_get_nzeros(a) result(res)
!!$ implicit none
!!$ class(psb_d_elg_sparse_mat), intent(in) :: a
!!$ integer :: res
!!$ res = sum(a%irn(1:a%get_nrows()))
!!$ end function d_elg_get_nzeros
!!$
!!$ function d_elg_get_size(a) result(res)
!!$ implicit none
!!$ class(psb_d_elg_sparse_mat), intent(in) :: a
!!$ integer :: res
!!$
!!$ res = -1
!!$
!!$ if (allocated(a%ja)) then
!!$ if (res >= 0) then
!!$ res = min(res,size(a%ja))
!!$ else
!!$ res = size(a%ja)
!!$ end if
!!$ end if
!!$ if (allocated(a%val)) then
!!$ if (res >= 0) then
!!$ res = min(res,size(a%val))
!!$ else
!!$ res = size(a%val)
!!$ end if
!!$ end if
!!$
!!$ end function d_elg_get_size
!!$
!!$
!!$
!!$ function d_elg_get_nz_row(idx,a) result(res)
!!$
!!$ implicit none
!!$
!!$ class(psb_d_elg_sparse_mat), intent(in) :: a
!!$ integer, intent(in) :: idx
!!$ integer :: res
!!$
!!$ res = 0
!!$
!!$ if ((1<=idx).and.(idx<=a%get_nrows())) then
!!$ res = a%irn(idx)
!!$ end if
!!$
!!$ end function d_elg_get_nz_row
!!$
! == ===================================
!
!
!
! Data management
!
!
!
!
!
! == ===================================
! Release all storage owned by the matrix.
!
! The host arrays (idiag, irn, ja, val) are deallocated if allocated, the
! matrix state is reset through set_null and its dimensions are set to
! zero. When the GPU back end is compiled in, the device copy is released
! as well; the handle is checked first and reset to null afterwards, so
! that freeing an object that never had a device copy, or freeing the same
! object twice, does not hand a null or stale handle to freeEllDevice.
!
! a : the matrix to be released
subroutine d_elg_free(a)
#ifdef HAVE_ELL_GPU
  use elldev_mod
#endif
  implicit none
  class(psb_d_elg_sparse_mat), intent(inout) :: a

  if (allocated(a%idiag)) deallocate(a%idiag)
  if (allocated(a%irn))   deallocate(a%irn)
  if (allocated(a%ja))    deallocate(a%ja)
  if (allocated(a%val))   deallocate(a%val)
  call a%set_null()
  call a%set_nrows(0)
  call a%set_ncols(0)
#ifdef HAVE_ELL_GPU
  if (c_associated(a%deviceMat)) then
    call freeEllDevice(a%deviceMat)
    ! forget the released handle so it cannot be freed or used again
    a%deviceMat = c_null_ptr
  end if
#endif
  return
end subroutine d_elg_free
#endif
end module psb_d_elg_mat_mod

@ -1,11 +1,11 @@
7 Number of entries below this
BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES
BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES
BJAC Preconditioner NONE DIAG BJAC
CSR Storage format for matrix A: CSR COO JAD
040 Domain size (actual system is this**3)
2 Stopping criterion
0100 MAXIT
-1 ITRACE
0100 MAXIT
01 ITRACE
20 IRST restart for RGMRES and BiCGSTABL

Loading…
Cancel
Save