First test of OPENMP enabled version

psblas3-openmp
Salvatore Filippone 13 years ago
parent c7c0251359
commit 922a95bf68

@ -25,9 +25,11 @@ EXTRA_OPT=@EXTRA_OPT@
MPF90=@MPF90@ MPF90=@MPF90@
MPF77=@MPF77@ MPF77=@MPF77@
MPCC=@MPCC@ MPCC=@MPCC@
EXTRA90LINKOPT=@EXTRA90LINKOPT@
EXTRALINKOPT=@EXTRALINKOPT@
F90LINK=$(MPF90) F90LINK=$(MPF90) $(EXTRA90LINKOPT)
FLINK=$(MPF77) FLINK=$(MPF77) $(EXTRALINKOPT)
LIBS=@LIBS@ LIBS=@LIBS@

@ -769,6 +769,7 @@ contains
select type(xx => x) select type(xx => x)
type is (psb_d_base_vect_type) type is (psb_d_base_vect_type)
n = min(size(y%v), size(xx%v)) n = min(size(y%v), size(xx%v))
!$omp parallel do private(i) schedule(static,64)
do i=1, n do i=1, n
y%v(i) = y%v(i)*xx%v(i) y%v(i) = y%v(i)*xx%v(i)
end do end do
@ -795,6 +796,7 @@ contains
info = 0 info = 0
n = min(size(y%v), size(x)) n = min(size(y%v), size(x))
!$omp parallel do private(i) schedule(static,64)
do i=1, n do i=1, n
y%v(i) = y%v(i)*x(i) y%v(i) = y%v(i)*x(i)
end do end do
@ -830,6 +832,7 @@ contains
if (beta == done) then if (beta == done) then
return return
else else
!$omp parallel do private(i) schedule(static,64)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) z%v(i) = beta*z%v(i)
end do end do
@ -837,42 +840,51 @@ contains
else else
if (alpha == done) then if (alpha == done) then
if (beta == dzero) then if (beta == dzero) then
!$omp parallel do private(i) schedule(static,64)
do i=1, n do i=1, n
z%v(i) = y(i)*x(i) z%v(i) = y(i)*x(i)
end do end do
else if (beta == done) then else if (beta == done) then
!$omp parallel do private(i) schedule(static,64)
do i=1, n do i=1, n
z%v(i) = z%v(i) + y(i)*x(i) z%v(i) = z%v(i) + y(i)*x(i)
end do end do
else else
!$omp parallel do private(i) schedule(static,64)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + y(i)*x(i) z%v(i) = beta*z%v(i) + y(i)*x(i)
end do end do
end if end if
else if (alpha == -done) then else if (alpha == -done) then
if (beta == dzero) then if (beta == dzero) then
!$omp parallel do private(i) schedule(static,64)
do i=1, n do i=1, n
z%v(i) = -y(i)*x(i) z%v(i) = -y(i)*x(i)
end do end do
else if (beta == done) then else if (beta == done) then
!$omp parallel do private(i) schedule(static,64)
do i=1, n do i=1, n
z%v(i) = z%v(i) - y(i)*x(i) z%v(i) = z%v(i) - y(i)*x(i)
end do end do
else else
!$omp parallel do private(i) schedule(static,64)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) - y(i)*x(i) z%v(i) = beta*z%v(i) - y(i)*x(i)
end do end do
end if end if
else else
if (beta == dzero) then if (beta == dzero) then
!$omp parallel do private(i) schedule(static,64)
do i=1, n do i=1, n
z%v(i) = alpha*y(i)*x(i) z%v(i) = alpha*y(i)*x(i)
end do end do
else if (beta == done) then else if (beta == done) then
!$omp parallel do private(i) schedule(static,64)
do i=1, n do i=1, n
z%v(i) = z%v(i) + alpha*y(i)*x(i) z%v(i) = z%v(i) + alpha*y(i)*x(i)
end do end do
else else
!$omp parallel do private(i) schedule(static,64)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
end do end do

@ -77,6 +77,7 @@ C
if (alpha.eq.dzero) then if (alpha.eq.dzero) then
if (beta.eq.dzero) then if (beta.eq.dzero) then
do j=1, n do j=1, n
C$omp parallel do private(i) schedule(static,64)
do i=1,m do i=1,m
y(i,j) = dzero y(i,j) = dzero
enddo enddo
@ -88,12 +89,14 @@ c$$$
else if (beta.eq.-done) then else if (beta.eq.-done) then
do j=1,n do j=1,n
C$omp parallel do private(i) schedule(static,64)
do i=1,m do i=1,m
y(i,j) = - y(i,j) y(i,j) = - y(i,j)
enddo enddo
enddo enddo
else else
do j=1,n do j=1,n
C$omp parallel do private(i) schedule(static,64)
do i=1,m do i=1,m
y(i,j) = beta*y(i,j) y(i,j) = beta*y(i,j)
enddo enddo
@ -104,12 +107,14 @@ c$$$
if (beta.eq.dzero) then if (beta.eq.dzero) then
do j=1,n do j=1,n
C$omp parallel do private(i) schedule(static,64)
do i=1,m do i=1,m
y(i,j) = x(i,j) y(i,j) = x(i,j)
enddo enddo
enddo enddo
else if (beta.eq.done) then else if (beta.eq.done) then
do j=1,n do j=1,n
C$omp parallel do private(i) schedule(static,64)
do i=1,m do i=1,m
y(i,j) = x(i,j) + y(i,j) y(i,j) = x(i,j) + y(i,j)
enddo enddo
@ -117,12 +122,14 @@ c$$$
else if (beta.eq.-done) then else if (beta.eq.-done) then
do j=1,n do j=1,n
C$omp parallel do private(i) schedule(static,64)
do i=1,m do i=1,m
y(i,j) = x(i,j) - y(i,j) y(i,j) = x(i,j) - y(i,j)
enddo enddo
enddo enddo
else else
do j=1,n do j=1,n
C$omp parallel do private(i) schedule(static,64)
do i=1,m do i=1,m
y(i,j) = x(i,j) + beta*y(i,j) y(i,j) = x(i,j) + beta*y(i,j)
enddo enddo
@ -133,12 +140,14 @@ c$$$
if (beta.eq.dzero) then if (beta.eq.dzero) then
do j=1,n do j=1,n
C$omp parallel do private(i) schedule(static,64)
do i=1,m do i=1,m
y(i,j) = -x(i,j) y(i,j) = -x(i,j)
enddo enddo
enddo enddo
else if (beta.eq.done) then else if (beta.eq.done) then
do j=1,n do j=1,n
C$omp parallel do private(i) schedule(static,64)
do i=1,m do i=1,m
y(i,j) = -x(i,j) + y(i,j) y(i,j) = -x(i,j) + y(i,j)
enddo enddo
@ -146,12 +155,14 @@ c$$$
else if (beta.eq.-done) then else if (beta.eq.-done) then
do j=1,n do j=1,n
C$omp parallel do private(i) schedule(static,64)
do i=1,m do i=1,m
y(i,j) = -x(i,j) - y(i,j) y(i,j) = -x(i,j) - y(i,j)
enddo enddo
enddo enddo
else else
do j=1,n do j=1,n
C$omp parallel do private(i) schedule(static,64)
do i=1,m do i=1,m
y(i,j) = -x(i,j) + beta*y(i,j) y(i,j) = -x(i,j) + beta*y(i,j)
enddo enddo
@ -162,12 +173,14 @@ c$$$
if (beta.eq.dzero) then if (beta.eq.dzero) then
do j=1,n do j=1,n
C$omp parallel do private(i) schedule(static,64)
do i=1,m do i=1,m
y(i,j) = alpha*x(i,j) y(i,j) = alpha*x(i,j)
enddo enddo
enddo enddo
else if (beta.eq.done) then else if (beta.eq.done) then
do j=1,n do j=1,n
C$omp parallel do private(i) schedule(static,64)
do i=1,m do i=1,m
y(i,j) = alpha*x(i,j) + y(i,j) y(i,j) = alpha*x(i,j) + y(i,j)
enddo enddo
@ -175,12 +188,14 @@ c$$$
else if (beta.eq.-done) then else if (beta.eq.-done) then
do j=1,n do j=1,n
C$omp parallel do private(i) schedule(static,64)
do i=1,m do i=1,m
y(i,j) = alpha*x(i,j) - y(i,j) y(i,j) = alpha*x(i,j) - y(i,j)
enddo enddo
enddo enddo
else else
do j=1,n do j=1,n
C$omp parallel do private(i) schedule(static,64)
do i=1,m do i=1,m
y(i,j) = alpha*x(i,j) + beta*y(i,j) y(i,j) = alpha*x(i,j) + beta*y(i,j)
enddo enddo

@ -152,6 +152,7 @@ contains
if (beta == dzero) then if (beta == dzero) then
if (alpha == done) then if (alpha == done) then
!$omp parallel do private(i,acc,j) schedule(static,64)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=irp(i), irp(i+1)-1 do j=irp(i), irp(i+1)-1
@ -162,6 +163,7 @@ contains
else if (alpha == -done) then else if (alpha == -done) then
!$omp parallel do private(i,acc,j) schedule(static,64)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=irp(i), irp(i+1)-1 do j=irp(i), irp(i+1)-1
@ -172,6 +174,7 @@ contains
else else
!$omp parallel do private(i,acc,j) schedule(static,64)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=irp(i), irp(i+1)-1 do j=irp(i), irp(i+1)-1
@ -186,6 +189,7 @@ contains
else if (beta == done) then else if (beta == done) then
if (alpha == done) then if (alpha == done) then
!$omp parallel do private(i,acc,j) schedule(static,64)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=irp(i), irp(i+1)-1 do j=irp(i), irp(i+1)-1
@ -196,6 +200,7 @@ contains
else if (alpha == -done) then else if (alpha == -done) then
!$omp parallel do private(i,acc,j) schedule(static,64)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=irp(i), irp(i+1)-1 do j=irp(i), irp(i+1)-1
@ -206,6 +211,7 @@ contains
else else
!$omp parallel do private(i,acc,j) schedule(static,64)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=irp(i), irp(i+1)-1 do j=irp(i), irp(i+1)-1
@ -219,6 +225,7 @@ contains
else if (beta == -done) then else if (beta == -done) then
if (alpha == done) then if (alpha == done) then
!$omp parallel do private(i,acc,j) schedule(static,64)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=irp(i), irp(i+1)-1 do j=irp(i), irp(i+1)-1
@ -229,6 +236,7 @@ contains
else if (alpha == -done) then else if (alpha == -done) then
!$omp parallel do private(i,acc,j) schedule(static,64)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=irp(i), irp(i+1)-1 do j=irp(i), irp(i+1)-1
@ -239,6 +247,7 @@ contains
else else
!$omp parallel do private(i,acc,j) schedule(static,64)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=irp(i), irp(i+1)-1 do j=irp(i), irp(i+1)-1
@ -252,6 +261,7 @@ contains
else else
if (alpha == done) then if (alpha == done) then
!$omp parallel do private(i,acc,j) schedule(static,64)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=irp(i), irp(i+1)-1 do j=irp(i), irp(i+1)-1
@ -262,6 +272,7 @@ contains
else if (alpha == -done) then else if (alpha == -done) then
!$omp parallel do private(i,acc,j) schedule(static,64)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=irp(i), irp(i+1)-1 do j=irp(i), irp(i+1)-1
@ -272,6 +283,7 @@ contains
else else
!$omp parallel do private(i,acc,j) schedule(static,64)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=irp(i), irp(i+1)-1 do j=irp(i), irp(i+1)-1

@ -358,6 +358,37 @@ else
fi fi
] ]
) )
dnl @synopsis PAC_ARG_ENABLE_OPENMP
dnl
dnl Test for --enable-openmp.
dnl
dnl Sets the shell variable pac_cv_openmp to "yes" when OpenMP compilation
dnl is requested on the configure command line, and to "no" when the option
dnl is absent or explicitly negated (--disable-openmp, --enable-openmp=no).
dnl
dnl Example use:
dnl
dnl   PAC_ARG_ENABLE_OPENMP
dnl   if test x"$pac_cv_openmp" = x"yes" ; then
dnl     FDEFINES="... $FDEFINES"
dnl   fi
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN([PAC_ARG_ENABLE_OPENMP],
[
AC_MSG_CHECKING([whether we want openmp])
AC_ARG_ENABLE(openmp,
AC_HELP_STRING([--enable-openmp],
[Specify whether to enable OPENMP compilation ]),
[
# enableval is "no" for --disable-openmp and --enable-openmp=no;
# any other value counts as a request to enable OpenMP.
if test x"$enableval" = x"no" ; then
  pac_cv_openmp="no";
else
  pac_cv_openmp="yes";
fi
]
)
# Use the POSIX "=" operator: "==" is not accepted by every /bin/sh test.
if test x"$pac_cv_openmp" = x"yes" ; then
  AC_MSG_RESULT([yes.])
else
  pac_cv_openmp="no";
  AC_MSG_RESULT([no.])
fi
]
)
dnl @synopsis PAC_ARG_LONG_INTEGERS dnl @synopsis PAC_ARG_LONG_INTEGERS
dnl dnl

41
configure vendored

@ -658,8 +658,8 @@ INSTALL_DIR
INSTALL INSTALL
MPIFCC MPIFCC
AR AR
F90LINK EXTRA90LINKOPT
FLINK EXTRALINKOPT
MODEXT MODEXT
FMFLAG FMFLAG
FIFLAG FIFLAG
@ -785,6 +785,7 @@ with_library_path
with_include_path with_include_path
with_module_path with_module_path
enable_dependency_tracking enable_dependency_tracking
enable_openmp
enable_long_integers enable_long_integers
with_blas with_blas
with_lapack with_lapack
@ -1441,6 +1442,7 @@ Optional Features:
in serial mode. in serial mode.
--disable-dependency-tracking speeds up one-time build --disable-dependency-tracking speeds up one-time build
--enable-dependency-tracking do not reject slow dependency extractors --enable-dependency-tracking do not reject slow dependency extractors
--enable-openmp Specify whether to enable OPENMP compilation
--enable-long-integers Specify usage of 64 bits integers. --enable-long-integers Specify usage of 64 bits integers.
Optional Packages: Optional Packages:
@ -7367,6 +7369,35 @@ fi
# The same for pg (Portland Group compilers). # The same for pg (Portland Group compilers).
############################################################################### ###############################################################################
# OpenMP support: decide whether --enable-openmp was requested and, if so,
# add the preprocessor define and (for the gcc family) the -fopenmp flags.
{ $as_echo "$as_me:$LINENO: checking whether we want openmp" >&5
$as_echo_n "checking whether we want openmp... " >&6; }
# Check whether --enable-openmp was given.
if test "${enable_openmp+set}" = set; then
  enableval=$enable_openmp;
  # enableval is "no" for --disable-openmp and --enable-openmp=no;
  # any other value counts as a request to enable OpenMP.
  if test x"$enableval" = x"no" ; then
    pac_cv_openmp="no";
  else
    pac_cv_openmp="yes";
  fi
fi

# Use the POSIX "=" operator: "==" is not accepted by every /bin/sh test.
if test x"$pac_cv_openmp" = x"yes" ; then
  { $as_echo "$as_me:$LINENO: result: yes." >&5
$as_echo "yes." >&6; }
else
  pac_cv_openmp="no";
  { $as_echo "$as_me:$LINENO: result: no." >&5
$as_echo "no." >&6; }
fi

if test x"$pac_cv_openmp" = x"yes" ; then
  FDEFINES="$psblas_cv_define_prepend-DOPENMP $FDEFINES";
  if test "X$psblas_cv_fc" = "Xgcc" ; then
    F90COPT="$F90COPT -fopenmp";
    FCOPT="$FCOPT -fopenmp";
    # Append so that any link options already set are kept.
    EXTRALINKOPT="$EXTRALINKOPT -fopenmp";
    EXTRA90LINKOPT="$EXTRA90LINKOPT -fopenmp";
  fi
fi
############################################################################### ###############################################################################
# Custom test : do we have a module or include for MPI Fortran interface? # Custom test : do we have a module or include for MPI Fortran interface?
@ -11646,7 +11677,8 @@ fi
MPF90 : ${MPF90} MPF90 : ${MPF90}
MPF77 : ${MPF77} MPF77 : ${MPF77}
MPCC : ${MPICC} MPCC : ${MPICC}
FLINK : ${FLINK} EXTRA90LINKOPT : ${EXTRA90LINKOPT}
EXTRALINKOPT : ${EXTRALINKOPT}
FDEFINES : ${FDEFINES} FDEFINES : ${FDEFINES}
CDEFINES : ${CDEFINES} CDEFINES : ${CDEFINES}
@ -11679,7 +11711,8 @@ $as_echo "$as_me:
MPF90 : ${MPF90} MPF90 : ${MPF90}
MPF77 : ${MPF77} MPF77 : ${MPF77}
MPCC : ${MPICC} MPCC : ${MPICC}
FLINK : ${FLINK} EXTRA90LINKOPT : ${EXTRA90LINKOPT}
EXTRALINKOPT : ${EXTRALINKOPT}
FDEFINES : ${FDEFINES} FDEFINES : ${FDEFINES}
CDEFINES : ${CDEFINES} CDEFINES : ${CDEFINES}

@ -504,7 +504,16 @@ fi
# should be valid options. # should be valid options.
# The same for pg (Portland Group compilers). # The same for pg (Portland Group compilers).
############################################################################### ###############################################################################
dnl OpenMP support: PAC_ARG_ENABLE_OPENMP sets pac_cv_openmp to yes/no.
dnl When enabled, add the OPENMP preprocessor define and, for the gcc
dnl compiler family, the -fopenmp compile and link flags.
PAC_ARG_ENABLE_OPENMP
dnl Use the POSIX "=" operator in test; "==" is a bash extension.
if test x"$pac_cv_openmp" = x"yes" ; then
  FDEFINES="$psblas_cv_define_prepend-DOPENMP $FDEFINES";
  if test "X$psblas_cv_fc" = "Xgcc" ; then
    F90COPT="$F90COPT -fopenmp";
    FCOPT="$FCOPT -fopenmp";
dnl Append so that any link options already set are kept.
    EXTRALINKOPT="$EXTRALINKOPT -fopenmp";
    EXTRA90LINKOPT="$EXTRA90LINKOPT -fopenmp";
  fi
fi
############################################################################### ###############################################################################
# Custom test : do we have a module or include for MPI Fortran interface? # Custom test : do we have a module or include for MPI Fortran interface?
@ -763,8 +772,8 @@ AC_SUBST(FIFLAG)
AC_SUBST(FMFLAG) AC_SUBST(FMFLAG)
AC_SUBST(MODEXT) AC_SUBST(MODEXT)
AC_SUBST(MPIF77) AC_SUBST(MPIF77)
AC_SUBST(FLINK) AC_SUBST(EXTRALINKOPT)
AC_SUBST(F90LINK) AC_SUBST(EXTRA90LINKOPT)
AC_SUBST(LIBS) AC_SUBST(LIBS)
AC_SUBST(AR) AC_SUBST(AR)
AC_SUBST(RANLIB) AC_SUBST(RANLIB)
@ -829,8 +838,8 @@ AC_MSG_NOTICE([
MPF90 : ${MPF90} MPF90 : ${MPF90}
MPF77 : ${MPF77} MPF77 : ${MPF77}
MPCC : ${MPICC} MPCC : ${MPICC}
dnl F90LINK : ${F90LINK} EXTRA90LINKOPT : ${EXTRA90LINKOPT}
FLINK : ${FLINK} EXTRALINKOPT : ${EXTRALINKOPT}
FDEFINES : ${FDEFINES} FDEFINES : ${FDEFINES}
CDEFINES : ${CDEFINES} CDEFINES : ${CDEFINES}

@ -1,11 +1,11 @@
7 Number of entries below this 7 Number of entries below this
BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES
BJAC Preconditioner NONE DIAG BJAC DIAG Preconditioner NONE DIAG BJAC
CSR Storage format for matrix A: CSR COO JAD CSR Storage format for matrix A: CSR COO JAD
100 Domain size (actual system is this**3) 100 Domain size (actual system is this**3)
2 Stopping criterion 2 Stopping criterion
1000 MAXIT 1000 MAXIT
01 ITRACE -01 ITRACE
002 IRST restart for RGMRES and BiCGSTABL 002 IRST restart for RGMRES and BiCGSTABL

Loading…
Cancel
Save