diff --git a/Make.inc.in b/Make.inc.in index bcb496e1..6dd4ae54 100755 --- a/Make.inc.in +++ b/Make.inc.in @@ -25,9 +25,11 @@ EXTRA_OPT=@EXTRA_OPT@ MPF90=@MPF90@ MPF77=@MPF77@ MPCC=@MPCC@ +EXTRA90LINKOPT=@EXTRA90LINKOPT@ +EXTRALINKOPT=@EXTRALINKOPT@ -F90LINK=$(MPF90) -FLINK=$(MPF77) +F90LINK=$(MPF90) $(EXTRA90LINKOPT) +FLINK=$(MPF77) $(EXTRALINKOPT) LIBS=@LIBS@ diff --git a/base/modules/psb_d_base_vect_mod.f90 b/base/modules/psb_d_base_vect_mod.f90 index 167e8445..7acea03d 100644 --- a/base/modules/psb_d_base_vect_mod.f90 +++ b/base/modules/psb_d_base_vect_mod.f90 @@ -769,6 +769,7 @@ contains select type(xx => x) type is (psb_d_base_vect_type) n = min(size(y%v), size(xx%v)) + !$omp parallel do private(i) schedule(static,64) do i=1, n y%v(i) = y%v(i)*xx%v(i) end do @@ -795,6 +796,7 @@ contains info = 0 n = min(size(y%v), size(x)) + !$omp parallel do private(i) schedule(static,64) do i=1, n y%v(i) = y%v(i)*x(i) end do @@ -830,6 +832,7 @@ contains if (beta == done) then return else + !$omp parallel do private(i) schedule(static,64) do i=1, n z%v(i) = beta*z%v(i) end do @@ -837,42 +840,51 @@ contains else if (alpha == done) then if (beta == dzero) then + !$omp parallel do private(i) schedule(static,64) do i=1, n z%v(i) = y(i)*x(i) end do else if (beta == done) then + !$omp parallel do private(i) schedule(static,64) do i=1, n z%v(i) = z%v(i) + y(i)*x(i) end do else + !$omp parallel do private(i) schedule(static,64) do i=1, n z%v(i) = beta*z%v(i) + y(i)*x(i) end do end if else if (alpha == -done) then if (beta == dzero) then + !$omp parallel do private(i) schedule(static,64) do i=1, n z%v(i) = -y(i)*x(i) end do else if (beta == done) then + !$omp parallel do private(i) schedule(static,64) do i=1, n z%v(i) = z%v(i) - y(i)*x(i) end do else + !$omp parallel do private(i) schedule(static,64) do i=1, n z%v(i) = beta*z%v(i) - y(i)*x(i) end do end if else if (beta == dzero) then + !$omp parallel do private(i) schedule(static,64) do i=1, n z%v(i) = alpha*y(i)*x(i) end do else if 
(beta == done) then + !$omp parallel do private(i) schedule(static,64) do i=1, n z%v(i) = z%v(i) + alpha*y(i)*x(i) end do else + !$omp parallel do private(i) schedule(static,64) do i=1, n z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) end do diff --git a/base/serial/f77/daxpby.f b/base/serial/f77/daxpby.f index b30a986d..41265f11 100644 --- a/base/serial/f77/daxpby.f +++ b/base/serial/f77/daxpby.f @@ -77,6 +77,7 @@ C if (alpha.eq.dzero) then if (beta.eq.dzero) then do j=1, n +C$omp parallel do private(i) schedule(static,64) do i=1,m y(i,j) = dzero enddo @@ -88,12 +89,14 @@ c$$$ else if (beta.eq.-done) then do j=1,n +C$omp parallel do private(i) schedule(static,64) do i=1,m y(i,j) = - y(i,j) enddo enddo else do j=1,n +C$omp parallel do private(i) schedule(static,64) do i=1,m y(i,j) = beta*y(i,j) enddo @@ -104,12 +107,14 @@ c$$$ if (beta.eq.dzero) then do j=1,n +C$omp parallel do private(i) schedule(static,64) do i=1,m y(i,j) = x(i,j) enddo enddo else if (beta.eq.done) then do j=1,n +C$omp parallel do private(i) schedule(static,64) do i=1,m y(i,j) = x(i,j) + y(i,j) enddo @@ -117,12 +122,14 @@ c$$$ else if (beta.eq.-done) then do j=1,n +C$omp parallel do private(i) schedule(static,64) do i=1,m y(i,j) = x(i,j) - y(i,j) enddo enddo else do j=1,n +C$omp parallel do private(i) schedule(static,64) do i=1,m y(i,j) = x(i,j) + beta*y(i,j) enddo @@ -133,12 +140,14 @@ c$$$ if (beta.eq.dzero) then do j=1,n +C$omp parallel do private(i) schedule(static,64) do i=1,m y(i,j) = -x(i,j) enddo enddo else if (beta.eq.done) then do j=1,n +C$omp parallel do private(i) schedule(static,64) do i=1,m y(i,j) = -x(i,j) + y(i,j) enddo @@ -146,12 +155,14 @@ c$$$ else if (beta.eq.-done) then do j=1,n +C$omp parallel do private(i) schedule(static,64) do i=1,m y(i,j) = -x(i,j) - y(i,j) enddo enddo else do j=1,n +C$omp parallel do private(i) schedule(static,64) do i=1,m y(i,j) = -x(i,j) + beta*y(i,j) enddo @@ -162,12 +173,14 @@ c$$$ if (beta.eq.dzero) then do j=1,n +C$omp parallel do private(i) 
schedule(static,64) do i=1,m y(i,j) = alpha*x(i,j) enddo enddo else if (beta.eq.done) then do j=1,n +C$omp parallel do private(i) schedule(static,64) do i=1,m y(i,j) = alpha*x(i,j) + y(i,j) enddo @@ -175,12 +188,14 @@ c$$$ else if (beta.eq.-done) then do j=1,n +C$omp parallel do private(i) schedule(static,64) do i=1,m y(i,j) = alpha*x(i,j) - y(i,j) enddo enddo else do j=1,n +C$omp parallel do private(i) schedule(static,64) do i=1,m y(i,j) = alpha*x(i,j) + beta*y(i,j) enddo diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.f90 index 96dee37d..00fd750b 100644 --- a/base/serial/impl/psb_d_csr_impl.f90 +++ b/base/serial/impl/psb_d_csr_impl.f90 @@ -152,6 +152,7 @@ contains if (beta == dzero) then if (alpha == done) then + !$omp parallel do private(i,acc,j) schedule(static,64) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -162,6 +163,7 @@ contains else if (alpha == -done) then + !$omp parallel do private(i,acc,j) schedule(static,64) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -172,6 +174,7 @@ contains else + !$omp parallel do private(i,acc,j) schedule(static,64) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -186,6 +189,7 @@ contains else if (beta == done) then if (alpha == done) then + !$omp parallel do private(i,acc,j) schedule(static,64) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -196,6 +200,7 @@ contains else if (alpha == -done) then + !$omp parallel do private(i,acc,j) schedule(static,64) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -206,6 +211,7 @@ contains else + !$omp parallel do private(i,acc,j) schedule(static,64) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -219,6 +225,7 @@ contains else if (beta == -done) then if (alpha == done) then + !$omp parallel do private(i,acc,j) schedule(static,64) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -229,6 +236,7 @@ contains else if (alpha == -done) then + !$omp parallel do private(i,acc,j) schedule(static,64) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -239,6 +247,7 
@@ contains else + !$omp parallel do private(i,acc,j) schedule(static,64) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -252,6 +261,7 @@ contains else if (alpha == done) then + !$omp parallel do private(i,acc,j) schedule(static,64) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -262,6 +272,7 @@ contains else if (alpha == -done) then + !$omp parallel do private(i,acc,j) schedule(static,64) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 @@ -272,6 +283,7 @@ contains else + !$omp parallel do private(i,acc,j) schedule(static,64) do i=1,m acc = dzero do j=irp(i), irp(i+1)-1 diff --git a/config/pac.m4 b/config/pac.m4 index 410d8bea..36e6705f 100644 --- a/config/pac.m4 +++ b/config/pac.m4 @@ -358,6 +358,37 @@ else fi ] ) +dnl @synopsis PAC_ARG_ENABLE_OPENMP +dnl +dnl Test for --enable-openmp +dnl +dnl Sets pac_cv_openmp to "yes" for --enable-openmp and to "no" otherwise (including --disable-openmp). +dnl +dnl Example use: +dnl +dnl PAC_ARG_ENABLE_OPENMP +dnl @author Salvatore Filippone +dnl +AC_DEFUN([PAC_ARG_ENABLE_OPENMP], +[ +AC_MSG_CHECKING([whether we want openmp]) +AC_ARG_ENABLE(openmp, +AC_HELP_STRING([--enable-openmp], +[Specify whether to enable OPENMP compilation ]), +[ +pac_cv_openmp="$enableval"; +] +dnl , +dnl [pac_cv_openmp="no";] +) +if test x"$pac_cv_openmp" = x"yes" ; then + AC_MSG_RESULT([yes.]) +else + pac_cv_openmp="no"; + AC_MSG_RESULT([no.]) +fi +] +) dnl @synopsis PAC_ARG_LONG_INTEGERS dnl diff --git a/configure b/configure index 624f3f93..9e9a3c40 100755 --- a/configure +++ b/configure @@ -658,8 +658,8 @@ INSTALL_DIR INSTALL MPIFCC AR -F90LINK -FLINK +EXTRA90LINKOPT +EXTRALINKOPT MODEXT FMFLAG FIFLAG @@ -785,6 +785,7 @@ with_library_path with_include_path with_module_path enable_dependency_tracking +enable_openmp enable_long_integers with_blas with_lapack @@ -1441,6 +1442,7 @@ Optional Features: in serial mode. --disable-dependency-tracking speeds up one-time build --enable-dependency-tracking do not reject slow dependency extractors + --enable-openmp Specify whether to enable OPENMP compilation --enable-long-integers Specify usage of 64 bits integers. 
Optional Packages: @@ -7367,6 +7369,35 @@ fi # The same for pg (Portland Group compilers). ############################################################################### +{ $as_echo "$as_me:$LINENO: checking whether we want openmp" >&5 +$as_echo_n "checking whether we want openmp... " >&6; } +# Check whether --enable-openmp was given. +if test "${enable_openmp+set}" = set; then + enableval=$enable_openmp; +pac_cv_openmp="$enableval"; + + +fi + +if test x"$pac_cv_openmp" = x"yes" ; then + { $as_echo "$as_me:$LINENO: result: yes." >&5 +$as_echo "yes." >&6; } +else + pac_cv_openmp="no"; + { $as_echo "$as_me:$LINENO: result: no." >&5 +$as_echo "no." >&6; } +fi + + +if test x"$pac_cv_openmp" = x"yes" ; then + FDEFINES="$psblas_cv_define_prepend-DOPENMP $FDEFINES"; + if test "X$psblas_cv_fc" = "Xgcc" ; then + F90COPT="$F90COPT -fopenmp"; + FCOPT="$FCOPT -fopenmp"; + EXTRALINKOPT=" -fopenmp"; + EXTRA90LINKOPT=" -fopenmp"; + fi +fi ############################################################################### # Custom test : do we have a module or include for MPI Fortran interface? @@ -11646,7 +11677,8 @@ fi MPF90 : ${MPF90} MPF77 : ${MPF77} MPCC : ${MPICC} - FLINK : ${FLINK} + EXTRA90LINKOPT : ${EXTRA90LINKOPT} + EXTRALINKOPT : ${EXTRALINKOPT} FDEFINES : ${FDEFINES} CDEFINES : ${CDEFINES} @@ -11679,7 +11711,8 @@ $as_echo "$as_me: MPF90 : ${MPF90} MPF77 : ${MPF77} MPCC : ${MPICC} - FLINK : ${FLINK} + EXTRA90LINKOPT : ${EXTRA90LINKOPT} + EXTRALINKOPT : ${EXTRALINKOPT} FDEFINES : ${FDEFINES} CDEFINES : ${CDEFINES} diff --git a/configure.ac b/configure.ac index e7c33825..88c637e1 100755 --- a/configure.ac +++ b/configure.ac @@ -504,7 +504,16 @@ fi # should be valid options. # The same for pg (Portland Group compilers). 
############################################################################### - +PAC_ARG_ENABLE_OPENMP +if test x"$pac_cv_openmp" = x"yes" ; then + FDEFINES="$psblas_cv_define_prepend-DOPENMP $FDEFINES"; + if test "X$psblas_cv_fc" = "Xgcc" ; then + F90COPT="$F90COPT -fopenmp"; + FCOPT="$FCOPT -fopenmp"; + EXTRALINKOPT=" -fopenmp"; + EXTRA90LINKOPT=" -fopenmp"; + fi +fi ############################################################################### # Custom test : do we have a module or include for MPI Fortran interface? @@ -763,8 +772,8 @@ AC_SUBST(FIFLAG) AC_SUBST(FMFLAG) AC_SUBST(MODEXT) AC_SUBST(MPIF77) -AC_SUBST(FLINK) -AC_SUBST(F90LINK) +AC_SUBST(EXTRALINKOPT) +AC_SUBST(EXTRA90LINKOPT) AC_SUBST(LIBS) AC_SUBST(AR) AC_SUBST(RANLIB) @@ -829,8 +838,8 @@ AC_MSG_NOTICE([ MPF90 : ${MPF90} MPF77 : ${MPF77} MPCC : ${MPICC} -dnl F90LINK : ${F90LINK} - FLINK : ${FLINK} + EXTRA90LINKOPT : ${EXTRA90LINKOPT} + EXTRALINKOPT : ${EXTRALINKOPT} FDEFINES : ${FDEFINES} CDEFINES : ${CDEFINES} diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index ad9aac03..481da4ba 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -1,11 +1,11 @@ 7 Number of entries below this BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES -BJAC Preconditioner NONE DIAG BJAC +DIAG Preconditioner NONE DIAG BJAC CSR Storage format for matrix A: CSR COO JAD 100 Domain size (acutal system is this**3) 2 Stopping criterion 1000 MAXIT -01 ITRACE +-01 ITRACE 002 IRST restart for RGMRES and BiCGSTABL