psblas3/base/serial/jad/sjadmv4.f

359 lines
17 KiB
Fortran

C
C Parallel Sparse BLAS version 2.2
C (C) Copyright 2006/2007/2008
C Salvatore Filippone University of Rome Tor Vergata
C Alfredo Buttari University of Rome Tor Vergata
C
C Redistribution and use in source and binary forms, with or without
C modification, are permitted provided that the following conditions
C are met:
C 1. Redistributions of source code must retain the above copyright
C notice, this list of conditions and the following disclaimer.
C 2. Redistributions in binary form must reproduce the above copyright
C notice, this list of conditions, and the following disclaimer in the
C documentation and/or other materials provided with the distribution.
C 3. The name of the PSBLAS group or the names of its contributors may
C not be used to endorse or promote products derived from this
C software without specific written permission.
C
C THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
C ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
C TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
C PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
C BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
C CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
C SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
C INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
C CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
C ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
C POSSIBILITY OF SUCH DAMAGE.
C
C
***********************************************************************
* PROCEDURAL LOGIC SECTION *
* SUBROUTINE DJADMV (DIAG,NROW,NCOL,ALPHA,NG,A,KA,JA,IA,X,BETA,Y) *
* DOUBLE PRECISION ZERO *
* PARAMETER (ZERO=0.0D0) *
* DOUBLE PRECISION ACC *
* INTEGER I, J, K, IPX, IPG *
* LOGICAL UNI *
*C .. Executable Statements .. *
*C *
*C *
* IF (DIAG.EQ.'U') THEN *
* DO 10 I = 1, NROW *
* Y(I) = BETA*Y(I) + ALPHA*X(I) *
* 10 CONTINUE *
* ELSE *
* DO 20 I = 1, NROW *
* Y(I) = BETA*Y(I) *
* 20 CONTINUE *
* END IF *
* *
* IF (ALPHA.EQ.ZERO) THEN *
* RETURN *
* END IF *
*C *
*C *
* DO 200 IPG = 1, NG *
* DO 50 K = IA(2,IPG), IA(3,IPG)-1 *
* IPX = IA(1,IPG) *
* DO 40 I = JA(K), JA(K+1) - 1 *
* Y(IPX) = Y(IPX) + ALPHA*A(I)*X(KA(I)) *
* IPX = IPX + 1 *
* 40 CONTINUE *
* 50 CONTINUE *
*C *
*C *
* IPX = IA(1,IPG) *
* DO 70 K = IA(3,IPG), IA(2,IPG+1)-1 *
* DO 60 I = JA(K), JA(K+1) - 1 *
* Y(IPX) = Y(IPX) + ALPHA*A(I)*X(KA(I)) *
* 60 CONTINUE *
* IPX = IPX + 1 *
* 70 CONTINUE *
* 200 CONTINUE *
*C *
* RETURN *
*C *
*C *
* END *
* *
* *
***********************************************************************
C
C     SJADMV4
C
C     Single-precision sparse matrix times dense block product on
C     exactly NB = 4 right-hand-side columns:
C
C        Y(1:NROW,1:4) = BETA*Y(1:NROW,1:4) + ALPHA*A*X(:,1:4)
C
C     with an extra ALPHA*X(1:NROW,1:4) term when DIAG is 'U'.
C     The matrix is stored in NG row blocks; each block has a
C     "jagged diagonal" part followed by a row-by-row (CSR-like)
C     part.
C
C     Arguments
C       DIAG    (in)  'U' (upper case only): ALPHA*X(1:NROW,:) is
C                     added to the result; presumably a unit
C                     diagonal that is not stored - confirm with
C                     the caller.  Any other value: nothing added.
C       NROW    (in)  number of rows of Y that are scaled.
C       NCOL    (in)  not referenced by this routine.
C       ALPHA   (in)  scalar multiplying A*X.  If ALPHA is zero the
C                     routine returns right after scaling Y.
C       NG      (in)  number of row blocks.
C       A       (in)  matrix coefficients.
C       KA      (in)  KA(i) is the row of X multiplied by A(i).
C       JA      (in)  JA(k) is the position in A/KA of the first
C                     entry of diagonal (or CSR row) k; JA(k+1)-1
C                     is the position of its last entry.
C       IA      (in)  block descriptors, for block g:
C                       IA(1,g)  first row of Y updated by the block
C                       IA(2,g)  index in JA of its first diagonal
C                       IA(3,g)  index in JA of its first CSR row
C                     IA(2,NG+1) is also read (end of the last CSR
C                     part), so IA needs at least NG+1 columns.
C       X       (in)  dense input, leading dimension LDX; columns
C                     1..4 are read.
C       BETA    (in)  scalar multiplying Y.  When BETA is zero, Y is
C                     overwritten without being read.
C       Y    (inout)  dense result, leading dimension LDY; columns
C                     1..4 are updated.
C       IERROR (out)  always set to 0; no error is ever reported.
C
C     Notes
C       The comparisons of ALPHA and BETA with zero are deliberately
C       exact.
C       The unrolled branches measure only the first diagonal of a
C       block and use that length as the stride for all of them.
C       NOTE(review): this assumes every diagonal of the block has
C       the same number of entries - confirm against the routine
C       that builds IA/JA.
C
      subroutine sjadmv4(diag,nrow,ncol,alpha,ng,a,ka,ja,ia,
     &                   x,ldx,beta,y,ldy,ierror)
      use psb_const_mod, only: psb_spk_, szero
      implicit none
C     .. Scalar arguments ..
      character, intent(in)         :: diag
      integer, intent(in)           :: nrow, ncol, ng, ldx, ldy
      integer, intent(out)          :: ierror
      real(psb_spk_), intent(in)    :: alpha, beta
C     .. Array arguments ..
      integer, intent(in)           :: ia(3,*), ka(*), ja(*)
      real(psb_spk_), intent(in)    :: a(*), x(ldx,*)
      real(psb_spk_), intent(inout) :: y(ldy,*)
C     .. Parameters ..
      integer, parameter :: nb = 4
C     .. Local scalars ..
      integer :: i, k, ipx, ipg, i0, in, npg, kfirst, klast
C     .. Local arrays: one accumulator per row of a block ..
      real(psb_spk_) :: y0(nb), y1(nb), y2(nb), y3(nb), y4(nb),
     &                  y5(nb), y6(nb), y7(nb), y8(nb), y9(nb),
     &                  y10(nb), y11(nb), y12(nb), y13(nb),
     &                  y14(nb), y15(nb)
C     .. Executable statements ..
C
      ierror = 0
C
C     Step 1: Y <- BETA*Y (+ ALPHA*X when DIAG is 'U').
C     BETA = 0 is handled apart so that Y is never read.
C
      if (diag.eq.'U') then
        if (beta.eq.szero) then
          y(1:nrow,1:nb) = alpha*x(1:nrow,1:nb)
        else
          y(1:nrow,1:nb) = beta*y(1:nrow,1:nb)
     &                   + alpha*x(1:nrow,1:nb)
        endif
      else
        if (beta.eq.szero) then
          y(1:nrow,1:nb) = szero
        else
          y(1:nrow,1:nb) = beta*y(1:nrow,1:nb)
        endif
      endif
C
C     Nothing to add when ALPHA is zero.
C
      if (alpha.eq.szero) return
C
C     Step 2: Y <- Y + ALPHA*A*X, one row block at a time.
C
      do ipg = 1, ng
        ipx    = ia(1,ipg)
        kfirst = ia(2,ipg)
        klast  = ia(3,ipg) - 1
        if (klast.ge.kfirst) then
C         Rows per diagonal, measured on the first diagonal.
          npg = ja(kfirst+1) - ja(kfirst)
          i0  = ja(kfirst)
          in  = ja(klast)
        else
C         The block has no diagonals.  JA(KFIRST) would describe
C         the first CSR row, so it must not be used to pick an
C         unrolled branch; NPG = 0 selects the general branch,
C         whose loop is then empty.
          npg = 0
          i0  = 1
          in  = 0
        endif
C
C       Jagged-diagonal part.  For the common block heights the
C       row sums are accumulated in local vectors and ALPHA is
C       applied once per row at the end.
C
        select case (npg)
        case (4)
          y0 = szero
          y1 = szero
          y2 = szero
          y3 = szero
          do i = i0, in+3, 4
            y0 = y0 + a(i)*x(ka(i),1:nb)
            y1 = y1 + a(i+1)*x(ka(i+1),1:nb)
            y2 = y2 + a(i+2)*x(ka(i+2),1:nb)
            y3 = y3 + a(i+3)*x(ka(i+3),1:nb)
          enddo
          y(ipx,1:nb)   = y(ipx,1:nb)   + alpha*y0
          y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1
          y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2
          y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3
        case (5)
          y0 = szero
          y1 = szero
          y2 = szero
          y3 = szero
          y4 = szero
          do i = i0, in+4, 5
            y0 = y0 + a(i)*x(ka(i),1:nb)
            y1 = y1 + a(i+1)*x(ka(i+1),1:nb)
            y2 = y2 + a(i+2)*x(ka(i+2),1:nb)
            y3 = y3 + a(i+3)*x(ka(i+3),1:nb)
            y4 = y4 + a(i+4)*x(ka(i+4),1:nb)
          enddo
          y(ipx,1:nb)   = y(ipx,1:nb)   + alpha*y0
          y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1
          y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2
          y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3
          y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4
        case (6)
          y0 = szero
          y1 = szero
          y2 = szero
          y3 = szero
          y4 = szero
          y5 = szero
          do i = i0, in+5, 6
            y0 = y0 + a(i)*x(ka(i),1:nb)
            y1 = y1 + a(i+1)*x(ka(i+1),1:nb)
            y2 = y2 + a(i+2)*x(ka(i+2),1:nb)
            y3 = y3 + a(i+3)*x(ka(i+3),1:nb)
            y4 = y4 + a(i+4)*x(ka(i+4),1:nb)
            y5 = y5 + a(i+5)*x(ka(i+5),1:nb)
          enddo
          y(ipx,1:nb)   = y(ipx,1:nb)   + alpha*y0
          y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1
          y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2
          y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3
          y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4
          y(ipx+5,1:nb) = y(ipx+5,1:nb) + alpha*y5
        case (7)
          y0 = szero
          y1 = szero
          y2 = szero
          y3 = szero
          y4 = szero
          y5 = szero
          y6 = szero
          do i = i0, in+6, 7
            y0 = y0 + a(i)*x(ka(i),1:nb)
            y1 = y1 + a(i+1)*x(ka(i+1),1:nb)
            y2 = y2 + a(i+2)*x(ka(i+2),1:nb)
            y3 = y3 + a(i+3)*x(ka(i+3),1:nb)
            y4 = y4 + a(i+4)*x(ka(i+4),1:nb)
            y5 = y5 + a(i+5)*x(ka(i+5),1:nb)
            y6 = y6 + a(i+6)*x(ka(i+6),1:nb)
          enddo
          y(ipx,1:nb)   = y(ipx,1:nb)   + alpha*y0
          y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1
          y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2
          y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3
          y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4
          y(ipx+5,1:nb) = y(ipx+5,1:nb) + alpha*y5
          y(ipx+6,1:nb) = y(ipx+6,1:nb) + alpha*y6
        case (8)
          y0 = szero
          y1 = szero
          y2 = szero
          y3 = szero
          y4 = szero
          y5 = szero
          y6 = szero
          y7 = szero
          do i = i0, in+7, 8
            y0 = y0 + a(i)*x(ka(i),1:nb)
            y1 = y1 + a(i+1)*x(ka(i+1),1:nb)
            y2 = y2 + a(i+2)*x(ka(i+2),1:nb)
            y3 = y3 + a(i+3)*x(ka(i+3),1:nb)
            y4 = y4 + a(i+4)*x(ka(i+4),1:nb)
            y5 = y5 + a(i+5)*x(ka(i+5),1:nb)
            y6 = y6 + a(i+6)*x(ka(i+6),1:nb)
            y7 = y7 + a(i+7)*x(ka(i+7),1:nb)
          enddo
          y(ipx,1:nb)   = y(ipx,1:nb)   + alpha*y0
          y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1
          y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2
          y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3
          y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4
          y(ipx+5,1:nb) = y(ipx+5,1:nb) + alpha*y5
          y(ipx+6,1:nb) = y(ipx+6,1:nb) + alpha*y6
          y(ipx+7,1:nb) = y(ipx+7,1:nb) + alpha*y7
        case (16)
          y0  = szero
          y1  = szero
          y2  = szero
          y3  = szero
          y4  = szero
          y5  = szero
          y6  = szero
          y7  = szero
          y8  = szero
          y9  = szero
          y10 = szero
          y11 = szero
          y12 = szero
          y13 = szero
          y14 = szero
          y15 = szero
          do i = i0, in+15, 16
            y0  = y0  + a(i)*x(ka(i),1:nb)
            y1  = y1  + a(i+1)*x(ka(i+1),1:nb)
            y2  = y2  + a(i+2)*x(ka(i+2),1:nb)
            y3  = y3  + a(i+3)*x(ka(i+3),1:nb)
            y4  = y4  + a(i+4)*x(ka(i+4),1:nb)
            y5  = y5  + a(i+5)*x(ka(i+5),1:nb)
            y6  = y6  + a(i+6)*x(ka(i+6),1:nb)
            y7  = y7  + a(i+7)*x(ka(i+7),1:nb)
            y8  = y8  + a(i+8)*x(ka(i+8),1:nb)
            y9  = y9  + a(i+9)*x(ka(i+9),1:nb)
            y10 = y10 + a(i+10)*x(ka(i+10),1:nb)
            y11 = y11 + a(i+11)*x(ka(i+11),1:nb)
            y12 = y12 + a(i+12)*x(ka(i+12),1:nb)
            y13 = y13 + a(i+13)*x(ka(i+13),1:nb)
            y14 = y14 + a(i+14)*x(ka(i+14),1:nb)
            y15 = y15 + a(i+15)*x(ka(i+15),1:nb)
          enddo
          y(ipx,1:nb)    = y(ipx,1:nb)    + alpha*y0
          y(ipx+1,1:nb)  = y(ipx+1,1:nb)  + alpha*y1
          y(ipx+2,1:nb)  = y(ipx+2,1:nb)  + alpha*y2
          y(ipx+3,1:nb)  = y(ipx+3,1:nb)  + alpha*y3
          y(ipx+4,1:nb)  = y(ipx+4,1:nb)  + alpha*y4
          y(ipx+5,1:nb)  = y(ipx+5,1:nb)  + alpha*y5
          y(ipx+6,1:nb)  = y(ipx+6,1:nb)  + alpha*y6
          y(ipx+7,1:nb)  = y(ipx+7,1:nb)  + alpha*y7
          y(ipx+8,1:nb)  = y(ipx+8,1:nb)  + alpha*y8
          y(ipx+9,1:nb)  = y(ipx+9,1:nb)  + alpha*y9
          y(ipx+10,1:nb) = y(ipx+10,1:nb) + alpha*y10
          y(ipx+11,1:nb) = y(ipx+11,1:nb) + alpha*y11
          y(ipx+12,1:nb) = y(ipx+12,1:nb) + alpha*y12
          y(ipx+13,1:nb) = y(ipx+13,1:nb) + alpha*y13
          y(ipx+14,1:nb) = y(ipx+14,1:nb) + alpha*y14
          y(ipx+15,1:nb) = y(ipx+15,1:nb) + alpha*y15
        case default
C         General block height: walk each diagonal in turn; entry
C         number j of a diagonal belongs to row IA(1,IPG)+j-1.
          do k = kfirst, klast
            ipx = ia(1,ipg)
            do i = ja(k), ja(k+1) - 1
              y(ipx,1:nb) = y(ipx,1:nb) + alpha*a(i)*x(ka(i),1:nb)
              ipx = ipx + 1
            enddo
          enddo
        end select
C
C       CSR part of the block: entry k of JA describes one whole
C       row, starting again from the first row of the block.
C
        ipx = ia(1,ipg)
        do k = ia(3,ipg), ia(2,ipg+1) - 1
          do i = ja(k), ja(k+1) - 1
            y(ipx,1:nb) = y(ipx,1:nb) + alpha*a(i)*x(ka(i),1:nb)
          enddo
          ipx = ipx + 1
        enddo
      enddo
C
      return
      end subroutine sjadmv4