Changed pdegen test programs: generate a bunch of rows at a time, with
a better strategy for looping on the local rows.
stopcriterion
Salvatore Filippone 17 years ago
parent 20ca6eb29a
commit 4c77936f87

@ -129,7 +129,8 @@ program mld_dexample_1lev
call psb_barrier(ictxt) call psb_barrier(ictxt)
t1 = psb_wtime() t1 = psb_wtime()
call create_matrix(idim,A,b,x,desc_A,part_block,ictxt,info) call create_matrix(idim,a,b,x,desc_a,part_block,ictxt,info)
call psb_barrier(ictxt)
t2 = psb_wtime() - t1 t2 = psb_wtime() - t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
@ -137,8 +138,7 @@ program mld_dexample_1lev
goto 9999 goto 9999
end if end if
call psb_amx(ictxt,t2) if (iam == psb_root_) write(*,'("Overall matrix creation time : ",es12.5)')t2
if (iam == psb_root_) write(*,'("Overall matrix creation time : ",es10.4)')t2
if (iam == psb_root_) write(*,'(" ")') if (iam == psb_root_) write(*,'(" ")')
! set RAS with overlap 2 and ILU(0) on the local blocks ! set RAS with overlap 2 and ILU(0) on the local blocks
@ -199,13 +199,13 @@ program mld_dexample_1lev
write(*,'("Matrix from PDE example")') write(*,'("Matrix from PDE example")')
write(*,'("Computed solution on ",i8," processors")')np write(*,'("Computed solution on ",i8," processors")')np
write(*,'("Iterations to convergence : ",i6)')iter write(*,'("Iterations to convergence : ",i6)')iter
write(*,'("Error estimate on exit : ",es10.4)')err write(*,'("Error estimate on exit : ",es12.5)')err
write(*,'("Time to build prec. : ",es10.4)')tprec write(*,'("Time to build prec. : ",es12.5)')tprec
write(*,'("Time to solve system : ",es10.4)')t2 write(*,'("Time to solve system : ",es12.5)')t2
write(*,'("Time per iteration : ",es10.4)')t2/(iter) write(*,'("Time per iteration : ",es12.5)')t2/(iter)
write(*,'("Total time : ",es10.4)')t2+tprec write(*,'("Total time : ",es12.5)')t2+tprec
write(*,'("Residual 2-norm : ",es10.4)')resmx write(*,'("Residual 2-norm : ",es12.5)')resmx
write(*,'("Residual inf-norm : ",es10.4)')resmxp write(*,'("Residual inf-norm : ",es12.5)')resmxp
write(*,'("Total memory occupation for A : ",i10)')amatsize write(*,'("Total memory occupation for A : ",i10)')amatsize
write(*,'("Total memory occupation for DESC_A : ",i10)')descsize write(*,'("Total memory occupation for DESC_A : ",i10)')descsize
write(*,'("Total memory occupation for PREC : ",i10)')precsize write(*,'("Total memory occupation for PREC : ",i10)')precsize
@ -276,7 +276,7 @@ contains
use psb_base_mod use psb_base_mod
implicit none implicit none
integer :: idim integer :: idim
integer, parameter :: nbmax=10 integer, parameter :: nb=20
real(psb_dpk_), allocatable :: b(:),xv(:) real(psb_dpk_), allocatable :: b(:),xv(:)
type(psb_desc_type) :: desc_a type(psb_desc_type) :: desc_a
integer :: ictxt, info integer :: ictxt, info
@ -291,18 +291,18 @@ contains
end interface end interface
! local variables ! local variables
type(psb_dspmat_type) :: a type(psb_dspmat_type) :: a
real(psb_dpk_) :: zt(nbmax),glob_x,glob_y,glob_z real(psb_dpk_) :: zt(nb),glob_x,glob_y,glob_z
integer :: m,n,nnz,glob_row,ipoints integer :: m,n,nnz,glob_row,nlr,i,ii,ib,k,ipoints
integer :: x,y,z,ia,indx_owner integer :: x,y,z,ia,indx_owner
integer :: np, iam integer :: np, iam
integer :: element integer :: element
integer, allocatable :: irow(:),icol(:) integer, allocatable :: irow(:),icol(:),myidx(:)
real(psb_dpk_), allocatable :: val(:) real(psb_dpk_), allocatable :: val(:)
! deltah dimension of each grid cell ! deltah dimension of each grid cell
! deltat discretization time ! deltat discretization time
real(psb_dpk_) :: deltah real(psb_dpk_) :: deltah
real(psb_dpk_),parameter :: rhs=0.d0,one=1.d0,zero=0.d0 real(psb_dpk_),parameter :: rhs=0.d0,one=1.d0,zero=0.d0
real(psb_dpk_) :: t1, t2, t3, tins, tasb real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen
real(psb_dpk_) :: a1, a2, a3, a4, b1, b2, b3 real(psb_dpk_) :: a1, a2, a3, a4, b1, b2, b3
external :: a1, a2, a3, a4, b1, b2, b3 external :: a1, a2, a3, a4, b1, b2, b3
integer :: err_act integer :: err_act
@ -326,11 +326,17 @@ contains
nnz = ((n*9)/(np)) nnz = ((n*9)/(np))
if(iam == psb_root_) write(0,'("Generating Matrix (size=",i0x,")...")')n if(iam == psb_root_) write(0,'("Generating Matrix (size=",i0x,")...")')n
call psb_barrier(ictxt)
t0 = psb_wtime()
call psb_cdall(ictxt,desc_a,info,mg=n,parts=parts) call psb_cdall(ictxt,desc_a,info,mg=n,parts=parts)
if (info == 0) call psb_spall(a,desc_a,info,nnz=nnz) if (info == 0) call psb_spall(a,desc_a,info,nnz=nnz)
! define rhs from boundary conditions; also build initial guess ! define rhs from boundary conditions; also build initial guess
if (info == 0) call psb_geall(b,desc_a,info) if (info == 0) call psb_geall(b,desc_a,info)
if (info == 0) call psb_geall(xv,desc_a,info) if (info == 0) call psb_geall(xv,desc_a,info)
nlr = psb_cd_get_local_rows(desc_a)
call psb_barrier(ictxt)
talc = psb_wtime()-t0
if (info /= 0) then if (info /= 0) then
info=4010 info=4010
call psb_errpush(info,name) call psb_errpush(info,name)
@ -341,26 +347,33 @@ contains
! time; just a small matrix. might be extended to generate ! time; just a small matrix. might be extended to generate
! a bunch of rows per call. ! a bunch of rows per call.
! !
allocate(val(20*nbmax),irow(20*nbmax),& allocate(val(20*nb),irow(20*nb),&
&icol(20*nbmax),stat=info) &icol(20*nb),myidx(nlr),stat=info)
if (info /= 0 ) then if (info /= 0 ) then
info=4000 info=4000
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
endif endif
tins = 0.d0 do i=1,nlr
call psb_barrier(ictxt) myidx(i) = i
t1 = psb_wtime() end do
call psb_loc_to_glob(myidx,desc_a,info)
! loop over rows belonging to current process in a block ! loop over rows belonging to current process in a block
! distribution. ! distribution.
do glob_row = 1, n call psb_barrier(ictxt)
! Figure out which rows are local to the current process: t1 = psb_wtime()
if (psb_is_owned(glob_row,desc_a)) then do ii=1, nlr,nb
ib = min(nb,nlr-ii+1)
element = 1
do k=1,ib
i=ii+k-1
! local matrix pointer ! local matrix pointer
element=1 glob_row=myidx(i)
! compute gridpoint coordinates ! compute gridpoint coordinates
if (mod(glob_row,ipoints*ipoints) == 0) then if (mod(glob_row,ipoints*ipoints) == 0) then
x = glob_row/(ipoints*ipoints) x = glob_row/(ipoints*ipoints)
@ -379,7 +392,7 @@ contains
glob_z=z*deltah glob_z=z*deltah
! check on boundary points ! check on boundary points
zt(1) = 0.d0 zt(k) = 0.d0
! internal point: build discretization ! internal point: build discretization
! !
! term depending on (x-1,y,z) ! term depending on (x-1,y,z)
@ -389,14 +402,15 @@ contains
& -a1(glob_x,glob_y,glob_z) & -a1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*(-val(element))
else else
val(element)=-b1(glob_x,glob_y,glob_z)& val(element)=-b1(glob_x,glob_y,glob_z)&
& -a1(glob_x,glob_y,glob_z) & -a1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-2)*ipoints*ipoints+(y-1)*ipoints+(z) icol(element) = (x-2)*ipoints*ipoints+(y-1)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y-1,z) ! term depending on (x,y-1,z)
if (y==1) then if (y==1) then
@ -404,14 +418,15 @@ contains
& -a2(glob_x,glob_y,glob_z) & -a2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_x**2-glob_z**2)*(-val(element)) zt(k) = exp(-glob_x**2-glob_z**2)*(-val(element))
else else
val(element)=-b2(glob_x,glob_y,glob_z)& val(element)=-b2(glob_x,glob_y,glob_z)&
& -a2(glob_x,glob_y,glob_z) & -a2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y-2)*ipoints+(z) icol(element) = (x-1)*ipoints*ipoints+(y-2)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y,z-1) ! term depending on (x,y,z-1)
if (z==1) then if (z==1) then
@ -419,14 +434,15 @@ contains
& -a3(glob_x,glob_y,glob_z) & -a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_x**2-glob_y**2)*(-val(element)) zt(k) = exp(-glob_x**2-glob_y**2)*(-val(element))
else else
val(element)=-b3(glob_x,glob_y,glob_z)& val(element)=-b3(glob_x,glob_y,glob_z)&
& -a3(glob_x,glob_y,glob_z) & -a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y-1)*ipoints+(z-1) icol(element) = (x-1)*ipoints*ipoints+(y-1)*ipoints+(z-1)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y,z) ! term depending on (x,y,z)
val(element)=2*b1(glob_x,glob_y,glob_z)& val(element)=2*b1(glob_x,glob_y,glob_z)&
@ -437,65 +453,63 @@ contains
& +a3(glob_x,glob_y,glob_z) & +a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y-1)*ipoints+(z) icol(element) = (x-1)*ipoints*ipoints+(y-1)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
! term depending on (x,y,z+1) ! term depending on (x,y,z+1)
if (z==ipoints) then if (z==ipoints) then
val(element)=-b1(glob_x,glob_y,glob_z) val(element)=-b1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_x**2-glob_y**2)*exp(-glob_z)*(-val(element)) zt(k) = exp(-glob_x**2-glob_y**2)*exp(-glob_z)*(-val(element))
else else
val(element)=-b1(glob_x,glob_y,glob_z) val(element)=-b1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y-1)*ipoints+(z+1) icol(element) = (x-1)*ipoints*ipoints+(y-1)*ipoints+(z+1)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y+1,z) ! term depending on (x,y+1,z)
if (y==ipoints) then if (y==ipoints) then
val(element)=-b2(glob_x,glob_y,glob_z) val(element)=-b2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_x**2-glob_z**2)*exp(-glob_y)*(-val(element)) zt(k) = exp(-glob_x**2-glob_z**2)*exp(-glob_y)*(-val(element))
else else
val(element)=-b2(glob_x,glob_y,glob_z) val(element)=-b2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y)*ipoints+(z) icol(element)=(x-1)*ipoints*ipoints+(y)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x+1,y,z) ! term depending on (x+1,y,z)
if (x==ipoints) then if (x==ipoints) then
val(element)=-b3(glob_x,glob_y,glob_z) val(element)=-b3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element))
else else
val(element)=-b3(glob_x,glob_y,glob_z) val(element)=-b3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x)*ipoints*ipoints+(y-1)*ipoints+(z) icol(element) = (x)*ipoints*ipoints+(y-1)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
irow(1:element-1)=glob_row
ia=glob_row
t3 = psb_wtime() end do
call psb_spins(element-1,irow,icol,val,a,desc_a,info) call psb_spins(element-1,irow,icol,val,a,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
tins = tins + (psb_wtime()-t3) call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),b,desc_a,info)
call psb_geins(1,(/ia/),zt(1:1),b,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
zt(1)=0.d0 zt(:)=0.d0
call psb_geins(1,(/ia/),zt(1:1),xv,desc_a,info) call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
end if
end do end do
call psb_barrier(ictxt) tgen = psb_wtime()-t1
t2 = psb_wtime()-t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
call psb_errpush(info,name) call psb_errpush(info,name)
@ -504,29 +518,17 @@ contains
deallocate(val,irow,icol) deallocate(val,irow,icol)
call psb_barrier(ictxt)
t1 = psb_wtime() t1 = psb_wtime()
call psb_cdasb(desc_a,info) call psb_cdasb(desc_a,info)
call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_) if (info == 0) &
& call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_)
call psb_barrier(ictxt) call psb_barrier(ictxt)
tasb = psb_wtime()-t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
call psb_amx(ictxt,t2)
call psb_amx(ictxt,tins)
call psb_amx(ictxt,tasb)
if(iam == psb_root_) then
write(*,'("The matrix has been generated and assembled in ",a3," format.")')&
& a%fida(1:3)
write(*,'("-pspins time : ",es10.4)')tins
write(*,'("-insert time : ",es10.4)')t2
write(*,'("-assembly time : ",es10.4)')tasb
end if
call psb_geasb(b,desc_a,info) call psb_geasb(b,desc_a,info)
call psb_geasb(xv,desc_a,info) call psb_geasb(xv,desc_a,info)
if(info /= 0) then if(info /= 0) then
@ -534,7 +536,23 @@ contains
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
tasb = psb_wtime()-t1
call psb_barrier(ictxt)
ttot = psb_wtime() - t0
call psb_amx(ictxt,talc)
call psb_amx(ictxt,tgen)
call psb_amx(ictxt,tasb)
call psb_amx(ictxt,ttot)
if(iam == psb_root_) then
write(*,'("The matrix has been generated and assembled in ",a3," format.")')&
& a%fida(1:3)
write(*,'("-allocation time : ",es12.5)') talc
write(*,'("-coeff. gen. time : ",es12.5)') tgen
write(*,'("-assembly time : ",es12.5)') tasb
write(*,'("-total time : ",es12.5)') ttot
end if
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -546,7 +564,7 @@ contains
end if end if
return return
end subroutine create_matrix end subroutine create_matrix
end program mld_dexample_1lev end program
! !
! functions parametrizing the differential equation ! functions parametrizing the differential equation
! !

@ -134,6 +134,7 @@ program mld_dexample_ml
call psb_barrier(ictxt) call psb_barrier(ictxt)
t1 = psb_wtime() t1 = psb_wtime()
call create_matrix(idim,A,b,x,desc_A,part_block,ictxt,info) call create_matrix(idim,A,b,x,desc_A,part_block,ictxt,info)
call psb_barrier(ictxt)
t2 = psb_wtime() - t1 t2 = psb_wtime() - t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
@ -141,8 +142,7 @@ program mld_dexample_ml
goto 9999 goto 9999
end if end if
call psb_amx(ictxt,t2) if (iam == psb_root_) write(*,'("Overall matrix creation time : ",es12.5)')t2
if (iam == psb_root_) write(*,'("Overall matrix creation time : ",es10.4)')t2
if (iam == psb_root_) write(*,'(" ")') if (iam == psb_root_) write(*,'(" ")')
select case(choice) select case(choice)
@ -235,13 +235,13 @@ program mld_dexample_ml
write(*,'("Matrix from PDE example")') write(*,'("Matrix from PDE example")')
write(*,'("Computed solution on ",i8," processors")')np write(*,'("Computed solution on ",i8," processors")')np
write(*,'("Iterations to convergence : ",i6)')iter write(*,'("Iterations to convergence : ",i6)')iter
write(*,'("Error estimate on exit : ",es10.4)')err write(*,'("Error estimate on exit : ",es12.5)')err
write(*,'("Time to build prec. : ",es10.4)')tprec write(*,'("Time to build prec. : ",es12.5)')tprec
write(*,'("Time to solve system : ",es10.4)')t2 write(*,'("Time to solve system : ",es12.5)')t2
write(*,'("Time per iteration : ",es10.4)')t2/(iter) write(*,'("Time per iteration : ",es12.5)')t2/(iter)
write(*,'("Total time : ",es10.4)')t2+tprec write(*,'("Total time : ",es12.5)')t2+tprec
write(*,'("Residual 2-norm : ",es10.4)')resmx write(*,'("Residual 2-norm : ",es12.5)')resmx
write(*,'("Residual inf-norm : ",es10.4)')resmxp write(*,'("Residual inf-norm : ",es12.5)')resmxp
write(*,'("Total memory occupation for A : ",i10)')amatsize write(*,'("Total memory occupation for A : ",i10)')amatsize
write(*,'("Total memory occupation for DESC_A : ",i10)')descsize write(*,'("Total memory occupation for DESC_A : ",i10)')descsize
write(*,'("Total memory occupation for PREC : ",i10)')precsize write(*,'("Total memory occupation for PREC : ",i10)')precsize
@ -314,7 +314,7 @@ contains
use psb_base_mod use psb_base_mod
implicit none implicit none
integer :: idim integer :: idim
integer, parameter :: nbmax=10 integer, parameter :: nb=20
real(psb_dpk_), allocatable :: b(:),xv(:) real(psb_dpk_), allocatable :: b(:),xv(:)
type(psb_desc_type) :: desc_a type(psb_desc_type) :: desc_a
integer :: ictxt, info integer :: ictxt, info
@ -329,18 +329,18 @@ contains
end interface end interface
! local variables ! local variables
type(psb_dspmat_type) :: a type(psb_dspmat_type) :: a
real(psb_dpk_) :: zt(nbmax),glob_x,glob_y,glob_z real(psb_dpk_) :: zt(nb),glob_x,glob_y,glob_z
integer :: m,n,nnz,glob_row,ipoints integer :: m,n,nnz,glob_row,nlr,i,ii,ib,k,ipoints
integer :: x,y,z,ia,indx_owner integer :: x,y,z,ia,indx_owner
integer :: np, iam integer :: np, iam
integer :: element integer :: element
integer, allocatable :: irow(:),icol(:) integer, allocatable :: irow(:),icol(:),myidx(:)
real(psb_dpk_), allocatable :: val(:) real(psb_dpk_), allocatable :: val(:)
! deltah dimension of each grid cell ! deltah dimension of each grid cell
! deltat discretization time ! deltat discretization time
real(psb_dpk_) :: deltah real(psb_dpk_) :: deltah
real(psb_dpk_),parameter :: rhs=0.d0,one=1.d0,zero=0.d0 real(psb_dpk_),parameter :: rhs=0.d0,one=1.d0,zero=0.d0
real(psb_dpk_) :: t1, t2, t3, tins, tasb real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen
real(psb_dpk_) :: a1, a2, a3, a4, b1, b2, b3 real(psb_dpk_) :: a1, a2, a3, a4, b1, b2, b3
external :: a1, a2, a3, a4, b1, b2, b3 external :: a1, a2, a3, a4, b1, b2, b3
integer :: err_act integer :: err_act
@ -364,11 +364,17 @@ contains
nnz = ((n*9)/(np)) nnz = ((n*9)/(np))
if(iam == psb_root_) write(0,'("Generating Matrix (size=",i0x,")...")')n if(iam == psb_root_) write(0,'("Generating Matrix (size=",i0x,")...")')n
call psb_barrier(ictxt)
t0 = psb_wtime()
call psb_cdall(ictxt,desc_a,info,mg=n,parts=parts) call psb_cdall(ictxt,desc_a,info,mg=n,parts=parts)
if (info == 0) call psb_spall(a,desc_a,info,nnz=nnz) if (info == 0) call psb_spall(a,desc_a,info,nnz=nnz)
! define rhs from boundary conditions; also build initial guess ! define rhs from boundary conditions; also build initial guess
if (info == 0) call psb_geall(b,desc_a,info) if (info == 0) call psb_geall(b,desc_a,info)
if (info == 0) call psb_geall(xv,desc_a,info) if (info == 0) call psb_geall(xv,desc_a,info)
nlr = psb_cd_get_local_rows(desc_a)
call psb_barrier(ictxt)
talc = psb_wtime()-t0
if (info /= 0) then if (info /= 0) then
info=4010 info=4010
call psb_errpush(info,name) call psb_errpush(info,name)
@ -379,26 +385,33 @@ contains
! time; just a small matrix. might be extended to generate ! time; just a small matrix. might be extended to generate
! a bunch of rows per call. ! a bunch of rows per call.
! !
allocate(val(20*nbmax),irow(20*nbmax),& allocate(val(20*nb),irow(20*nb),&
&icol(20*nbmax),stat=info) &icol(20*nb),myidx(nlr),stat=info)
if (info /= 0 ) then if (info /= 0 ) then
info=4000 info=4000
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
endif endif
tins = 0.d0 do i=1,nlr
call psb_barrier(ictxt) myidx(i) = i
t1 = psb_wtime() end do
call psb_loc_to_glob(myidx,desc_a,info)
! loop over rows belonging to current process in a block ! loop over rows belonging to current process in a block
! distribution. ! distribution.
do glob_row = 1, n call psb_barrier(ictxt)
! Figure out which rows are local to the current process: t1 = psb_wtime()
if (psb_is_owned(glob_row,desc_a)) then do ii=1, nlr,nb
ib = min(nb,nlr-ii+1)
element = 1
do k=1,ib
i=ii+k-1
! local matrix pointer ! local matrix pointer
element=1 glob_row=myidx(i)
! compute gridpoint coordinates ! compute gridpoint coordinates
if (mod(glob_row,ipoints*ipoints) == 0) then if (mod(glob_row,ipoints*ipoints) == 0) then
x = glob_row/(ipoints*ipoints) x = glob_row/(ipoints*ipoints)
@ -417,7 +430,7 @@ contains
glob_z=z*deltah glob_z=z*deltah
! check on boundary points ! check on boundary points
zt(1) = 0.d0 zt(k) = 0.d0
! internal point: build discretization ! internal point: build discretization
! !
! term depending on (x-1,y,z) ! term depending on (x-1,y,z)
@ -427,14 +440,15 @@ contains
& -a1(glob_x,glob_y,glob_z) & -a1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*(-val(element))
else else
val(element)=-b1(glob_x,glob_y,glob_z)& val(element)=-b1(glob_x,glob_y,glob_z)&
& -a1(glob_x,glob_y,glob_z) & -a1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-2)*ipoints*ipoints+(y-1)*ipoints+(z) icol(element) = (x-2)*ipoints*ipoints+(y-1)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y-1,z) ! term depending on (x,y-1,z)
if (y==1) then if (y==1) then
@ -442,14 +456,15 @@ contains
& -a2(glob_x,glob_y,glob_z) & -a2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_x**2-glob_z**2)*(-val(element)) zt(k) = exp(-glob_x**2-glob_z**2)*(-val(element))
else else
val(element)=-b2(glob_x,glob_y,glob_z)& val(element)=-b2(glob_x,glob_y,glob_z)&
& -a2(glob_x,glob_y,glob_z) & -a2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y-2)*ipoints+(z) icol(element) = (x-1)*ipoints*ipoints+(y-2)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y,z-1) ! term depending on (x,y,z-1)
if (z==1) then if (z==1) then
@ -457,14 +472,15 @@ contains
& -a3(glob_x,glob_y,glob_z) & -a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_x**2-glob_y**2)*(-val(element)) zt(k) = exp(-glob_x**2-glob_y**2)*(-val(element))
else else
val(element)=-b3(glob_x,glob_y,glob_z)& val(element)=-b3(glob_x,glob_y,glob_z)&
& -a3(glob_x,glob_y,glob_z) & -a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y-1)*ipoints+(z-1) icol(element) = (x-1)*ipoints*ipoints+(y-1)*ipoints+(z-1)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y,z) ! term depending on (x,y,z)
val(element)=2*b1(glob_x,glob_y,glob_z)& val(element)=2*b1(glob_x,glob_y,glob_z)&
@ -475,65 +491,63 @@ contains
& +a3(glob_x,glob_y,glob_z) & +a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y-1)*ipoints+(z) icol(element) = (x-1)*ipoints*ipoints+(y-1)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
! term depending on (x,y,z+1) ! term depending on (x,y,z+1)
if (z==ipoints) then if (z==ipoints) then
val(element)=-b1(glob_x,glob_y,glob_z) val(element)=-b1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_x**2-glob_y**2)*exp(-glob_z)*(-val(element)) zt(k) = exp(-glob_x**2-glob_y**2)*exp(-glob_z)*(-val(element))
else else
val(element)=-b1(glob_x,glob_y,glob_z) val(element)=-b1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y-1)*ipoints+(z+1) icol(element) = (x-1)*ipoints*ipoints+(y-1)*ipoints+(z+1)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y+1,z) ! term depending on (x,y+1,z)
if (y==ipoints) then if (y==ipoints) then
val(element)=-b2(glob_x,glob_y,glob_z) val(element)=-b2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_x**2-glob_z**2)*exp(-glob_y)*(-val(element)) zt(k) = exp(-glob_x**2-glob_z**2)*exp(-glob_y)*(-val(element))
else else
val(element)=-b2(glob_x,glob_y,glob_z) val(element)=-b2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y)*ipoints+(z) icol(element)=(x-1)*ipoints*ipoints+(y)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x+1,y,z) ! term depending on (x+1,y,z)
if (x==ipoints) then if (x==ipoints) then
val(element)=-b3(glob_x,glob_y,glob_z) val(element)=-b3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element))
else else
val(element)=-b3(glob_x,glob_y,glob_z) val(element)=-b3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x)*ipoints*ipoints+(y-1)*ipoints+(z) icol(element) = (x)*ipoints*ipoints+(y-1)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
irow(1:element-1)=glob_row
ia=glob_row
t3 = psb_wtime() end do
call psb_spins(element-1,irow,icol,val,a,desc_a,info) call psb_spins(element-1,irow,icol,val,a,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
tins = tins + (psb_wtime()-t3) call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),b,desc_a,info)
call psb_geins(1,(/ia/),zt(1:1),b,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
zt(1)=0.d0 zt(:)=0.d0
call psb_geins(1,(/ia/),zt(1:1),xv,desc_a,info) call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
end if
end do end do
call psb_barrier(ictxt) tgen = psb_wtime()-t1
t2 = psb_wtime()-t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
call psb_errpush(info,name) call psb_errpush(info,name)
@ -542,29 +556,17 @@ contains
deallocate(val,irow,icol) deallocate(val,irow,icol)
call psb_barrier(ictxt)
t1 = psb_wtime() t1 = psb_wtime()
call psb_cdasb(desc_a,info) call psb_cdasb(desc_a,info)
call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_) if (info == 0) &
& call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_)
call psb_barrier(ictxt) call psb_barrier(ictxt)
tasb = psb_wtime()-t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
call psb_amx(ictxt,t2)
call psb_amx(ictxt,tins)
call psb_amx(ictxt,tasb)
if(iam == psb_root_) then
write(*,'("The matrix has been generated and assembled in ",a3," format.")')&
& a%fida(1:3)
write(*,'("-pspins time : ",es10.4)')tins
write(*,'("-insert time : ",es10.4)')t2
write(*,'("-assembly time : ",es10.4)')tasb
end if
call psb_geasb(b,desc_a,info) call psb_geasb(b,desc_a,info)
call psb_geasb(xv,desc_a,info) call psb_geasb(xv,desc_a,info)
if(info /= 0) then if(info /= 0) then
@ -572,7 +574,23 @@ contains
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
tasb = psb_wtime()-t1
call psb_barrier(ictxt)
ttot = psb_wtime() - t0
call psb_amx(ictxt,talc)
call psb_amx(ictxt,tgen)
call psb_amx(ictxt,tasb)
call psb_amx(ictxt,ttot)
if(iam == psb_root_) then
write(*,'("The matrix has been generated and assembled in ",a3," format.")')&
& a%fida(1:3)
write(*,'("-allocation time : ",es12.5)') talc
write(*,'("-coeff. gen. time : ",es12.5)') tgen
write(*,'("-assembly time : ",es12.5)') tasb
write(*,'("-total time : ",es12.5)') ttot
end if
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return

@ -130,7 +130,8 @@ program mld_sexample_1lev
call psb_barrier(ictxt) call psb_barrier(ictxt)
t1 = psb_wtime() t1 = psb_wtime()
call create_matrix(idim,A,b,x,desc_A,part_block,ictxt,info) call create_matrix(idim,a,b,x,desc_a,part_block,ictxt,info)
call psb_barrier(ictxt)
t2 = psb_wtime() - t1 t2 = psb_wtime() - t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
@ -138,8 +139,7 @@ program mld_sexample_1lev
goto 9999 goto 9999
end if end if
call psb_amx(ictxt,t2) if (iam == psb_root_) write(*,'("Overall matrix creation time : ",es12.5)')t2
if (iam == psb_root_) write(*,'("Overall matrix creation time : ",es10.4)')t2
if (iam == psb_root_) write(*,'(" ")') if (iam == psb_root_) write(*,'(" ")')
! set RAS with overlap 2 and ILU(0) on the local blocks ! set RAS with overlap 2 and ILU(0) on the local blocks
@ -200,13 +200,13 @@ program mld_sexample_1lev
write(*,'("Matrix from PDE example")') write(*,'("Matrix from PDE example")')
write(*,'("Computed solution on ",i8," processors")')np write(*,'("Computed solution on ",i8," processors")')np
write(*,'("Iterations to convergence : ",i6)')iter write(*,'("Iterations to convergence : ",i6)')iter
write(*,'("Error estimate on exit : ",es10.4)')err write(*,'("Error estimate on exit : ",es12.5)')err
write(*,'("Time to build prec. : ",es10.4)')tprec write(*,'("Time to build prec. : ",es12.5)')tprec
write(*,'("Time to solve system : ",es10.4)')t2 write(*,'("Time to solve system : ",es12.5)')t2
write(*,'("Time per iteration : ",es10.4)')t2/(iter) write(*,'("Time per iteration : ",es12.5)')t2/(iter)
write(*,'("Total time : ",es10.4)')t2+tprec write(*,'("Total time : ",es12.5)')t2+tprec
write(*,'("Residual 2-norm : ",es10.4)')resmx write(*,'("Residual 2-norm : ",es12.5)')resmx
write(*,'("Residual inf-norm : ",es10.4)')resmxp write(*,'("Residual inf-norm : ",es12.5)')resmxp
write(*,'("Total memory occupation for A : ",i10)')amatsize write(*,'("Total memory occupation for A : ",i10)')amatsize
write(*,'("Total memory occupation for DESC_A : ",i10)')descsize write(*,'("Total memory occupation for DESC_A : ",i10)')descsize
write(*,'("Total memory occupation for PREC : ",i10)')precsize write(*,'("Total memory occupation for PREC : ",i10)')precsize
@ -277,7 +277,7 @@ contains
use psb_base_mod use psb_base_mod
implicit none implicit none
integer :: idim integer :: idim
integer, parameter :: nbmax=10 integer, parameter :: nb=20
real(psb_spk_), allocatable :: b(:),xv(:) real(psb_spk_), allocatable :: b(:),xv(:)
type(psb_desc_type) :: desc_a type(psb_desc_type) :: desc_a
integer :: ictxt, info integer :: ictxt, info
@ -292,19 +292,18 @@ contains
end interface end interface
! local variables ! local variables
type(psb_sspmat_type) :: a type(psb_sspmat_type) :: a
real(psb_spk_) :: zt(nbmax),glob_x,glob_y,glob_z real(psb_spk_) :: zt(nb),glob_x,glob_y,glob_z
integer :: m,n,nnz,glob_row,ipoints integer :: m,n,nnz,glob_row,nlr,i,ii,ib,k,ipoints
integer :: x,y,z,ia,indx_owner integer :: x,y,z,ia,indx_owner
integer :: np, iam integer :: np, iam
integer :: element integer :: element
integer :: nv, inv integer, allocatable :: irow(:),icol(:),myidx(:)
integer, allocatable :: irow(:),icol(:)
real(psb_spk_), allocatable :: val(:) real(psb_spk_), allocatable :: val(:)
! deltah dimension of each grid cell ! deltah dimension of each grid cell
! deltat discretization time ! deltat discretization time
real(psb_spk_) :: deltah real(psb_spk_) :: deltah
real(psb_spk_),parameter :: rhs=0.e0,one=1.e0,zero=0.e0 real(psb_spk_),parameter :: rhs=0.e0,one=1.e0,zero=0.e0
real(psb_dpk_) :: t1, t2, t3, tins, tasb real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen
real(psb_spk_) :: a1, a2, a3, a4, b1, b2, b3 real(psb_spk_) :: a1, a2, a3, a4, b1, b2, b3
external :: a1, a2, a3, a4, b1, b2, b3 external :: a1, a2, a3, a4, b1, b2, b3
integer :: err_act integer :: err_act
@ -328,11 +327,17 @@ contains
nnz = ((n*9)/(np)) nnz = ((n*9)/(np))
if(iam == psb_root_) write(0,'("Generating Matrix (size=",i0x,")...")')n if(iam == psb_root_) write(0,'("Generating Matrix (size=",i0x,")...")')n
call psb_barrier(ictxt)
t0 = psb_wtime()
call psb_cdall(ictxt,desc_a,info,mg=n,parts=parts) call psb_cdall(ictxt,desc_a,info,mg=n,parts=parts)
if (info == 0) call psb_spall(a,desc_a,info,nnz=nnz) if (info == 0) call psb_spall(a,desc_a,info,nnz=nnz)
! define rhs from boundary conditions; also build initial guess ! define rhs from boundary conditions; also build initial guess
if (info == 0) call psb_geall(b,desc_a,info) if (info == 0) call psb_geall(b,desc_a,info)
if (info == 0) call psb_geall(xv,desc_a,info) if (info == 0) call psb_geall(xv,desc_a,info)
nlr = psb_cd_get_local_rows(desc_a)
call psb_barrier(ictxt)
talc = psb_wtime()-t0
if (info /= 0) then if (info /= 0) then
info=4010 info=4010
call psb_errpush(info,name) call psb_errpush(info,name)
@ -343,26 +348,33 @@ contains
! time; just a small matrix. might be extended to generate ! time; just a small matrix. might be extended to generate
! a bunch of rows per call. ! a bunch of rows per call.
! !
allocate(val(20*nbmax),irow(20*nbmax),& allocate(val(20*nb),irow(20*nb),&
&icol(20*nbmax),stat=info) &icol(20*nb),myidx(nlr),stat=info)
if (info /= 0 ) then if (info /= 0 ) then
info=4000 info=4000
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
endif endif
tins = 0.d0 do i=1,nlr
call psb_barrier(ictxt) myidx(i) = i
t1 = psb_wtime() end do
call psb_loc_to_glob(myidx,desc_a,info)
! loop over rows belonging to current process in a block ! loop over rows belonging to current process in a block
! distribution. ! distribution.
do glob_row = 1, n call psb_barrier(ictxt)
! Figure out which rows are local to the current process: t1 = psb_wtime()
if (psb_is_owned(glob_row,desc_a)) then do ii=1, nlr,nb
ib = min(nb,nlr-ii+1)
element = 1
do k=1,ib
i=ii+k-1
! local matrix pointer ! local matrix pointer
element=1 glob_row=myidx(i)
! compute gridpoint coordinates ! compute gridpoint coordinates
if (mod(glob_row,ipoints*ipoints) == 0) then if (mod(glob_row,ipoints*ipoints) == 0) then
x = glob_row/(ipoints*ipoints) x = glob_row/(ipoints*ipoints)
@ -381,7 +393,7 @@ contains
glob_z=z*deltah glob_z=z*deltah
! check on boundary points ! check on boundary points
zt(1) = 0.d0 zt(k) = 0.d0
! internal point: build discretization ! internal point: build discretization
! !
! term depending on (x-1,y,z) ! term depending on (x-1,y,z)
@ -391,14 +403,15 @@ contains
& -a1(glob_x,glob_y,glob_z) & -a1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*(-val(element))
else else
val(element)=-b1(glob_x,glob_y,glob_z)& val(element)=-b1(glob_x,glob_y,glob_z)&
& -a1(glob_x,glob_y,glob_z) & -a1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-2)*ipoints*ipoints+(y-1)*ipoints+(z) icol(element) = (x-2)*ipoints*ipoints+(y-1)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y-1,z) ! term depending on (x,y-1,z)
if (y==1) then if (y==1) then
@ -406,14 +419,15 @@ contains
& -a2(glob_x,glob_y,glob_z) & -a2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_x**2-glob_z**2)*(-val(element)) zt(k) = exp(-glob_x**2-glob_z**2)*(-val(element))
else else
val(element)=-b2(glob_x,glob_y,glob_z)& val(element)=-b2(glob_x,glob_y,glob_z)&
& -a2(glob_x,glob_y,glob_z) & -a2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y-2)*ipoints+(z) icol(element) = (x-1)*ipoints*ipoints+(y-2)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y,z-1) ! term depending on (x,y,z-1)
if (z==1) then if (z==1) then
@ -421,14 +435,15 @@ contains
& -a3(glob_x,glob_y,glob_z) & -a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_x**2-glob_y**2)*(-val(element)) zt(k) = exp(-glob_x**2-glob_y**2)*(-val(element))
else else
val(element)=-b3(glob_x,glob_y,glob_z)& val(element)=-b3(glob_x,glob_y,glob_z)&
& -a3(glob_x,glob_y,glob_z) & -a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y-1)*ipoints+(z-1) icol(element) = (x-1)*ipoints*ipoints+(y-1)*ipoints+(z-1)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y,z) ! term depending on (x,y,z)
val(element)=2*b1(glob_x,glob_y,glob_z)& val(element)=2*b1(glob_x,glob_y,glob_z)&
@ -439,65 +454,63 @@ contains
& +a3(glob_x,glob_y,glob_z) & +a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y-1)*ipoints+(z) icol(element) = (x-1)*ipoints*ipoints+(y-1)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
! term depending on (x,y,z+1) ! term depending on (x,y,z+1)
if (z==ipoints) then if (z==ipoints) then
val(element)=-b1(glob_x,glob_y,glob_z) val(element)=-b1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_x**2-glob_y**2)*exp(-glob_z)*(-val(element)) zt(k) = exp(-glob_x**2-glob_y**2)*exp(-glob_z)*(-val(element))
else else
val(element)=-b1(glob_x,glob_y,glob_z) val(element)=-b1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y-1)*ipoints+(z+1) icol(element) = (x-1)*ipoints*ipoints+(y-1)*ipoints+(z+1)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y+1,z) ! term depending on (x,y+1,z)
if (y==ipoints) then if (y==ipoints) then
val(element)=-b2(glob_x,glob_y,glob_z) val(element)=-b2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_x**2-glob_z**2)*exp(-glob_y)*(-val(element)) zt(k) = exp(-glob_x**2-glob_z**2)*exp(-glob_y)*(-val(element))
else else
val(element)=-b2(glob_x,glob_y,glob_z) val(element)=-b2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y)*ipoints+(z) icol(element)=(x-1)*ipoints*ipoints+(y)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x+1,y,z) ! term depending on (x+1,y,z)
if (x==ipoints) then if (x==ipoints) then
val(element)=-b3(glob_x,glob_y,glob_z) val(element)=-b3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element))
else else
val(element)=-b3(glob_x,glob_y,glob_z) val(element)=-b3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x)*ipoints*ipoints+(y-1)*ipoints+(z) icol(element) = (x)*ipoints*ipoints+(y-1)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
irow(1:element-1)=glob_row
ia=glob_row
t3 = psb_wtime() end do
call psb_spins(element-1,irow,icol,val,a,desc_a,info) call psb_spins(element-1,irow,icol,val,a,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
tins = tins + (psb_wtime()-t3) call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),b,desc_a,info)
call psb_geins(1,(/ia/),zt(1:1),b,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
zt(1)=0.d0 zt(:)=0.d0
call psb_geins(1,(/ia/),zt(1:1),xv,desc_a,info) call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
end if
end do end do
call psb_barrier(ictxt) tgen = psb_wtime()-t1
t2 = psb_wtime()-t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
call psb_errpush(info,name) call psb_errpush(info,name)
@ -506,29 +519,17 @@ contains
deallocate(val,irow,icol) deallocate(val,irow,icol)
call psb_barrier(ictxt)
t1 = psb_wtime() t1 = psb_wtime()
call psb_cdasb(desc_a,info) call psb_cdasb(desc_a,info)
call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_) if (info == 0) &
& call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_)
call psb_barrier(ictxt) call psb_barrier(ictxt)
tasb = psb_wtime()-t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
call psb_amx(ictxt,t2)
call psb_amx(ictxt,tins)
call psb_amx(ictxt,tasb)
if(iam == psb_root_) then
write(*,'("The matrix has been generated and assembled in ",a3," format.")')&
& a%fida(1:3)
write(*,'("-pspins time : ",es10.4)')tins
write(*,'("-insert time : ",es10.4)')t2
write(*,'("-assembly time : ",es10.4)')tasb
end if
call psb_geasb(b,desc_a,info) call psb_geasb(b,desc_a,info)
call psb_geasb(xv,desc_a,info) call psb_geasb(xv,desc_a,info)
if(info /= 0) then if(info /= 0) then
@ -536,7 +537,23 @@ contains
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
tasb = psb_wtime()-t1
call psb_barrier(ictxt)
ttot = psb_wtime() - t0
call psb_amx(ictxt,talc)
call psb_amx(ictxt,tgen)
call psb_amx(ictxt,tasb)
call psb_amx(ictxt,ttot)
if(iam == psb_root_) then
write(*,'("The matrix has been generated and assembled in ",a3," format.")')&
& a%fida(1:3)
write(*,'("-allocation time : ",es12.5)') talc
write(*,'("-coeff. gen. time : ",es12.5)') tgen
write(*,'("-assembly time : ",es12.5)') tasb
write(*,'("-total time : ",es12.5)') ttot
end if
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return

@ -134,7 +134,8 @@ program mld_sexample_ml
call psb_barrier(ictxt) call psb_barrier(ictxt)
t1 = psb_wtime() t1 = psb_wtime()
call create_matrix(idim,A,b,x,desc_A,part_block,ictxt,info) call create_matrix(idim,a,b,x,desc_a,part_block,ictxt,info)
call psb_barrier(ictxt)
t2 = psb_wtime() - t1 t2 = psb_wtime() - t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
@ -142,8 +143,7 @@ program mld_sexample_ml
goto 9999 goto 9999
end if end if
call psb_amx(ictxt,t2) if (iam == psb_root_) write(*,'("Overall matrix creation time : ",es12.5)')t2
if (iam == psb_root_) write(*,'("Overall matrix creation time : ",es10.4)')t2
if (iam == psb_root_) write(*,'(" ")') if (iam == psb_root_) write(*,'(" ")')
select case(choice) select case(choice)
@ -236,13 +236,13 @@ program mld_sexample_ml
write(*,'("Matrix from PDE example")') write(*,'("Matrix from PDE example")')
write(*,'("Computed solution on ",i8," processors")')np write(*,'("Computed solution on ",i8," processors")')np
write(*,'("Iterations to convergence : ",i6)')iter write(*,'("Iterations to convergence : ",i6)')iter
write(*,'("Error estimate on exit : ",es10.4)')err write(*,'("Error estimate on exit : ",es12.5)')err
write(*,'("Time to build prec. : ",es10.4)')tprec write(*,'("Time to build prec. : ",es12.5)')tprec
write(*,'("Time to solve system : ",es10.4)')t2 write(*,'("Time to solve system : ",es12.5)')t2
write(*,'("Time per iteration : ",es10.4)')t2/(iter) write(*,'("Time per iteration : ",es12.5)')t2/(iter)
write(*,'("Total time : ",es10.4)')t2+tprec write(*,'("Total time : ",es12.5)')t2+tprec
write(*,'("Residual 2-norm : ",es10.4)')resmx write(*,'("Residual 2-norm : ",es12.5)')resmx
write(*,'("Residual inf-norm : ",es10.4)')resmxp write(*,'("Residual inf-norm : ",es12.5)')resmxp
write(*,'("Total memory occupation for A : ",i10)')amatsize write(*,'("Total memory occupation for A : ",i10)')amatsize
write(*,'("Total memory occupation for DESC_A : ",i10)')descsize write(*,'("Total memory occupation for DESC_A : ",i10)')descsize
write(*,'("Total memory occupation for PREC : ",i10)')precsize write(*,'("Total memory occupation for PREC : ",i10)')precsize
@ -315,7 +315,7 @@ contains
use psb_base_mod use psb_base_mod
implicit none implicit none
integer :: idim integer :: idim
integer, parameter :: nbmax=10 integer, parameter :: nb=20
real(psb_spk_), allocatable :: b(:),xv(:) real(psb_spk_), allocatable :: b(:),xv(:)
type(psb_desc_type) :: desc_a type(psb_desc_type) :: desc_a
integer :: ictxt, info integer :: ictxt, info
@ -330,19 +330,18 @@ contains
end interface end interface
! local variables ! local variables
type(psb_sspmat_type) :: a type(psb_sspmat_type) :: a
real(psb_spk_) :: zt(nbmax),glob_x,glob_y,glob_z real(psb_spk_) :: zt(nb),glob_x,glob_y,glob_z
integer :: m,n,nnz,glob_row,ipoints integer :: m,n,nnz,glob_row,nlr,i,ii,ib,k,ipoints
integer :: x,y,z,ia,indx_owner integer :: x,y,z,ia,indx_owner
integer :: np, iam integer :: np, iam
integer :: element integer :: element
integer :: nv, inv integer, allocatable :: irow(:),icol(:),myidx(:)
integer, allocatable :: irow(:),icol(:)
real(psb_spk_), allocatable :: val(:) real(psb_spk_), allocatable :: val(:)
! deltah dimension of each grid cell ! deltah dimension of each grid cell
! deltat discretization time ! deltat discretization time
real(psb_spk_) :: deltah real(psb_spk_) :: deltah
real(psb_spk_),parameter :: rhs=0.e0,one=1.e0,zero=0.e0 real(psb_spk_),parameter :: rhs=0.e0,one=1.e0,zero=0.e0
real(psb_dpk_) :: t1, t2, t3, tins, tasb real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen
real(psb_spk_) :: a1, a2, a3, a4, b1, b2, b3 real(psb_spk_) :: a1, a2, a3, a4, b1, b2, b3
external :: a1, a2, a3, a4, b1, b2, b3 external :: a1, a2, a3, a4, b1, b2, b3
integer :: err_act integer :: err_act
@ -366,11 +365,17 @@ contains
nnz = ((n*9)/(np)) nnz = ((n*9)/(np))
if(iam == psb_root_) write(0,'("Generating Matrix (size=",i0x,")...")')n if(iam == psb_root_) write(0,'("Generating Matrix (size=",i0x,")...")')n
call psb_barrier(ictxt)
t0 = psb_wtime()
call psb_cdall(ictxt,desc_a,info,mg=n,parts=parts) call psb_cdall(ictxt,desc_a,info,mg=n,parts=parts)
if (info == 0) call psb_spall(a,desc_a,info,nnz=nnz) if (info == 0) call psb_spall(a,desc_a,info,nnz=nnz)
! define rhs from boundary conditions; also build initial guess ! define rhs from boundary conditions; also build initial guess
if (info == 0) call psb_geall(b,desc_a,info) if (info == 0) call psb_geall(b,desc_a,info)
if (info == 0) call psb_geall(xv,desc_a,info) if (info == 0) call psb_geall(xv,desc_a,info)
nlr = psb_cd_get_local_rows(desc_a)
call psb_barrier(ictxt)
talc = psb_wtime()-t0
if (info /= 0) then if (info /= 0) then
info=4010 info=4010
call psb_errpush(info,name) call psb_errpush(info,name)
@ -381,26 +386,33 @@ contains
! time; just a small matrix. might be extended to generate ! time; just a small matrix. might be extended to generate
! a bunch of rows per call. ! a bunch of rows per call.
! !
allocate(val(20*nbmax),irow(20*nbmax),& allocate(val(20*nb),irow(20*nb),&
&icol(20*nbmax),stat=info) &icol(20*nb),myidx(nlr),stat=info)
if (info /= 0 ) then if (info /= 0 ) then
info=4000 info=4000
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
endif endif
tins = 0.d0 do i=1,nlr
call psb_barrier(ictxt) myidx(i) = i
t1 = psb_wtime() end do
call psb_loc_to_glob(myidx,desc_a,info)
! loop over rows belonging to current process in a block ! loop over rows belonging to current process in a block
! distribution. ! distribution.
do glob_row = 1, n call psb_barrier(ictxt)
! Figure out which rows are local to the current process: t1 = psb_wtime()
if (psb_is_owned(glob_row,desc_a)) then do ii=1, nlr,nb
ib = min(nb,nlr-ii+1)
element = 1
do k=1,ib
i=ii+k-1
! local matrix pointer ! local matrix pointer
element=1 glob_row=myidx(i)
! compute gridpoint coordinates ! compute gridpoint coordinates
if (mod(glob_row,ipoints*ipoints) == 0) then if (mod(glob_row,ipoints*ipoints) == 0) then
x = glob_row/(ipoints*ipoints) x = glob_row/(ipoints*ipoints)
@ -419,7 +431,7 @@ contains
glob_z=z*deltah glob_z=z*deltah
! check on boundary points ! check on boundary points
zt(1) = 0.d0 zt(k) = 0.d0
! internal point: build discretization ! internal point: build discretization
! !
! term depending on (x-1,y,z) ! term depending on (x-1,y,z)
@ -429,14 +441,15 @@ contains
& -a1(glob_x,glob_y,glob_z) & -a1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*(-val(element))
else else
val(element)=-b1(glob_x,glob_y,glob_z)& val(element)=-b1(glob_x,glob_y,glob_z)&
& -a1(glob_x,glob_y,glob_z) & -a1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-2)*ipoints*ipoints+(y-1)*ipoints+(z) icol(element) = (x-2)*ipoints*ipoints+(y-1)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y-1,z) ! term depending on (x,y-1,z)
if (y==1) then if (y==1) then
@ -444,14 +457,15 @@ contains
& -a2(glob_x,glob_y,glob_z) & -a2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_x**2-glob_z**2)*(-val(element)) zt(k) = exp(-glob_x**2-glob_z**2)*(-val(element))
else else
val(element)=-b2(glob_x,glob_y,glob_z)& val(element)=-b2(glob_x,glob_y,glob_z)&
& -a2(glob_x,glob_y,glob_z) & -a2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y-2)*ipoints+(z) icol(element) = (x-1)*ipoints*ipoints+(y-2)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y,z-1) ! term depending on (x,y,z-1)
if (z==1) then if (z==1) then
@ -459,14 +473,15 @@ contains
& -a3(glob_x,glob_y,glob_z) & -a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_x**2-glob_y**2)*(-val(element)) zt(k) = exp(-glob_x**2-glob_y**2)*(-val(element))
else else
val(element)=-b3(glob_x,glob_y,glob_z)& val(element)=-b3(glob_x,glob_y,glob_z)&
& -a3(glob_x,glob_y,glob_z) & -a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y-1)*ipoints+(z-1) icol(element) = (x-1)*ipoints*ipoints+(y-1)*ipoints+(z-1)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y,z) ! term depending on (x,y,z)
val(element)=2*b1(glob_x,glob_y,glob_z)& val(element)=2*b1(glob_x,glob_y,glob_z)&
@ -477,65 +492,63 @@ contains
& +a3(glob_x,glob_y,glob_z) & +a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y-1)*ipoints+(z) icol(element) = (x-1)*ipoints*ipoints+(y-1)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
! term depending on (x,y,z+1) ! term depending on (x,y,z+1)
if (z==ipoints) then if (z==ipoints) then
val(element)=-b1(glob_x,glob_y,glob_z) val(element)=-b1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_x**2-glob_y**2)*exp(-glob_z)*(-val(element)) zt(k) = exp(-glob_x**2-glob_y**2)*exp(-glob_z)*(-val(element))
else else
val(element)=-b1(glob_x,glob_y,glob_z) val(element)=-b1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y-1)*ipoints+(z+1) icol(element) = (x-1)*ipoints*ipoints+(y-1)*ipoints+(z+1)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y+1,z) ! term depending on (x,y+1,z)
if (y==ipoints) then if (y==ipoints) then
val(element)=-b2(glob_x,glob_y,glob_z) val(element)=-b2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_x**2-glob_z**2)*exp(-glob_y)*(-val(element)) zt(k) = exp(-glob_x**2-glob_z**2)*exp(-glob_y)*(-val(element))
else else
val(element)=-b2(glob_x,glob_y,glob_z) val(element)=-b2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*ipoints*ipoints+(y)*ipoints+(z) icol(element)=(x-1)*ipoints*ipoints+(y)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x+1,y,z) ! term depending on (x+1,y,z)
if (x==ipoints) then if (x==ipoints) then
val(element)=-b3(glob_x,glob_y,glob_z) val(element)=-b3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element))
else else
val(element)=-b3(glob_x,glob_y,glob_z) val(element)=-b3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x)*ipoints*ipoints+(y-1)*ipoints+(z) icol(element) = (x)*ipoints*ipoints+(y-1)*ipoints+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
irow(1:element-1)=glob_row
ia=glob_row
t3 = psb_wtime() end do
call psb_spins(element-1,irow,icol,val,a,desc_a,info) call psb_spins(element-1,irow,icol,val,a,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
tins = tins + (psb_wtime()-t3) call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),b,desc_a,info)
call psb_geins(1,(/ia/),zt(1:1),b,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
zt(1)=0.d0 zt(:)=0.d0
call psb_geins(1,(/ia/),zt(1:1),xv,desc_a,info) call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
end if
end do end do
call psb_barrier(ictxt) tgen = psb_wtime()-t1
t2 = psb_wtime()-t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
call psb_errpush(info,name) call psb_errpush(info,name)
@ -544,29 +557,17 @@ contains
deallocate(val,irow,icol) deallocate(val,irow,icol)
call psb_barrier(ictxt)
t1 = psb_wtime() t1 = psb_wtime()
call psb_cdasb(desc_a,info) call psb_cdasb(desc_a,info)
call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_) if (info == 0) &
& call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_)
call psb_barrier(ictxt) call psb_barrier(ictxt)
tasb = psb_wtime()-t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
call psb_amx(ictxt,t2)
call psb_amx(ictxt,tins)
call psb_amx(ictxt,tasb)
if(iam == psb_root_) then
write(*,'("The matrix has been generated and assembled in ",a3," format.")')&
& a%fida(1:3)
write(*,'("-pspins time : ",es10.4)')tins
write(*,'("-insert time : ",es10.4)')t2
write(*,'("-assembly time : ",es10.4)')tasb
end if
call psb_geasb(b,desc_a,info) call psb_geasb(b,desc_a,info)
call psb_geasb(xv,desc_a,info) call psb_geasb(xv,desc_a,info)
if(info /= 0) then if(info /= 0) then
@ -574,7 +575,23 @@ contains
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
tasb = psb_wtime()-t1
call psb_barrier(ictxt)
ttot = psb_wtime() - t0
call psb_amx(ictxt,talc)
call psb_amx(ictxt,tgen)
call psb_amx(ictxt,tasb)
call psb_amx(ictxt,ttot)
if(iam == psb_root_) then
write(*,'("The matrix has been generated and assembled in ",a3," format.")')&
& a%fida(1:3)
write(*,'("-allocation time : ",es12.5)') talc
write(*,'("-coeff. gen. time : ",es12.5)') tgen
write(*,'("-assembly time : ",es12.5)') tasb
write(*,'("-total time : ",es12.5)') ttot
end if
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return

@ -153,6 +153,7 @@ program ppde
call psb_barrier(ictxt) call psb_barrier(ictxt)
t1 = psb_wtime() t1 = psb_wtime()
call create_matrix(idim,a,b,x,desc_a,part_block,ictxt,afmt,info) call create_matrix(idim,a,b,x,desc_a,part_block,ictxt,afmt,info)
call psb_barrier(ictxt)
t2 = psb_wtime() - t1 t2 = psb_wtime() - t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
@ -161,8 +162,7 @@ program ppde
goto 9999 goto 9999
end if end if
call psb_amx(ictxt,t2) if (iam == psb_root_) write(*,'("Overall matrix creation time : ",es12.5)')t2
if (iam == psb_root_) write(*,'("Overall matrix creation time : ",es10.4)')t2
if (iam == psb_root_) write(*,'(" ")') if (iam == psb_root_) write(*,'(" ")')
! !
! prepare the preconditioner. ! prepare the preconditioner.
@ -208,7 +208,7 @@ program ppde
call psb_amx(ictxt,tprec) call psb_amx(ictxt,tprec)
if (iam == psb_root_) write(*,'("Preconditioner time : ",es10.4)')tprec if (iam == psb_root_) write(*,'("Preconditioner time : ",es12.5)')tprec
if (iam == psb_root_) call mld_precdescr(prec,info) if (iam == psb_root_) call mld_precdescr(prec,info)
if (iam == psb_root_) write(*,'(" ")') if (iam == psb_root_) write(*,'(" ")')
@ -235,10 +235,10 @@ program ppde
if (iam == psb_root_) then if (iam == psb_root_) then
write(*,'(" ")') write(*,'(" ")')
write(*,'("Time to solve matrix : ",es10.4)')t2 write(*,'("Time to solve matrix : ",es12.5)')t2
write(*,'("Time per iteration : ",es10.4)')t2/iter write(*,'("Time per iteration : ",es12.5)')t2/iter
write(*,'("Number of iterations : ",i0)')iter write(*,'("Number of iterations : ",i0)')iter
write(*,'("Convergence indicator on exit : ",es10.4)')err write(*,'("Convergence indicator on exit : ",es12.5)')err
write(*,'("Info on exit : ",i0)')info write(*,'("Info on exit : ",i0)')info
end if end if
@ -402,7 +402,7 @@ contains
use psb_base_mod use psb_base_mod
implicit none implicit none
integer :: idim integer :: idim
integer, parameter :: nbmax=10 integer, parameter :: nb=20
real(psb_dpk_), allocatable :: b(:),xv(:) real(psb_dpk_), allocatable :: b(:),xv(:)
type(psb_desc_type) :: desc_a type(psb_desc_type) :: desc_a
integer :: ictxt, info integer :: ictxt, info
@ -417,22 +417,21 @@ contains
end subroutine parts end subroutine parts
end interface ! local variables end interface ! local variables
type(psb_dspmat_type) :: a type(psb_dspmat_type) :: a
real(psb_dpk_) :: zt(nbmax),glob_x,glob_y,glob_z real(psb_dpk_) :: zt(nb),glob_x,glob_y,glob_z
integer :: m,n,nnz,glob_row integer :: m,n,nnz,glob_row,nlr,i,ii,ib,k
integer :: x,y,z,ia,indx_owner integer :: x,y,z,ia,indx_owner
integer :: np, iam integer :: np, iam
integer :: element integer :: element
integer, allocatable :: irow(:),icol(:) integer, allocatable :: irow(:),icol(:),myidx(:)
real(psb_dpk_), allocatable :: val(:) real(psb_dpk_), allocatable :: val(:)
! deltah dimension of each grid cell ! deltah dimension of each grid cell
! deltat discretization time ! deltat discretization time
real(psb_dpk_) :: deltah real(psb_dpk_) :: deltah
real(psb_dpk_),parameter :: rhs=0.d0,one=1.d0,zero=0.d0 real(psb_dpk_),parameter :: rhs=0.d0,one=1.d0,zero=0.d0
real(psb_dpk_) :: t1, t2, t3, tins, tasb real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen
real(psb_dpk_) :: a1, a2, a3, a4, b1, b2, b3 real(psb_dpk_) :: a1, a2, a3, a4, b1, b2, b3
external :: a1, a2, a3, a4, b1, b2, b3 external :: a1, a2, a3, a4, b1, b2, b3
integer :: err_act integer :: err_act
! common area
character(len=20) :: name, ch_err character(len=20) :: name, ch_err
@ -452,11 +451,17 @@ contains
nnz = ((n*9)/(np)) nnz = ((n*9)/(np))
if(iam == psb_root_) write(0,'("Generating Matrix (size=",i0x,")...")')n if(iam == psb_root_) write(0,'("Generating Matrix (size=",i0x,")...")')n
call psb_barrier(ictxt)
t0 = psb_wtime()
call psb_cdall(ictxt,desc_a,info,mg=n,parts=parts) call psb_cdall(ictxt,desc_a,info,mg=n,parts=parts)
if (info == 0) call psb_spall(a,desc_a,info,nnz=nnz) if (info == 0) call psb_spall(a,desc_a,info,nnz=nnz)
! define rhs from boundary conditions; also build initial guess ! define rhs from boundary conditions; also build initial guess
if (info == 0) call psb_geall(b,desc_a,info) if (info == 0) call psb_geall(b,desc_a,info)
if (info == 0) call psb_geall(xv,desc_a,info) if (info == 0) call psb_geall(xv,desc_a,info)
nlr = psb_cd_get_local_rows(desc_a)
call psb_barrier(ictxt)
talc = psb_wtime()-t0
if (info /= 0) then if (info /= 0) then
info=4010 info=4010
ch_err='allocation rout.' ch_err='allocation rout.'
@ -468,27 +473,33 @@ contains
! time; just a small matrix. might be extended to generate ! time; just a small matrix. might be extended to generate
! a bunch of rows per call. ! a bunch of rows per call.
! !
allocate(val(20*nbmax),irow(20*nbmax),& allocate(val(20*nb),irow(20*nb),&
&icol(20*nbmax),stat=info) &icol(20*nb),myidx(nlr),stat=info)
if (info /= 0 ) then if (info /= 0 ) then
info=4000 info=4000
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
endif endif
tins = 0.d0 do i=1,nlr
call psb_barrier(ictxt) myidx(i) = i
t1 = psb_wtime() end do
call psb_loc_to_glob(myidx,desc_a,info)
! loop over rows belonging to current process in a block ! loop over rows belonging to current process in a block
! distribution. ! distribution.
! icol(1)=1 call psb_barrier(ictxt)
do glob_row = 1, n t1 = psb_wtime()
! Figure out which rows are local to the current process: do ii=1, nlr,nb
if (psb_is_owned(glob_row,desc_a)) then ib = min(nb,nlr-ii+1)
element = 1
do k=1,ib
i=ii+k-1
! local matrix pointer ! local matrix pointer
element=1 glob_row=myidx(i)
! compute gridpoint coordinates ! compute gridpoint coordinates
if (mod(glob_row,(idim*idim)) == 0) then if (mod(glob_row,(idim*idim)) == 0) then
x = glob_row/(idim*idim) x = glob_row/(idim*idim)
@ -507,7 +518,7 @@ contains
glob_z=z*deltah glob_z=z*deltah
! check on boundary points ! check on boundary points
zt(1) = 0.d0 zt(k) = 0.d0
! internal point: build discretization ! internal point: build discretization
! !
! term depending on (x-1,y,z) ! term depending on (x-1,y,z)
@ -517,14 +528,15 @@ contains
& -a1(glob_x,glob_y,glob_z) & -a1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*(-val(element))
else else
val(element)=-b1(glob_x,glob_y,glob_z)& val(element)=-b1(glob_x,glob_y,glob_z)&
& -a1(glob_x,glob_y,glob_z) & -a1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-2)*idim*idim+(y-1)*idim+(z) icol(element) = (x-2)*idim*idim+(y-1)*idim+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y-1,z) ! term depending on (x,y-1,z)
if (y==1) then if (y==1) then
@ -532,14 +544,15 @@ contains
& -a2(glob_x,glob_y,glob_z) & -a2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element))
else else
val(element)=-b2(glob_x,glob_y,glob_z)& val(element)=-b2(glob_x,glob_y,glob_z)&
& -a2(glob_x,glob_y,glob_z) & -a2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*idim*idim+(y-2)*idim+(z) icol(element) = (x-1)*idim*idim+(y-2)*idim+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y,z-1) ! term depending on (x,y,z-1)
if (z==1) then if (z==1) then
@ -547,14 +560,15 @@ contains
& -a3(glob_x,glob_y,glob_z) & -a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element))
else else
val(element)=-b3(glob_x,glob_y,glob_z)& val(element)=-b3(glob_x,glob_y,glob_z)&
& -a3(glob_x,glob_y,glob_z) & -a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*idim*idim+(y-1)*idim+(z-1) icol(element) = (x-1)*idim*idim+(y-1)*idim+(z-1)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y,z) ! term depending on (x,y,z)
val(element)=2*b1(glob_x,glob_y,glob_z)& val(element)=2*b1(glob_x,glob_y,glob_z)&
@ -565,60 +579,58 @@ contains
& +a3(glob_x,glob_y,glob_z) & +a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*idim*idim+(y-1)*idim+(z) icol(element) = (x-1)*idim*idim+(y-1)*idim+(z)
element=element+1 irow(element) = glob_row
element = element+1
! term depending on (x,y,z+1) ! term depending on (x,y,z+1)
if (z==idim) then if (z==idim) then
val(element)=-b1(glob_x,glob_y,glob_z) val(element)=-b1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element))
else else
val(element)=-b1(glob_x,glob_y,glob_z) val(element)=-b1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*idim*idim+(y-1)*idim+(z+1) icol(element) = (x-1)*idim*idim+(y-1)*idim+(z+1)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y+1,z) ! term depending on (x,y+1,z)
if (y==idim) then if (y==idim) then
val(element)=-b2(glob_x,glob_y,glob_z) val(element)=-b2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element))
else else
val(element)=-b2(glob_x,glob_y,glob_z) val(element)=-b2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*idim*idim+(y)*idim+(z) icol(element) = (x-1)*idim*idim+(y)*idim+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x+1,y,z) ! term depending on (x+1,y,z)
if (x<idim) then if (x<idim) then
val(element)=-b3(glob_x,glob_y,glob_z) val(element)=-b3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x)*idim*idim+(y-1)*idim+(z) icol(element) = (x)*idim*idim+(y-1)*idim+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
irow(1:element-1)=glob_row
ia=glob_row
t3 = psb_wtime() end do
call psb_spins(element-1,irow,icol,val,a,desc_a,info) call psb_spins(element-1,irow,icol,val,a,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
tins = tins + (psb_wtime()-t3) call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),b,desc_a,info)
call psb_geins(1,(/ia/),zt(1:1),b,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
zt(1)=0.d0 zt(:)=0.d0
call psb_geins(1,(/ia/),zt(1:1),xv,desc_a,info) call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
end if
end do end do
call psb_barrier(ictxt) tgen = psb_wtime()-t1
t2 = psb_wtime()-t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
ch_err='insert rout.' ch_err='insert rout.'
@ -628,30 +640,18 @@ contains
deallocate(val,irow,icol) deallocate(val,irow,icol)
call psb_barrier(ictxt)
t1 = psb_wtime() t1 = psb_wtime()
call psb_cdasb(desc_a,info) call psb_cdasb(desc_a,info)
call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,afmt=afmt) if (info == 0) &
& call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,afmt=afmt)
call psb_barrier(ictxt) call psb_barrier(ictxt)
tasb = psb_wtime()-t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
ch_err='asb rout.' ch_err='asb rout.'
call psb_errpush(info,name,a_err=ch_err) call psb_errpush(info,name,a_err=ch_err)
goto 9999 goto 9999
end if end if
call psb_amx(ictxt,t2)
call psb_amx(ictxt,tins)
call psb_amx(ictxt,tasb)
if(iam == psb_root_) then
write(*,'("The matrix has been generated and assembled in ",a3," format.")')&
& a%fida(1:3)
write(*,'("-pspins time : ",es10.4)')tins
write(*,'("-insert time : ",es10.4)')t2
write(*,'("-assembly time : ",es10.4)')tasb
end if
call psb_geasb(b,desc_a,info) call psb_geasb(b,desc_a,info)
call psb_geasb(xv,desc_a,info) call psb_geasb(xv,desc_a,info)
if(info /= 0) then if(info /= 0) then
@ -660,7 +660,23 @@ contains
call psb_errpush(info,name,a_err=ch_err) call psb_errpush(info,name,a_err=ch_err)
goto 9999 goto 9999
end if end if
tasb = psb_wtime()-t1
call psb_barrier(ictxt)
ttot = psb_wtime() - t0
call psb_amx(ictxt,talc)
call psb_amx(ictxt,tgen)
call psb_amx(ictxt,tasb)
call psb_amx(ictxt,ttot)
if(iam == psb_root_) then
write(*,'("The matrix has been generated and assembled in ",a3," format.")')&
& a%fida(1:3)
write(*,'("-allocation time : ",es12.5)') talc
write(*,'("-coeff. gen. time : ",es12.5)') tgen
write(*,'("-assembly time : ",es12.5)') tasb
write(*,'("-total time : ",es12.5)') ttot
end if
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return

@ -153,6 +153,7 @@ program spde
call psb_barrier(ictxt) call psb_barrier(ictxt)
t1 = psb_wtime() t1 = psb_wtime()
call create_matrix(idim,a,b,x,desc_a,part_block,ictxt,afmt,info) call create_matrix(idim,a,b,x,desc_a,part_block,ictxt,afmt,info)
call psb_barrier(ictxt)
t2 = psb_wtime() - t1 t2 = psb_wtime() - t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
@ -161,8 +162,7 @@ program spde
goto 9999 goto 9999
end if end if
call psb_amx(ictxt,t2) if (iam == psb_root_) write(*,'("Overall matrix creation time : ",es12.5)')t2
if (iam == psb_root_) write(*,'("Overall matrix creation time : ",es10.4)')t2
if (iam == psb_root_) write(*,'(" ")') if (iam == psb_root_) write(*,'(" ")')
! !
! prepare the preconditioner. ! prepare the preconditioner.
@ -208,7 +208,7 @@ program spde
call psb_amx(ictxt,tprec) call psb_amx(ictxt,tprec)
if (iam == psb_root_) write(*,'("Preconditioner time : ",es10.4)')tprec if (iam == psb_root_) write(*,'("Preconditioner time : ",es12.5)')tprec
if (iam == psb_root_) call mld_precdescr(prec,info) if (iam == psb_root_) call mld_precdescr(prec,info)
if (iam == psb_root_) write(*,'(" ")') if (iam == psb_root_) write(*,'(" ")')
@ -235,10 +235,10 @@ program spde
if (iam == psb_root_) then if (iam == psb_root_) then
write(*,'(" ")') write(*,'(" ")')
write(*,'("Time to solve matrix : ",es10.4)')t2 write(*,'("Time to solve matrix : ",es12.5)')t2
write(*,'("Time per iteration : ",es10.4)')t2/iter write(*,'("Time per iteration : ",es12.5)')t2/iter
write(*,'("Number of iterations : ",i0)')iter write(*,'("Number of iterations : ",i0)')iter
write(*,'("Convergence indicator on exit : ",es10.4)')err write(*,'("Convergence indicator on exit : ",es12.5)')err
write(*,'("Info on exit : ",i0)')info write(*,'("Info on exit : ",i0)')info
end if end if
@ -402,7 +402,7 @@ contains
use psb_base_mod use psb_base_mod
implicit none implicit none
integer :: idim integer :: idim
integer, parameter :: nbmax=10 integer, parameter :: nb=20
real(psb_spk_), allocatable :: b(:),xv(:) real(psb_spk_), allocatable :: b(:),xv(:)
type(psb_desc_type) :: desc_a type(psb_desc_type) :: desc_a
integer :: ictxt, info integer :: ictxt, info
@ -417,18 +417,18 @@ contains
end subroutine parts end subroutine parts
end interface ! local variables end interface ! local variables
type(psb_sspmat_type) :: a type(psb_sspmat_type) :: a
real(psb_spk_) :: zt(nbmax),glob_x,glob_y,glob_z real(psb_spk_) :: zt(nb),glob_x,glob_y,glob_z
integer :: m,n,nnz,glob_row integer :: m,n,nnz,glob_row,nlr,i,ii,ib,k
integer :: x,y,z,ia,indx_owner integer :: x,y,z,ia,indx_owner
integer :: np, iam integer :: np, iam
integer :: element integer :: element
integer, allocatable :: irow(:),icol(:) integer, allocatable :: irow(:),icol(:),myidx(:)
real(psb_spk_), allocatable :: val(:) real(psb_spk_), allocatable :: val(:)
! deltah dimension of each grid cell ! deltah dimension of each grid cell
! deltat discretization time ! deltat discretization time
real(psb_spk_) :: deltah real(psb_spk_) :: deltah
real(psb_spk_),parameter :: rhs=0.0,one=1.0,zero=0.0 real(psb_spk_),parameter :: rhs=0.0,one=1.0,zero=0.0
real(psb_dpk_) :: t1, t2, t3, tins, tasb real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen
real(psb_spk_) :: a1, a2, a3, a4, b1, b2, b3 real(psb_spk_) :: a1, a2, a3, a4, b1, b2, b3
external :: a1, a2, a3, a4, b1, b2, b3 external :: a1, a2, a3, a4, b1, b2, b3
integer :: err_act integer :: err_act
@ -452,11 +452,17 @@ contains
nnz = ((n*9)/(np)) nnz = ((n*9)/(np))
if(iam == psb_root_) write(0,'("Generating Matrix (size=",i0x,")...")')n if(iam == psb_root_) write(0,'("Generating Matrix (size=",i0x,")...")')n
call psb_barrier(ictxt)
t0 = psb_wtime()
call psb_cdall(ictxt,desc_a,info,mg=n,parts=parts) call psb_cdall(ictxt,desc_a,info,mg=n,parts=parts)
if (info == 0) call psb_spall(a,desc_a,info,nnz=nnz) if (info == 0) call psb_spall(a,desc_a,info,nnz=nnz)
! define rhs from boundary conditions; also build initial guess ! define rhs from boundary conditions; also build initial guess
if (info == 0) call psb_geall(b,desc_a,info) if (info == 0) call psb_geall(b,desc_a,info)
if (info == 0) call psb_geall(xv,desc_a,info) if (info == 0) call psb_geall(xv,desc_a,info)
nlr = psb_cd_get_local_rows(desc_a)
call psb_barrier(ictxt)
talc = psb_wtime()-t0
if (info /= 0) then if (info /= 0) then
info=4010 info=4010
ch_err='allocation rout.' ch_err='allocation rout.'
@ -468,27 +474,33 @@ contains
! time; just a small matrix. might be extended to generate ! time; just a small matrix. might be extended to generate
! a bunch of rows per call. ! a bunch of rows per call.
! !
allocate(val(20*nbmax),irow(20*nbmax),& allocate(val(20*nb),irow(20*nb),&
&icol(20*nbmax),stat=info) &icol(20*nb),myidx(nlr),stat=info)
if (info /= 0 ) then if (info /= 0 ) then
info=4000 info=4000
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
endif endif
tins = 0.d0 do i=1,nlr
call psb_barrier(ictxt) myidx(i) = i
t1 = psb_wtime() end do
call psb_loc_to_glob(myidx,desc_a,info)
! loop over rows belonging to current process in a block ! loop over rows belonging to current process in a block
! distribution. ! distribution.
! icol(1)=1 call psb_barrier(ictxt)
do glob_row = 1, n t1 = psb_wtime()
! Figure out which rows are local to the current process: do ii=1, nlr,nb
if (psb_is_owned(glob_row,desc_a)) then ib = min(nb,nlr-ii+1)
element = 1
do k=1,ib
i=ii+k-1
! local matrix pointer ! local matrix pointer
element=1 glob_row=myidx(i)
! compute gridpoint coordinates ! compute gridpoint coordinates
if (mod(glob_row,(idim*idim)) == 0) then if (mod(glob_row,(idim*idim)) == 0) then
x = glob_row/(idim*idim) x = glob_row/(idim*idim)
@ -507,7 +519,7 @@ contains
glob_z=z*deltah glob_z=z*deltah
! check on boundary points ! check on boundary points
zt(1) = 0.d0 zt(k) = 0.d0
! internal point: build discretization ! internal point: build discretization
! !
! term depending on (x-1,y,z) ! term depending on (x-1,y,z)
@ -517,14 +529,15 @@ contains
& -a1(glob_x,glob_y,glob_z) & -a1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*(-val(element))
else else
val(element)=-b1(glob_x,glob_y,glob_z)& val(element)=-b1(glob_x,glob_y,glob_z)&
& -a1(glob_x,glob_y,glob_z) & -a1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-2)*idim*idim+(y-1)*idim+(z) icol(element) = (x-2)*idim*idim+(y-1)*idim+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y-1,z) ! term depending on (x,y-1,z)
if (y==1) then if (y==1) then
@ -532,14 +545,15 @@ contains
& -a2(glob_x,glob_y,glob_z) & -a2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element))
else else
val(element)=-b2(glob_x,glob_y,glob_z)& val(element)=-b2(glob_x,glob_y,glob_z)&
& -a2(glob_x,glob_y,glob_z) & -a2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*idim*idim+(y-2)*idim+(z) icol(element) = (x-1)*idim*idim+(y-2)*idim+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y,z-1) ! term depending on (x,y,z-1)
if (z==1) then if (z==1) then
@ -547,14 +561,15 @@ contains
& -a3(glob_x,glob_y,glob_z) & -a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element))
else else
val(element)=-b3(glob_x,glob_y,glob_z)& val(element)=-b3(glob_x,glob_y,glob_z)&
& -a3(glob_x,glob_y,glob_z) & -a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*idim*idim+(y-1)*idim+(z-1) icol(element) = (x-1)*idim*idim+(y-1)*idim+(z-1)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y,z) ! term depending on (x,y,z)
val(element)=2*b1(glob_x,glob_y,glob_z)& val(element)=2*b1(glob_x,glob_y,glob_z)&
@ -565,32 +580,35 @@ contains
& +a3(glob_x,glob_y,glob_z) & +a3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*idim*idim+(y-1)*idim+(z) icol(element) = (x-1)*idim*idim+(y-1)*idim+(z)
element=element+1 irow(element) = glob_row
element = element+1
! term depending on (x,y,z+1) ! term depending on (x,y,z+1)
if (z==idim) then if (z==idim) then
val(element)=-b1(glob_x,glob_y,glob_z) val(element)=-b1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element))
else else
val(element)=-b1(glob_x,glob_y,glob_z) val(element)=-b1(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*idim*idim+(y-1)*idim+(z+1) icol(element) = (x-1)*idim*idim+(y-1)*idim+(z+1)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
! term depending on (x,y+1,z) ! term depending on (x,y+1,z)
if (y==idim) then if (y==idim) then
val(element)=-b2(glob_x,glob_y,glob_z) val(element)=-b2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
zt(1) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element)) zt(k) = exp(-glob_y**2-glob_z**2)*exp(-glob_x)*(-val(element))
else else
val(element)=-b2(glob_x,glob_y,glob_z) val(element)=-b2(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x-1)*idim*idim+(y)*idim+(z) icol(element)=(x-1)*idim*idim+(y)*idim+(z)
irow(element) = glob_row
element=element+1 element=element+1
endif endif
! term depending on (x+1,y,z) ! term depending on (x+1,y,z)
@ -598,27 +616,22 @@ contains
val(element)=-b3(glob_x,glob_y,glob_z) val(element)=-b3(glob_x,glob_y,glob_z)
val(element) = val(element)/(deltah*& val(element) = val(element)/(deltah*&
& deltah) & deltah)
icol(element)=(x)*idim*idim+(y-1)*idim+(z) icol(element) = (x)*idim*idim+(y-1)*idim+(z)
element=element+1 irow(element) = glob_row
element = element+1
endif endif
irow(1:element-1)=glob_row
ia=glob_row
t3 = psb_wtime() end do
call psb_spins(element-1,irow,icol,val,a,desc_a,info) call psb_spins(element-1,irow,icol,val,a,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
tins = tins + (psb_wtime()-t3) call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),b,desc_a,info)
call psb_geins(1,(/ia/),zt(1:1),b,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
zt(1)=0.d0 zt(:)=0.d0
call psb_geins(1,(/ia/),zt(1:1),xv,desc_a,info) call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= 0) exit if(info /= 0) exit
end if
end do end do
call psb_barrier(ictxt) tgen = psb_wtime()-t1
t2 = psb_wtime()-t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
ch_err='insert rout.' ch_err='insert rout.'
@ -628,30 +641,18 @@ contains
deallocate(val,irow,icol) deallocate(val,irow,icol)
call psb_barrier(ictxt)
t1 = psb_wtime() t1 = psb_wtime()
call psb_cdasb(desc_a,info) call psb_cdasb(desc_a,info)
call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,afmt=afmt) if (info == 0) &
& call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,afmt=afmt)
call psb_barrier(ictxt) call psb_barrier(ictxt)
tasb = psb_wtime()-t1
if(info /= 0) then if(info /= 0) then
info=4010 info=4010
ch_err='asb rout.' ch_err='asb rout.'
call psb_errpush(info,name,a_err=ch_err) call psb_errpush(info,name,a_err=ch_err)
goto 9999 goto 9999
end if end if
call psb_amx(ictxt,t2)
call psb_amx(ictxt,tins)
call psb_amx(ictxt,tasb)
if(iam == psb_root_) then
write(*,'("The matrix has been generated and assembled in ",a3," format.")')&
& a%fida(1:3)
write(*,'("-pspins time : ",es10.4)')tins
write(*,'("-insert time : ",es10.4)')t2
write(*,'("-assembly time : ",es10.4)')tasb
end if
call psb_geasb(b,desc_a,info) call psb_geasb(b,desc_a,info)
call psb_geasb(xv,desc_a,info) call psb_geasb(xv,desc_a,info)
if(info /= 0) then if(info /= 0) then
@ -660,7 +661,23 @@ contains
call psb_errpush(info,name,a_err=ch_err) call psb_errpush(info,name,a_err=ch_err)
goto 9999 goto 9999
end if end if
tasb = psb_wtime()-t1
call psb_barrier(ictxt)
ttot = psb_wtime() - t0
call psb_amx(ictxt,talc)
call psb_amx(ictxt,tgen)
call psb_amx(ictxt,tasb)
call psb_amx(ictxt,ttot)
if(iam == psb_root_) then
write(*,'("The matrix has been generated and assembled in ",a3," format.")')&
& a%fida(1:3)
write(*,'("-allocation time : ",es12.5)') talc
write(*,'("-coeff. gen. time : ",es12.5)') tgen
write(*,'("-assembly time : ",es12.5)') tasb
write(*,'("-total time : ",es12.5)') ttot
end if
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return

Loading…
Cancel
Save