From 8af5d99e73f18c79de1a313ce007c3ac1f822332 Mon Sep 17 00:00:00 2001
From: Salvatore Filippone <salvatore.filippone@cranfield.ac.uk>
Date: Wed, 2 Jan 2008 12:05:55 +0000
Subject: [PATCH] Shuffled around clipping function.

---
 mlprec/mld_dbjac_bld.f90 | 194 ++++++++++++++++-----------------------
 mlprec/mld_zbjac_bld.f90 | 194 ++++++++++++++++-----------------------
 2 files changed, 156 insertions(+), 232 deletions(-)

diff --git a/mlprec/mld_dbjac_bld.f90 b/mlprec/mld_dbjac_bld.f90
index 68473688..85b78f31 100644
--- a/mlprec/mld_dbjac_bld.f90
+++ b/mlprec/mld_dbjac_bld.f90
@@ -227,27 +227,28 @@ subroutine mld_dbjac_bld(a,p,upd,info,data)
     ! Clip into p%av(ap_nd_) the off block-diagonal part of the local
     ! matrix. The clipped matrix is then stored in CSR format.
     !
-    call psb_sp_clip(atmp,p%av(mld_ap_nd_),info,&
-         & jmin=atmp%m+1,rscale=.false.,cscale=.false.)
-    if (info == 0) call psb_spcnv(p%av(mld_ap_nd_),info,&
-         & afmt='csr',dupl=psb_dupl_add_)
-    if (info /= 0) then
-      call psb_errpush(4010,name,a_err='psb_spcnv')
-      goto 9999
-    end if
-
-    k = psb_sp_get_nnzeros(p%av(mld_ap_nd_))
-    call psb_sum(ictxt,k)
-
-    if (k == 0) then
-      ! 
-      ! If the off diagonal part is emtpy, there is no point in doing
-      ! multiple Jacobi sweeps. This is certain to happen when running
-      ! on a single processor.
-      !
-      p%iprcparm(mld_smooth_sweeps_) = 1
+    if (p%iprcparm(mld_smooth_sweeps_) > 1) then 
+      call psb_sp_clip(atmp,p%av(mld_ap_nd_),info,&
+           & jmin=atmp%m+1,rscale=.false.,cscale=.false.)
+      if (info == 0) call psb_spcnv(p%av(mld_ap_nd_),info,&
+           & afmt='csr',dupl=psb_dupl_add_)
+      if (info /= 0) then
+        call psb_errpush(4010,name,a_err='psb_spcnv')
+        goto 9999
+      end if
+      
+      k = psb_sp_get_nnzeros(p%av(mld_ap_nd_))
+      call psb_sum(ictxt,k)
+      
+      if (k == 0) then
+        ! 
+        ! If the off diagonal part is emtpy, there is no point in doing
+        ! multiple Jacobi sweeps. This is certain to happen when running
+        ! on a single processor.
+        !
+        p%iprcparm(mld_smooth_sweeps_) = 1
+      end if
     end if
-
     if (debug_level >= psb_debug_outer_) &
          & write(debug_unit,*) me,' ',trim(name),' Factoring rows ',&
          & atmp%m,a%m,blck%m,atmp%ia2(atmp%m+1)-1
@@ -280,6 +281,19 @@ subroutine mld_dbjac_bld(a,p,upd,info,data)
         goto 9999
       end if
 
+    case(mld_sludist_)
+      !
+      ! LU factorization through the SuperLU_DIST package. This works only
+      ! when the matrix is distributed among the processes.
+      ! NOTE: Should have NO overlap here!!!!   
+      !
+      call psb_spcnv(a,atmp,info,afmt='csr')
+      if (info == 0) call mld_sludist_bld(atmp,p%desc_data,p,info)
+      if (info /= 0) then
+        call psb_errpush(4010,name,a_err='mld_sludist_bld')
+        goto 9999
+      end if
+
     case(mld_umf_)
       !
       ! LU factorization through the UMFPACK package.
@@ -317,6 +331,49 @@ subroutine mld_dbjac_bld(a,p,upd,info,data)
     !
   case(0)
 
+
+    !
+    ! In case of multiple block-Jacobi sweeps, clip into p%av(ap_nd_)
+    ! the off block-diagonal part of the local extended matrix. The
+    ! clipped matrix is then stored in CSR format.
+    !
+    
+    if (p%iprcparm(mld_smooth_sweeps_) > 1) then 
+      n_row = psb_cd_get_local_rows(p%desc_data)
+      n_col = psb_cd_get_local_cols(p%desc_data)
+      nrow_a = a%m 
+      ! The following is known to work 
+      ! given that the output from CLIP is in COO. 
+      call psb_sp_clip(a,p%av(mld_ap_nd_),info,&
+           & jmin=nrow_a+1,rscale=.false.,cscale=.false.)
+      if (info == 0) call psb_sp_clip(blck,atmp,info,&
+           & jmin=nrow_a+1,rscale=.false.,cscale=.false.)
+      if (info == 0) call psb_rwextd(n_row,p%av(mld_ap_nd_),info,b=atmp) 
+      if (info == 0) call psb_spcnv(p%av(mld_ap_nd_),info,&
+           & afmt='csr',dupl=psb_dupl_add_)
+      if (info /= 0) then
+        call psb_errpush(4010,name,a_err='clip & psb_spcnv csr 4')
+        goto 9999
+      end if
+      
+      k = psb_sp_get_nnzeros(p%av(mld_ap_nd_))
+      call psb_sum(ictxt,k)
+      
+      if (k == 0) then 
+        !
+        ! If the off block-diagonal part is emtpy, there is no point in doing
+        ! multiple Jacobi sweeps. This is certain to happen when running
+        ! on a single processor.
+        !
+        p%iprcparm(mld_smooth_sweeps_) = 1
+      end if
+      call psb_sp_free(atmp,info) 
+      if (info/=0) then
+        call psb_errpush(4010,name,a_err='psb_sp_free')
+        goto 9999
+      end if
+    end if
+
     ! 
     ! Compute a factorization of the diagonal block of the local matrix,
     ! according to the choice made by the user by setting p%iprcparm(sub_solve_)
@@ -327,44 +384,6 @@ subroutine mld_dbjac_bld(a,p,upd,info,data)
       !
       ! ILU(k)/MILU(k)/ILU(k,t) factorization.
       !
-
-      !
-      ! In case of multiple block-Jacobi sweeps, clip into p%av(ap_nd_)
-      ! the off block-diagonal part of the local extended matrix. The
-      ! clipped matrix is then stored in CSR format.
-      !
-
-      if (p%iprcparm(mld_smooth_sweeps_) > 1) then 
-        n_row = psb_cd_get_local_rows(p%desc_data)
-        n_col = psb_cd_get_local_cols(p%desc_data)
-        nrow_a = a%m 
-        ! The following is known to work 
-        ! given that the output from CLIP is in COO. 
-        call psb_sp_clip(a,p%av(mld_ap_nd_),info,&
-             & jmin=nrow_a+1,rscale=.false.,cscale=.false.)
-        if (info == 0) call psb_sp_clip(blck,atmp,info,&
-             & jmin=nrow_a+1,rscale=.false.,cscale=.false.)
-        if (info == 0) call psb_rwextd(n_row,p%av(mld_ap_nd_),info,b=atmp) 
-        if (info == 0) call psb_spcnv(p%av(mld_ap_nd_),info,&
-             & afmt='csr',dupl=psb_dupl_add_)
-        if (info /= 0) then
-          call psb_errpush(4010,name,a_err='clip & psb_spcnv csr 4')
-          goto 9999
-        end if
-
-        k = psb_sp_get_nnzeros(p%av(mld_ap_nd_))
-        call psb_sum(ictxt,k)
-
-        if (k == 0) then 
-          !
-          ! If the off block-diagonal part is emtpy, there is no point in doing
-          ! multiple Jacobi sweeps. This is certain to happen when running
-          ! on a single processor.
-          !
-          p%iprcparm(mld_smooth_sweeps_) = 1
-        end if
-        call psb_sp_free(atmp,info) 
-      end if
       !
       ! Compute the incomplete LU factorization.
       !
@@ -383,35 +402,6 @@ subroutine mld_dbjac_bld(a,p,upd,info,data)
       call psb_spcnv(a,atmp,info,afmt='coo')
       if (info == 0) call psb_rwextd(n_row,atmp,info,b=blck) 
 
-      !
-      ! In case of multiple block-Jacobi sweeps, clip into p%av(ap_nd_)
-      ! the off block-diagonal part of the local extended matrix. The
-      ! clipped matrix is then stored in CSR format.
-      !
-      if (p%iprcparm(mld_smooth_sweeps_) > 1) then 
-
-        if (info == 0) call psb_sp_clip(atmp,p%av(mld_ap_nd_),info,&
-             & jmin=atmp%m+1,rscale=.false.,cscale=.false.)
-
-        if (info == 0) call psb_spcnv(p%av(mld_ap_nd_),info,&
-             & afmt='csr',dupl=psb_dupl_add_)
-        if (info /= 0) then
-          call psb_errpush(4010,name,a_err='psb_spcnv csr 6')
-          goto 9999
-        end if
-
-        k = psb_sp_get_nnzeros(p%av(mld_ap_nd_))
-        call psb_sum(ictxt,k)
-
-        if (k == 0) then 
-          !
-          ! If the off block-diagonal part is emtpy, there is no point in doing
-          ! multiple Jacobi sweeps. This is certain to happen when running
-          ! on a single processor.
-          !
-          p%iprcparm(mld_smooth_sweeps_) = 1
-        end if
-      endif
       !
       ! Compute the LU factorization.
       !
@@ -437,7 +427,7 @@ subroutine mld_dbjac_bld(a,p,upd,info,data)
       call psb_spcnv(a,atmp,info,afmt='csr')
       if (info == 0) call mld_sludist_bld(atmp,p%desc_data,p,info)
       if (info /= 0) then
-        call psb_errpush(4010,name,a_err='mld_slu_bld')
+        call psb_errpush(4010,name,a_err='mld_sludist_bld')
         goto 9999
       end if
 
@@ -462,34 +452,6 @@ subroutine mld_dbjac_bld(a,p,upd,info,data)
       n_col = psb_cd_get_local_cols(p%desc_data)
       call psb_rwextd(n_row,atmp,info,b=blck) 
 
-      !
-      ! In case of multiple block-Jacobi sweeps, clip into p%av(ap_nd_)
-      ! the off block-diagonal part of the local extended matrix. The
-      ! clipped matrix is then stored in CSR format.
-      !
-      if (p%iprcparm(mld_smooth_sweeps_) > 1) then 
-
-        call psb_sp_clip(atmp,p%av(mld_ap_nd_),info,&
-             & jmin=atmp%m+1,rscale=.false.,cscale=.false.)
-        if (info == 0) call psb_spcnv(p%av(mld_ap_nd_),info,&
-             & afmt='csr',dupl=psb_dupl_add_)
-        if (info /= 0) then
-          call psb_errpush(4010,name,a_err='psb_spcnv csr 8')
-          goto 9999
-        end if
-        k = psb_sp_get_nnzeros(p%av(mld_ap_nd_))
-        call psb_sum(ictxt,k)
-
-        if (k == 0) then 
-          !
-          ! If the off block-diagonal part is emtpy, there is no point in doing
-          ! multiple Jacobi sweeps. This is certain to happen when running
-          ! on a single processor.
-          !
-          p%iprcparm(mld_smooth_sweeps_) = 1
-        end if
-      endif
-
       !
       ! Compute the LU factorization.
       !
diff --git a/mlprec/mld_zbjac_bld.f90 b/mlprec/mld_zbjac_bld.f90
index 41d04047..93e15e2c 100644
--- a/mlprec/mld_zbjac_bld.f90
+++ b/mlprec/mld_zbjac_bld.f90
@@ -228,27 +228,28 @@ subroutine mld_zbjac_bld(a,p,upd,info,data)
     ! Clip into p%av(ap_nd_) the off block-diagonal part of the local
     ! matrix. The clipped matrix is then stored in CSR format.
     !
-    call psb_sp_clip(atmp,p%av(mld_ap_nd_),info,&
-         & jmin=atmp%m+1,rscale=.false.,cscale=.false.)
-    if (info == 0) call psb_spcnv(p%av(mld_ap_nd_),info,&
-         & afmt='csr',dupl=psb_dupl_add_)
-    if (info /= 0) then
-      call psb_errpush(4010,name,a_err='psb_spcnv')
-      goto 9999
-    end if
-
-    k = psb_sp_get_nnzeros(p%av(mld_ap_nd_))
-    call psb_sum(ictxt,k)
-
-    if (k == 0) then
-      ! 
-      ! If the off diagonal part is emtpy, there is no point in doing
-      ! multiple Jacobi sweeps. This is certain to happen when running
-      ! on a single processor.
-      !
-      p%iprcparm(mld_smooth_sweeps_) = 1
+    if (p%iprcparm(mld_smooth_sweeps_) > 1) then 
+      call psb_sp_clip(atmp,p%av(mld_ap_nd_),info,&
+           & jmin=atmp%m+1,rscale=.false.,cscale=.false.)
+      if (info == 0) call psb_spcnv(p%av(mld_ap_nd_),info,&
+           & afmt='csr',dupl=psb_dupl_add_)
+      if (info /= 0) then
+        call psb_errpush(4010,name,a_err='psb_spcnv')
+        goto 9999
+      end if
+      
+      k = psb_sp_get_nnzeros(p%av(mld_ap_nd_))
+      call psb_sum(ictxt,k)
+      
+      if (k == 0) then
+        ! 
+        ! If the off diagonal part is emtpy, there is no point in doing
+        ! multiple Jacobi sweeps. This is certain to happen when running
+        ! on a single processor.
+        !
+        p%iprcparm(mld_smooth_sweeps_) = 1
+      end if
     end if
-
     if (debug_level >= psb_debug_outer_) &
          & write(debug_unit,*) me,' ',trim(name),' Factoring rows ',&
          & atmp%m,a%m,blck%m,atmp%ia2(atmp%m+1)-1
@@ -281,6 +282,19 @@ subroutine mld_zbjac_bld(a,p,upd,info,data)
         goto 9999
       end if
 
+    case(mld_sludist_)
+      !
+      ! LU factorization through the SuperLU_DIST package. This works only
+      ! when the matrix is distributed among the processes.
+      ! NOTE: Should have NO overlap here!!!!   
+      !
+      call psb_spcnv(a,atmp,info,afmt='csr')
+      if (info == 0) call mld_sludist_bld(atmp,p%desc_data,p,info)
+      if (info /= 0) then
+        call psb_errpush(4010,name,a_err='mld_sludist_bld')
+        goto 9999
+      end if
+
     case(mld_umf_)
       !
       ! LU factorization through the UMFPACK package.
@@ -318,6 +332,49 @@ subroutine mld_zbjac_bld(a,p,upd,info,data)
     !
   case(0)
 
+
+    !
+    ! In case of multiple block-Jacobi sweeps, clip into p%av(ap_nd_)
+    ! the off block-diagonal part of the local extended matrix. The
+    ! clipped matrix is then stored in CSR format.
+    !
+    
+    if (p%iprcparm(mld_smooth_sweeps_) > 1) then 
+      n_row = psb_cd_get_local_rows(p%desc_data)
+      n_col = psb_cd_get_local_cols(p%desc_data)
+      nrow_a = a%m 
+      ! The following is known to work 
+      ! given that the output from CLIP is in COO. 
+      call psb_sp_clip(a,p%av(mld_ap_nd_),info,&
+           & jmin=nrow_a+1,rscale=.false.,cscale=.false.)
+      if (info == 0) call psb_sp_clip(blck,atmp,info,&
+           & jmin=nrow_a+1,rscale=.false.,cscale=.false.)
+      if (info == 0) call psb_rwextd(n_row,p%av(mld_ap_nd_),info,b=atmp) 
+      if (info == 0) call psb_spcnv(p%av(mld_ap_nd_),info,&
+           & afmt='csr',dupl=psb_dupl_add_)
+      if (info /= 0) then
+        call psb_errpush(4010,name,a_err='clip & psb_spcnv csr 4')
+        goto 9999
+      end if
+      
+      k = psb_sp_get_nnzeros(p%av(mld_ap_nd_))
+      call psb_sum(ictxt,k)
+      
+      if (k == 0) then 
+        !
+        ! If the off block-diagonal part is emtpy, there is no point in doing
+        ! multiple Jacobi sweeps. This is certain to happen when running
+        ! on a single processor.
+        !
+        p%iprcparm(mld_smooth_sweeps_) = 1
+      end if
+      call psb_sp_free(atmp,info) 
+      if (info/=0) then
+        call psb_errpush(4010,name,a_err='psb_sp_free')
+        goto 9999
+      end if
+    end if
+
     ! 
     ! Compute a factorization of the diagonal block of the local matrix,
     ! according to the choice made by the user by setting p%iprcparm(sub_solve_)
@@ -328,44 +385,6 @@ subroutine mld_zbjac_bld(a,p,upd,info,data)
       !
       ! ILU(k)/MILU(k)/ILU(k,t) factorization.
       !
-
-      !
-      ! In case of multiple block-Jacobi sweeps, clip into p%av(ap_nd_)
-      ! the off block-diagonal part of the local extended matrix. The
-      ! clipped matrix is then stored in CSR format.
-      !
-
-      if (p%iprcparm(mld_smooth_sweeps_) > 1) then 
-        n_row = psb_cd_get_local_rows(p%desc_data)
-        n_col = psb_cd_get_local_cols(p%desc_data)
-        nrow_a = a%m 
-        ! The following is known to work 
-        ! given that the output from CLIP is in COO. 
-        call psb_sp_clip(a,p%av(mld_ap_nd_),info,&
-             & jmin=nrow_a+1,rscale=.false.,cscale=.false.)
-        if (info == 0) call psb_sp_clip(blck,atmp,info,&
-             & jmin=nrow_a+1,rscale=.false.,cscale=.false.)
-        if (info == 0) call psb_rwextd(n_row,p%av(mld_ap_nd_),info,b=atmp) 
-        if (info == 0) call psb_spcnv(p%av(mld_ap_nd_),info,&
-             & afmt='csr',dupl=psb_dupl_add_)
-        if (info /= 0) then
-          call psb_errpush(4010,name,a_err='clip & psb_spcnv csr 4')
-          goto 9999
-        end if
-
-        k = psb_sp_get_nnzeros(p%av(mld_ap_nd_))
-        call psb_sum(ictxt,k)
-
-        if (k == 0) then 
-          !
-          ! If the off block-diagonal part is emtpy, there is no point in doing
-          ! multiple Jacobi sweeps. This is certain to happen when running
-          ! on a single processor.
-          !
-          p%iprcparm(mld_smooth_sweeps_) = 1
-        end if
-        call psb_sp_free(atmp,info) 
-      end if
       !
       ! Compute the incomplete LU factorization.
       !
@@ -384,35 +403,6 @@ subroutine mld_zbjac_bld(a,p,upd,info,data)
       call psb_spcnv(a,atmp,info,afmt='coo')
       if (info == 0) call psb_rwextd(n_row,atmp,info,b=blck) 
 
-      !
-      ! In case of multiple block-Jacobi sweeps, clip into p%av(ap_nd_)
-      ! the off block-diagonal part of the local extended matrix. The
-      ! clipped matrix is then stored in CSR format.
-      !
-      if (p%iprcparm(mld_smooth_sweeps_) > 1) then 
-
-        if (info == 0) call psb_sp_clip(atmp,p%av(mld_ap_nd_),info,&
-             & jmin=atmp%m+1,rscale=.false.,cscale=.false.)
-        
-        if (info == 0) call psb_spcnv(p%av(mld_ap_nd_),info,&
-             & afmt='csr',dupl=psb_dupl_add_)
-        if (info /= 0) then
-          call psb_errpush(4010,name,a_err='psb_spcnv csr 6')
-          goto 9999
-        end if
-
-        k = psb_sp_get_nnzeros(p%av(mld_ap_nd_))
-        call psb_sum(ictxt,k)
-
-        if (k == 0) then 
-          !
-          ! If the off block-diagonal part is emtpy, there is no point in doing
-          ! multiple Jacobi sweeps. This is certain to happen when running
-          ! on a single processor.
-          !
-          p%iprcparm(mld_smooth_sweeps_) = 1
-        end if
-      endif
       !
       ! Compute the LU factorization.
       !
@@ -438,7 +428,7 @@ subroutine mld_zbjac_bld(a,p,upd,info,data)
       call psb_spcnv(a,atmp,info,afmt='csr')
       if (info == 0) call mld_sludist_bld(atmp,p%desc_data,p,info)
       if (info /= 0) then
-        call psb_errpush(4010,name,a_err='mld_slu_bld')
+        call psb_errpush(4010,name,a_err='mld_sludist_bld')
         goto 9999
       end if
 
@@ -463,34 +453,6 @@ subroutine mld_zbjac_bld(a,p,upd,info,data)
       n_col = psb_cd_get_local_cols(p%desc_data)
       call psb_rwextd(n_row,atmp,info,b=blck) 
 
-      !
-      ! In case of multiple block-Jacobi sweeps, clip into p%av(ap_nd_)
-      ! the off block-diagonal part of the local extended matrix. The
-      ! clipped matrix is then stored in CSR format.
-      !
-      if (p%iprcparm(mld_smooth_sweeps_) > 1) then 
-
-        call psb_sp_clip(atmp,p%av(mld_ap_nd_),info,&
-             & jmin=atmp%m+1,rscale=.false.,cscale=.false.)
-        if (info == 0) call psb_spcnv(p%av(mld_ap_nd_),info,&
-             & afmt='csr',dupl=psb_dupl_add_)
-        if (info /= 0) then
-          call psb_errpush(4010,name,a_err='psb_spcnv csr 8')
-          goto 9999
-        end if
-        k = psb_sp_get_nnzeros(p%av(mld_ap_nd_))
-        call psb_sum(ictxt,k)
-
-        if (k == 0) then 
-          !
-          ! If the off block-diagonal part is emtpy, there is no point in doing
-          ! multiple Jacobi sweeps. This is certain to happen when running
-          ! on a single processor.
-          !
-          p%iprcparm(mld_smooth_sweeps_) = 1
-        end if
-      endif
-
       !
       ! Compute the LU factorization.
       !