From e3de565b6dc31f6cd566547ee8622544d86593a8 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Wed, 26 Jul 2023 14:47:05 +0200
Subject: [PATCH] Updated comments in SOC1

---
 amgprec/impl/aggregator/amg_c_soc1_map_bld.F90 | 14 ++++++++++++++
 amgprec/impl/aggregator/amg_d_soc1_map_bld.F90 | 14 ++++++++++++++
 amgprec/impl/aggregator/amg_s_soc1_map_bld.F90 | 14 ++++++++++++++
 amgprec/impl/aggregator/amg_z_soc1_map_bld.F90 | 14 ++++++++++++++
 4 files changed, 56 insertions(+)

diff --git a/amgprec/impl/aggregator/amg_c_soc1_map_bld.F90 b/amgprec/impl/aggregator/amg_c_soc1_map_bld.F90
index 91acfefe..eb6b0eac 100644
--- a/amgprec/impl/aggregator/amg_c_soc1_map_bld.F90
+++ b/amgprec/impl/aggregator/amg_c_soc1_map_bld.F90
@@ -178,6 +178,18 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
     block
       integer(psb_ipk_), allocatable :: bnds(:), locnaggr(:)
       integer(psb_ipk_) :: myth,nths, kk
+      ! The parallelization makes use of a locnaggr(:) array; each thread
+      ! keeps its own version of naggr, and when the loop ends, a prefix sum is
+      ! applied to locnaggr to determine:
+      ! 1. the total number of aggregates NAGGR;
+      ! 2. how much each thread should shift its own aggregates.
+      ! Part 2 requires keeping track of which thread defined each entry
+      ! of ilaggr(), so that each entry can be adjusted correctly: even
+      ! if an entry I belongs to the range BNDS(TH):BNDS(TH+1)-1, it may have
+      ! been set because it is strongly connected to an entry J belonging to a
+      ! different thread.
+
+
       !$omp parallel shared(bnds,ioffs,locnaggr,ilaggr,nr,naggr,diag,theta,nths) &
       !$omp private(icol,val,myth,kk)
       block
@@ -231,6 +243,8 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
           ip = 0
           do k=1, nz
             j = icol(k)
+            ! If any of the neighbours is already assigned,
+            ! we will not reset it; move on to the next row.
             if (ilaggr(j) > 0) cycle step1
             if (abs(val(k)) > theta*sqrt(abs(diag(i)*diag(j)))) then
               ip = ip + 1
diff --git a/amgprec/impl/aggregator/amg_d_soc1_map_bld.F90 b/amgprec/impl/aggregator/amg_d_soc1_map_bld.F90
index 0c76f269..241f0568 100644
--- a/amgprec/impl/aggregator/amg_d_soc1_map_bld.F90
+++ b/amgprec/impl/aggregator/amg_d_soc1_map_bld.F90
@@ -178,6 +178,18 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
     block
       integer(psb_ipk_), allocatable :: bnds(:), locnaggr(:)
       integer(psb_ipk_) :: myth,nths, kk
+      ! The parallelization makes use of a locnaggr(:) array; each thread
+      ! keeps its own version of naggr, and when the loop ends, a prefix sum is
+      ! applied to locnaggr to determine:
+      ! 1. the total number of aggregates NAGGR;
+      ! 2. how much each thread should shift its own aggregates.
+      ! Part 2 requires keeping track of which thread defined each entry
+      ! of ilaggr(), so that each entry can be adjusted correctly: even
+      ! if an entry I belongs to the range BNDS(TH):BNDS(TH+1)-1, it may have
+      ! been set because it is strongly connected to an entry J belonging to a
+      ! different thread.
+
+
       !$omp parallel shared(bnds,ioffs,locnaggr,ilaggr,nr,naggr,diag,theta,nths) &
       !$omp private(icol,val,myth,kk)
       block
@@ -231,6 +243,8 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
           ip = 0
           do k=1, nz
             j = icol(k)
+            ! If any of the neighbours is already assigned,
+            ! we will not reset it; move on to the next row.
             if (ilaggr(j) > 0) cycle step1
             if (abs(val(k)) > theta*sqrt(abs(diag(i)*diag(j)))) then
               ip = ip + 1
diff --git a/amgprec/impl/aggregator/amg_s_soc1_map_bld.F90 b/amgprec/impl/aggregator/amg_s_soc1_map_bld.F90
index fe5701ef..329cd3ba 100644
--- a/amgprec/impl/aggregator/amg_s_soc1_map_bld.F90
+++ b/amgprec/impl/aggregator/amg_s_soc1_map_bld.F90
@@ -178,6 +178,18 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
     block
       integer(psb_ipk_), allocatable :: bnds(:), locnaggr(:)
       integer(psb_ipk_) :: myth,nths, kk
+      ! The parallelization makes use of a locnaggr(:) array; each thread
+      ! keeps its own version of naggr, and when the loop ends, a prefix sum is
+      ! applied to locnaggr to determine:
+      ! 1. the total number of aggregates NAGGR;
+      ! 2. how much each thread should shift its own aggregates.
+      ! Part 2 requires keeping track of which thread defined each entry
+      ! of ilaggr(), so that each entry can be adjusted correctly: even
+      ! if an entry I belongs to the range BNDS(TH):BNDS(TH+1)-1, it may have
+      ! been set because it is strongly connected to an entry J belonging to a
+      ! different thread.
+
+
       !$omp parallel shared(bnds,ioffs,locnaggr,ilaggr,nr,naggr,diag,theta,nths) &
       !$omp private(icol,val,myth,kk)
       block
@@ -231,6 +243,8 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
           ip = 0
           do k=1, nz
             j = icol(k)
+            ! If any of the neighbours is already assigned,
+            ! we will not reset it; move on to the next row.
             if (ilaggr(j) > 0) cycle step1
             if (abs(val(k)) > theta*sqrt(abs(diag(i)*diag(j)))) then
               ip = ip + 1
diff --git a/amgprec/impl/aggregator/amg_z_soc1_map_bld.F90 b/amgprec/impl/aggregator/amg_z_soc1_map_bld.F90
index 54c3add4..697a55b3 100644
--- a/amgprec/impl/aggregator/amg_z_soc1_map_bld.F90
+++ b/amgprec/impl/aggregator/amg_z_soc1_map_bld.F90
@@ -178,6 +178,18 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
     block
       integer(psb_ipk_), allocatable :: bnds(:), locnaggr(:)
       integer(psb_ipk_) :: myth,nths, kk
+      ! The parallelization makes use of a locnaggr(:) array; each thread
+      ! keeps its own version of naggr, and when the loop ends, a prefix sum is
+      ! applied to locnaggr to determine:
+      ! 1. the total number of aggregates NAGGR;
+      ! 2. how much each thread should shift its own aggregates.
+      ! Part 2 requires keeping track of which thread defined each entry
+      ! of ilaggr(), so that each entry can be adjusted correctly: even
+      ! if an entry I belongs to the range BNDS(TH):BNDS(TH+1)-1, it may have
+      ! been set because it is strongly connected to an entry J belonging to a
+      ! different thread.
+
+
       !$omp parallel shared(bnds,ioffs,locnaggr,ilaggr,nr,naggr,diag,theta,nths) &
       !$omp private(icol,val,myth,kk)
       block
@@ -231,6 +243,8 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in
           ip = 0
           do k=1, nz
             j = icol(k)
+            ! If any of the neighbours is already assigned,
+            ! we will not reset it; move on to the next row.
             if (ilaggr(j) > 0) cycle step1
             if (abs(val(k)) > theta*sqrt(abs(diag(i)*diag(j)))) then
               ip = ip + 1
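
Note: the numbering scheme described by the new comment (thread-local counts in locnaggr(:), an exclusive prefix sum giving both the per-thread shifts and the global count NAGGR, and an ioffs(:)-style record of which thread defined each entry) can be illustrated with a small standalone sketch. The program below is not code from the patch: it is a hypothetical, stripped-down example in which every local row trivially opens its own aggregate, so only the counting and offset bookkeeping is exercised; the program name, the fixed size nr, and the use of default integers instead of psb_ipk_ are assumptions made purely for illustration.

! Minimal, hypothetical sketch (not taken from the library) of the
! locnaggr(:) / prefix-sum / ioffs(:) offset scheme described above.
program prefix_shift_sketch
  use omp_lib
  implicit none
  integer, parameter :: nr = 20          ! toy problem size (assumption)
  integer :: ilaggr(nr), ioffs(nr)       ! aggregate map and defining thread
  integer, allocatable :: locnaggr(:)    ! per-thread counts, then shifts
  integer :: i, myth, nths, naggr

  ilaggr = 0
  ioffs  = -1
  naggr  = 0

  !$omp parallel shared(ilaggr,ioffs,locnaggr,naggr,nths) private(i,myth)
  !$omp single
  nths = omp_get_num_threads()
  allocate(locnaggr(0:nths))             ! one extra slot so the scan also yields the total
  locnaggr = 0
  !$omp end single
  myth = omp_get_thread_num()

  ! Phase 1: each thread numbers its own aggregates from 1 and records
  ! itself as the definer.  Here every row trivially opens a new aggregate;
  ! in the real SOC1 code the definer may be a different thread, which is
  ! exactly why ioffs(:) has to be stored.
  !$omp do schedule(static)
  do i = 1, nr
    locnaggr(myth) = locnaggr(myth) + 1
    ilaggr(i) = locnaggr(myth)           ! thread-local aggregate index
    ioffs(i)  = myth                     ! thread that defined this entry
  end do
  !$omp end do

  ! Phase 2: exclusive prefix sum over locnaggr.  Afterwards locnaggr(th)
  ! is the shift for entries defined by thread th, and locnaggr(nths) = NAGGR.
  !$omp single
  do myth = nths, 1, -1
    locnaggr(myth) = locnaggr(myth-1)
  end do
  locnaggr(0) = 0
  do myth = 1, nths
    locnaggr(myth) = locnaggr(myth) + locnaggr(myth-1)
  end do
  naggr = locnaggr(nths)
  !$omp end single

  ! Phase 3: shift each entry by the offset of the thread that defined it,
  ! not by the offset of the thread that owns index i.
  !$omp do schedule(static)
  do i = 1, nr
    if (ilaggr(i) > 0) ilaggr(i) = ilaggr(i) + locnaggr(ioffs(i))
  end do
  !$omp end do
  !$omp end parallel

  ! With the contiguous static schedule the final numbering is simply 1..nr,
  ! which makes the result easy to check by eye.
  print *, 'naggr  =', naggr
  print *, 'ilaggr =', ilaggr
end program prefix_shift_sketch

The detail the comment stresses shows up in the last loop: the shift added to ilaggr(i) comes from the thread recorded in ioffs(i), not from the thread that owns index i, because an entry inside one thread's range BNDS(TH):BNDS(TH+1)-1 may have been assigned by a different thread through a strong connection.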