! File: psb_descasb.f90 ! ! Subroutine: psb_descasb ! This routine takes a matrix A with its descriptor, and builds the ! auxiliary descriptor corresponding to the number of overlap levels ! specified on input. This is the actual worker horse..... ! Note that n_ovr > 0 thanks to the caller routine. ! ! Parameters: ! n_ovr - integer. The number of overlap levels ! desc_p - type(). The communication descriptor for the preconditioner. ! desc_a - type(). The communication descriptor. ! a - type( desc_a%halo_index Allocate(tmp_ovr_idx(l_tmp_ovr_idx),tmp_halo(l_tmp_halo)) desc_p%ovrlap_elem(:) = -1 tmp_ovr_idx(:) = -1 tmp_halo(:) = -1 counter_e = 1 tot_recv = 0 counter_h = 1 counter_o = 1 ! See comment in main loop below. call InitPairSearchTree(info) if (info.ne.0) then info=4010 ch_err='InitPairSearhTree' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if if (debug) write(0,*) myrow,'Done InitPairSearchTree',info ! ! A picture is in order to understand what goes on here. ! I is the internal part; H is halo, R row, C column. The final ! matrix with N levels of overlap looks like this ! ! I | Hc1 | 0 | 0 | ! -------|-----|-----|-----| ! Hr1 | Hd1 | Hc2 | 0 | ! -------|-----|-----|-----| ! 0 | Hr2 | Hd2 | Hc2 | ! -------|-----|-----|-----| ! 0 | 0 | Hr3 | Hd3 | Hc3 ! ! At the start we already have I and Hc1, so we know the row ! indices that will make up Hr1, and also who owns them. As we ! actually get those rows, we receive the column indices in Hc2; ! these define the row indices for Hr2, and so on. When we have ! reached the desired level HrN, we may ignore HcN. ! ! Do i_ovr=1,n_ovr if (debug) write(0,*) myrow,'Running on overlap level ',i_ovr,' of ',n_ovr !!$ t_halo_in(:) = -1 ! ! At this point, halo contains a valid halo corresponding to the ! matrix enlarged with the elements in the frontier for I_OVR-1. ! At the start, this is just the halo for A; the rows for indices in ! the first halo will contain column indices defining the second halo ! level and so on. ! bsdindx(:) = 0 sdsz(:) = 0 brvindx(:) = 0 rvsz(:) = 0 idxr = 0 idxs = 0 counter = 1 counter_t = 1 t1 = mpi_wtime() Do While (halo(counter) /= -1) tot_elem=0 proc=halo(counter+psb_proc_id_) n_elem_recv=halo(counter+psb_n_elem_recv_) n_elem_send=halo(counter+n_elem_recv+psb_n_elem_send_) If ((counter+n_elem_recv+n_elem_send) > Size(halo)) then info=-1 call psb_errpush(info,name) goto 9999 end If tot_recv=tot_recv+n_elem_recv if (debug) write(0,*) myrow,' DESCASB tot_recv:',proc,n_elem_recv,tot_recv ! ! While running through the column indices exchanged with other procs ! we have to keep track of which elements actually are overlap and halo ! ones to record them in overlap_elem. We do this by maintaining ! an AVL balanced search tree: at each point counter_e is the next ! free index element. The search routine for gidx will return ! glx if gidx was already assigned a local index (glxSize(halo)) then info=-2 call psb_errpush(info,name) goto 9999 end If idx = halo(counter+psb_elem_recv_+j) idx = halo(counter+psb_elem_recv_+j) If(idx > Size(desc_p%loc_to_glob)) then info=-3 call psb_errpush(info,name) goto 9999 endif gidx = desc_p%loc_to_glob(idx) If((counter_o+2) > Size(tmp_ovr_idx)) Then isz = max((3*Size(tmp_ovr_idx))/2,(counter_o+3)) if (debug) write(0,*) myrow,'Realloc tmp_ovr',isz call psb_realloc(isz,tmp_ovr_idx,info,pad=-1) if (info.ne.0) then info=4010 ch_err='psb_realloc' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if End If tmp_ovr_idx(counter_o)=proc tmp_ovr_idx(counter_o+1)=1 tmp_ovr_idx(counter_o+2)=gidx tmp_ovr_idx(counter_o+3)=-1 counter_o=counter_o+3 If((counter_h+2) > Size(tmp_halo)) Then isz = max((3*Size(tmp_halo))/2,(counter_h+3)) if (debug) write(0,*) myrow,'Realloc tmp_halo',isz call psb_realloc(isz,tmp_halo,info) if (info.ne.0) then info=4010 ch_err='psb_realloc' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if End If tmp_halo(counter_h)=proc tmp_halo(counter_h+1)=1 tmp_halo(counter_h+2)=idx tmp_halo(counter_h+3)=-1 counter_h=counter_h+3 call SearchInsKeyVal(gidx,counter_e,glx,info) !!$ if (debug) write(0,*) 'From searchInsKey ',gidx,glx,counter_e,info if (info>=0) then If (glx < counter_e) Then desc_p%ovrlap_elem(glx+psb_n_dom_ovr_)= & & desc_p%ovrlap_elem(glx+psb_n_dom_ovr_)+1 Else If((counter_e+2) > Size(desc_p%ovrlap_elem)) Then isz = max((3*Size(desc_p%ovrlap_elem))/2,(counter_e+3)) if (debug) write(0,*) myrow,'Realloc ovr_El',isz call psb_realloc(isz,desc_p%ovrlap_elem,info) if (info.ne.0) then info=4010 ch_err='psrealloc' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if End If !!$ if (debug) write(0,*) 'Adding into ovrlap ',gidx,glx,counter_e,info desc_p%ovrlap_elem(counter_e)=gidx desc_p%ovrlap_elem(counter_e+psb_n_dom_ovr_)=2 desc_p%ovrlap_elem(counter_e+2)=-1 counter_e = counter_e + 2 End If else write(0,*) myrow, 'Descasb From SearchInsKeyVal: ',info endif Enddo if (debug) write(0,*) myrow,'Checktmp_o_i Loop Mid1',tmp_ovr_idx(1:10) counter = counter+n_elem_recv ! ! add send elements in halo_index into ovrlap_index ! Do j=0,n_elem_send-1 idx = halo(counter+psb_elem_send_+j) gidx = desc_p%loc_to_glob(idx) If((counter_o+2) > Size(tmp_ovr_idx)) Then isz = max((3*Size(tmp_ovr_idx))/2,(counter_o+3)) if (debug) write(0,*) myrow,'Realloc tmp_ovr',isz call psb_realloc(isz,tmp_ovr_idx,info) if (info.ne.0) then info=4010 ch_err='psrealloc' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if End If tmp_ovr_idx(counter_o)=proc tmp_ovr_idx(counter_o+1)=1 tmp_ovr_idx(counter_o+2)=gidx tmp_ovr_idx(counter_o+3)=-1 counter_o=counter_o+3 call SearchInsKeyVal(gidx,counter_e,glx,info) !!$ if (debug) write(0,*) 'From searchInsKey ',gidx,glx,counter_e,info if (info>=0) then If (glx < counter_e) Then desc_p%ovrlap_elem(glx+psb_n_dom_ovr_)= & & desc_p%ovrlap_elem(glx+psb_n_dom_ovr_)+1 Else If((counter_e+2) > Size(desc_p%ovrlap_elem)) Then isz = max((3*Size(desc_p%ovrlap_elem))/2,(counter_e+3)) if (debug) write(0,*) myrow,'Realloc ovr_el',isz call psb_realloc(isz,desc_p%ovrlap_elem,info) if (info.ne.0) then info=4010 ch_err='psb_realloc' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if End If !!$ if (debug) write(0,*) 'Adding into ovrlap ',gidx,glx,counter_e,info desc_p%ovrlap_elem(counter_e)=gidx desc_p%ovrlap_elem(counter_e+psb_n_dom_ovr_)=2 desc_p%ovrlap_elem(counter_e+2)=-1 counter_e = counter_e + 2 End If else write(0,*) myrow,'Descasb From SearchInsKeyVal: ',info endif ! ! Prepare to exchange the halo rows with the other proc. ! If (i_ovr < (n_ovr)) Then call psb_spinfo(psb_nzrowreq_,a,n_elem,info,iaux=idx) if (info.ne.0) then info=4010 ch_err='psb_spinfo' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if If((idxs+tot_elem+n_elem) > lworks) Then isz = max((3*lworks)/2,(idxs+tot_elem+n_elem)) if (debug) write(0,*) myrow,'Realloc works',isz call psb_realloc(isz,works,info) if (info.ne.0) then info=4010 ch_err='psb_realloc' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if lworks = isz End If If((n_elem) > size(blk%ia2)) Then isz = max((3*size(blk%ia2))/2,(n_elem)) if (debug) write(0,*) myrow,'Realloc blk',isz call psb_spreall(blk,isz,info) if (info.ne.0) then info=4010 ch_err='psb_spreall' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if End If call psb_spgtrow(idx,a,blk,info) if (info.ne.0) then info=4010 ch_err='psb_spgtrow' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if Do jj=1,n_elem works(idxs+tot_elem+jj)=desc_p%loc_to_glob(blk%ia2(jj)) End Do tot_elem=tot_elem+n_elem End If Enddo t4 = t4 + mpi_wtime() -t3 if (i_ovr < n_ovr) then if (tot_elem > 1) then !!$ write(0,*) me,'Realloc temp',tot_elem+2 if (tot_elem+2 > size(temp)) then !!$ write(0,*) me,'Realloc temp',tot_elem+2 deallocate(temp) allocate(temp(tot_elem+2),stat=info) endif Call mrgsrt(tot_elem,works(idxs+1),temp,info) If (info.Eq.0) Call ireordv1(tot_elem,works(idxs+1),temp) lx = works(idxs+1) i = 1 j = 1 do j = j + 1 if (j > tot_elem) exit if (works(idxs+j) /= lx) then i = i + 1 works(idxs+i) = works(idxs+j) lx = works(idxs+i) end if end do tot_elem = i endif if (debug) write(0,*) myrow,'Checktmp_o_i Loop Mid2',tmp_ovr_idx(1:10) sdsz(proc+1) = tot_elem idxs = idxs + tot_elem end if counter = counter+n_elem_send+3 if (debug) write(0,*) myrow,'Checktmp_o_i Loop End',tmp_ovr_idx(1:10) Enddo if (debug) write(0,*)myrow,'End phase 1 DESCASB', m, n_col, tot_recv if (i_ovr < n_ovr) then ! ! Exchange data requests with everybody else: so far we have ! accumulated RECV requests, we have an all-to-all to build ! matchings SENDs. ! call mpi_alltoall(sdsz,1,mpi_integer,rvsz,1,mpi_integer,icomm,info) if (info.ne.0) then info=4010 ch_err='mpi_alltoall' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if idxs = 0 idxr = 0 counter = 1 Do proc=halo(counter) if (proc == -1) exit n_elem_recv = halo(counter+psb_n_elem_recv_) counter = counter+n_elem_recv n_elem_send = halo(counter+psb_n_elem_send_) bsdindx(proc+1) = idxs idxs = idxs + sdsz(proc+1) brvindx(proc+1) = idxr idxr = idxr + rvsz(proc+1) counter = counter+n_elem_send+3 Enddo iszr=sum(rvsz) if (max(iszr,1) > lworkr) then call psb_realloc(max(iszr,1),workr,info) if (info.ne.0) then info=4010 ch_err='psb_realloc' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if lworkr=max(iszr,1) end if call mpi_alltoallv(works,sdsz,bsdindx,mpi_integer,& & workr,rvsz,brvindx,mpi_integer,icomm,info) if (info.ne.0) then info=4010 ch_err='mpi_alltoallv' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if Do i=1,iszr idx=workr(i) if (idx <1) then write(0,*) myrow,'Error in DESCASB ',idx,i,iszr !!$ write(0,*) myrow, ' WORKR :',workr(1:iszr) else If (desc_p%glob_to_loc(idx) < -np) Then ! ! This is a new index. Assigning a local index as ! we receive the guarantees that all indices for HALO(I) ! will be less than those for HALO(J) whenever I Size(desc_p%loc_to_glob)) Then isz = 3*n_col/2 if (debug) write(0,*) myrow,'Realloc loc_to_glob' call psb_realloc(isz,desc_p%loc_to_glob,info) if (info.ne.0) then info=4010 ch_err='psrealloc' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if End If desc_p%glob_to_loc(idx)=n_col desc_p%loc_to_glob(n_col)=idx If((counter_t+3) > Size(t_halo_in))Write(0,*)'bingo' t_halo_in(counter_t)=proc_id t_halo_in(counter_t+1)=1 t_halo_in(counter_t+2)=n_col counter_t=counter_t+3 if (debug) write(0,*) myrow,' DESCASB: Added into t_halo_in from recv',& &proc_id,n_col,idx else if (desc_p%glob_to_loc(idx) < 0) Then if (debug) write(0,*) myrow,'Wrong input to descasb??',& &idx,desc_p%glob_to_loc(idx) End If End Do end if t2 = mpi_wtime() n_row=m+tot_recv desc_p%matrix_data(psb_n_row_)=n_row desc_p%matrix_data(psb_n_col_)=n_col ! ! Ok, now we have a temporary halo with all the info for the ! next round. If we need to keep going, convert the halo format ! from temporary to final, so that we can work out the next iteration. ! This uses one of the convert_comm internals, i.e. we are doing ! the equivalent of a partial call to convert_comm ! If (i_ovr < (n_ovr)) Then If(lwork < (counter_t/3+np*3)) Then isz = max((3*lwork)/2,(counter_t/3+np*3)) if (debug) write(0,*) myrow,'Realloc work',isz deallocate(work) allocate(work(isz),stat=info) lwork=size(work) Endif t_halo_in(counter_t)=-1 !!$ !!$ t_halo_out(:)=-1 !!$ ierr = MPE_Log_event( icrhb, 0, "st CRTHAL" ) if (debug) write(0,*) myrow,'Checktmp_o_i 1',tmp_ovr_idx(1:10) if (debug) write(0,*) myrow,'Calling Crea_Halo' call psi_crea_index(desc_p,t_halo_in,t_halo_out,.false.,info) !!$ Call psi_crea_halo(desc_p%matrix_data,t_halo_in,& !!$ & np,t_halo_out,Size(t_halo_out),dep_list,& !!$ & dl_lda,length_dl,desc_p%loc_to_glob,& !!$ & desc_p%glob_to_loc,work,lwork,info) if (debug) then write(0,*) myrow,'Done Crea_Halo' call blacs_barrier(icontxt,'All') end if if (debug) write(0,*) myrow,'Checktmp_o_i 2',tmp_ovr_idx(1:10) if (debug) write(0,*) myrow,'Done Crea_Halo' !!$ ierr = MPE_Log_event( icrhe, 0, "ed CRHAL " ) halo => t_halo_out ! ! At this point we have built the halo necessary for I_OVR+1. ! End If if (debug) write(0,*) myrow,'Checktmp_o_i ',tmp_ovr_idx(1:10) t3 = mpi_wtime() tl = tl +(t2-t1) tch = tch +(t3-t2) End Do t1 = mpi_wtime() call FreePairSearchTree() !!$ ierr = MPE_Log_event( idsce, 0, "st DSCASB" ) desc_p%matrix_data(psb_m_)=desc_a%matrix_data(psb_m_) desc_p%matrix_data(psb_n_)=desc_a%matrix_data(psb_n_) tmp_halo(counter_h)=-1 tmp_ovr_idx(counter_o)=-1 !!$ ierr = MPE_Log_event( iovrb, 0, "st CNVCRT" ) ! ! At this point we have gathered all the indices in the halo at ! N levels of overlap. Just call convert_comm. This is ! the same routine as gets called inside SPASB. ! if (debug) then write(0,*) 'psb_dscasb: converting indexes' call blacs_barrier(icontxt,'All') end if !.... convert comunication stuctures.... ! first the halo index call psi_crea_index(desc_p,tmp_halo,& & desc_p%halo_index,.false.,info) if(info.ne.0) then call psb_errpush(4010,name,a_err='psi_crea_index') goto 9999 end if ! then the overlap index call psi_crea_index(desc_p,tmp_ovr_idx,& & desc_p%ovrlap_index,.true.,info) if(info.ne.0) then call psb_errpush(4010,name,a_err='psi_crea_index') goto 9999 end if ! next is the ovrlap_elem index call psi_crea_ovr_elem(desc_p%ovrlap_index,desc_p%ovrlap_elem) ! finally bnd_elem call psi_crea_bnd_elem(desc_p,info) if(info.ne.0) then call psb_errpush(4010,name,a_err='psi_crea_bnd_elem') goto 9999 end if ! Ok, register into MATRIX_DATA & free temporary work areas desc_p%matrix_data(psb_dec_type_) = psb_desc_asb_ allocate(desc_p%lprm(1)) desc_p%lprm(1) = 0 if (debug) then write(0,*) myrow,'Done Convert_comm' call blacs_barrier(icontxt,'All') end if if (.false.) then call psb_descprt(70+myrow,desc_p,.false.) end if if (debug) write(0,*) myrow,'Done ConvertComm' !!$ ierr = MPE_Log_event( iovre, 0, "ed CNVCRT" ) Deallocate(works,workr,t_halo_in,t_halo_out,work,& & length_dl,dep_list,tmp_ovr_idx,tmp_halo,& & brvindx,rvsz,sdsz,bsdindx,temp,stat=info) call psb_spfree(blk,info) if (info.ne.0) then info=4010 ch_err='spfree' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if t2 = mpi_wtime() call psb_erractionrestore(err_act) return 9999 continue call psb_erractionrestore(err_act) if (err_act.eq.act_abort) then call psb_error(icontxt) return end if return End Subroutine psb_descasb