@@ -54,17 +54,18 @@ function iterate(doms::DominatedBlocks, state::Nothing=nothing)
5454 return (bb, nothing )
5555end
5656
57- # Construct Dom Tree
58- # Simple algorithm - TODO : Switch to the fast version (e.g. https://tanujkhattar.wordpress.com/2016/01/11/dominator-tree-of-a-directed-graph/)
59- function construct_domtree (cfg:: CFG )
57+ function naive_idoms (cfg:: CFG )
6058 nblocks = length (cfg. blocks)
6159 dom_all = BitSet (1 : nblocks)
6260 dominators = BitSet[n == 1 ? BitSet (1 ) : copy (dom_all) for n = 1 : nblocks]
6361 changed = true
6462 while changed
6563 changed = false
6664 for n = 2 : nblocks
67- isempty (cfg. blocks[n]. preds) && continue
65+ if isempty (cfg. blocks[n]. preds)
66+ isempty (dominators[n]) || (dominators[n] = BitSet ())
67+ continue
68+ end
6869 firstp, rest = Iterators. peel (Iterators. filter (p-> p != 0 , cfg. blocks[n]. preds))
6970 new_doms = copy (dominators[firstp])
7071 for p in rest
@@ -91,7 +92,16 @@ function construct_domtree(cfg::CFG)
9192 idoms[i] = dom
9293 end
9394 end
95+ idoms
96+ end
97+
98+ # Construct Dom Tree
99+ function construct_domtree (cfg:: CFG )
100+ idoms = SNCA (cfg)
101+ nidoms = naive_idoms (cfg)
102+ @assert idoms == nidoms
94103 # Compute children
104+ nblocks = length (cfg. blocks)
95105 domtree = DomTreeNode[DomTreeNode () for _ = 1 : nblocks]
96106 for (idx, idom) in Iterators. enumerate (idoms)
97107 (idx == 1 || idom == 0 ) && continue
@@ -101,3 +111,165 @@ function construct_domtree(cfg::CFG)
101111 update_level! (domtree, 1 , 1 )
102112 DomTree (idoms, domtree)
103113end
114+
115+ #= =============================== [SNCA] ======================================#
116+ #
117+ # This section implements the Semi-NCA (SNCA) dominator tree construction
118+ # described in Georgiadis' PhD thesis [LG05], which itself is a simplification
119+ # of the Simple Lengauer-Tarjan (SLT) algorithm [LT79]. This algorithm matches
120+ # the algorithm choice in LLVM and seems to be a sweet spot in implementation
121+ # simplicity and efficiency.
122+ #
123+ # [LG05] Linear-Time Algorithms for Dominators and Related Problems
124+ # Loukas Georgiadis, Princeton University, November 2005, pp. 21-23:
125+ # ftp://ftp.cs.princeton.edu/reports/2005/737.pdf
126+ #
127+ # [LT79] A fast algorithm for finding dominators in a flowgraph
128+ # Thomas Lengauer, Robert Endre Tarjan, July 1979, ACM TOPLAS 1-1
129+ # http://www.dtic.mil/dtic/tr/fulltext/u2/a054144.pdf
130+ #
131+ begin
# Plain aliases rather than wrapper structs: the names exist purely to document
# which index space a value lives in. Both are `UInt`, so the compiler will not
# catch a basic-block number being used where a DFS number is expected.

# Index of a basic block in `cfg.blocks`.
const BBNumber = UInt
# Position of a basic block in the DFS preorder.
const DFSNumber = UInt
136+
"""
    Node

Per-basic-block working state of the Semi-NCA algorithm, stored as one entry of
a `Vector{Node}` indexed by DFS number.

The textbook formulation keeps separate length-`n` arrays `label`, `semi` and
`ancestor`. Here `semi` and `label` are packed together into a single
array-of-structs entry for efficiency; the ancestor links are kept in a
separate vector by the code that drives the algorithm.
"""
struct Node
    # Semidominator candidate for this block, as a DFS number.
    semi::DFSNumber
    # Label read when this block is evaluated as a predecessor; lowered by
    # `snca_compress!` to the smallest label found along the ancestor chain.
    label::DFSNumber
end
147+
# Result of a depth-first traversal of the CFG: the two numbering maps plus the
# tree's parent links.
struct DFSTree
    # numbering[d] is the BB number of the block whose DFS number is `d`.
    numbering::Vector{BBNumber}
    # reverse[b] is the DFS number of block `b`.
    reverse::Vector{DFSNumber}
    # parents[d] is the DFS number of the tree parent of the block with DFS
    # number `d`. Keeping both sides in DFS numbering saves a few lookups in
    # `snca_compress!`.
    parents::Vector{DFSNumber}
end
# Number of blocks that were assigned a DFS number.
function length(D::DFSTree)
    return length(D.numbering)
end

# All DFS numbers of the tree, in increasing (preorder) order.
function preorder(D::DFSTree)
    return OneTo(length(D))
end
# Return the range `xs` with its first `n` elements removed.
function _drop(xs::AbstractUnitRange, n::Integer)
    lo = first(xs) + n
    hi = last(xs)
    return lo:hi
end
160+
# Allocate a DFSTree with room for `nblocks` blocks. The BB -> DFS map starts
# out all zero; the other two vectors are left uninitialized and must be filled
# in before they are read.
function DFSTree(nblocks::Int)
    dfs_to_bb = Vector{BBNumber}(undef, nblocks)
    bb_to_dfs = zeros(DFSNumber, nblocks)
    parent_of = Vector{DFSNumber}(undef, nblocks)
    return DFSTree(dfs_to_bb, bb_to_dfs, parent_of)
end
167+
"""
    DFS(cfg::CFG, current_node::BBNumber) -> DFSTree

Run an iterative depth-first search over the successor edges of `cfg`, starting
at block `current_node`, and return the resulting `DFSTree`.

Blocks are numbered `1, 2, ...` in the order they are first visited. Blocks
that are never reached keep a `reverse` entry of `0`, and `numbering` and
`parents` are shrunk to the number of visited blocks. The parent recorded for
the start block is `0`.
"""
function DFS(cfg::CFG, current_node::BBNumber)::DFSTree
    dfs = DFSTree(length(cfg.blocks))
    # Each entry is (DFS number of the block that pushed it, block to visit).
    worklist = Tuple{DFSNumber, BBNumber}[(0, current_node)]
    dfs_num = 1
    while !isempty(worklist)
        (parent, bb) = pop!(worklist)
        # A block is numbered when it is popped, not when it is pushed. Marking
        # on push would pin the parent to the first block that happened to see
        # it, which does not in general give a depth-first tree, and Semi-NCA
        # relies on the DFS-tree properties. The price is that a block may sit
        # in the worklist several times; every entry after the first one to be
        # popped is stale and is skipped here.
        dfs.reverse[bb] != 0 && continue
        dfs.reverse[bb] = dfs_num
        dfs.numbering[dfs_num] = bb
        dfs.parents[dfs_num] = parent
        for succ in cfg.blocks[bb].succs
            # Already numbered: pushing it again could only yield a stale entry.
            dfs.reverse[succ] != 0 && continue
            push!(worklist, (dfs_num, succ))
        end
        dfs_num += 1
    end
    # If all blocks are reachable, this is a no-op, otherwise,
    # we shrink these arrays.
    resize!(dfs.numbering, dfs_num - 1)
    resize!(dfs.parents, dfs_num - 1)
    return dfs
end
195+
"""
    snca_compress!(state, ancestors, v, last_linked)

Path-compression step of Semi-NCA; corresponds to `snca_compress` in Figure 2.8
of [LG05]. As the paper suggests, an ancestor counts as already processed when
its DFS number is at least `last_linked`, instead of marking unprocessed
entries with `0` in the ancestor array.

Starting from `v`, follows `ancestors` upwards through processed entries. On
the way back, `state[v].label` is lowered to the smallest label seen along the
chain and `ancestors[v]` is redirected past the compressed part. Mutates
`state` and `ancestors`; returns `nothing`.
"""
function snca_compress!(state::Vector{Node}, ancestors::Vector{DFSNumber},
                        v::DFSNumber, last_linked::DFSNumber)
    anc = ancestors[v]
    @assert anc < v
    # The ancestor has not been processed yet: nothing to compress.
    anc >= last_linked || return nothing
    # Compress the ancestor's own chain first so that its label is final.
    snca_compress!(state, ancestors, anc, last_linked)
    anc_label = state[anc].label
    if anc_label < state[v].label
        state[v] = Node(state[v].semi, anc_label)
    end
    ancestors[v] = ancestors[anc]
    return nothing
end
215+
"""
    SNCA(cfg::CFG) -> Vector{Int}

The main Semi-NCA algorithm; matches Figure 2.8 in [LG05].

Returns a vector indexed by BB number holding each block's immediate
dominator, also as a BB number. The entry is `0` for block 1 and for every
block the DFS from block 1 did not reach.

Note that the pseudocode in [LG05] is not entirely accurate. The best way to
understand what is happening is to read [LT79], then the description of SLT in
[LG05] (warning: inconsistent notation), then the description of Semi-NCA.
"""
function SNCA(cfg::CFG)
    D = DFS(cfg, BBNumber(1))
    # `label` starts out as the identity mapping (the paper does not make that
    # clear); the rationale is Lemma 2.4 in [LG05]. `semi` is never read before
    # it has been assigned below, so its initial value is only a placeholder.
    state = Node[Node(typemax(DFSNumber), w) for w in preorder(D)]
    # Immediate dominators start out as the DFS parents. They stay DFS-indexed
    # until the final pass to avoid extra memory references during the O(N^2)
    # phase below.
    idoms_dfs = copy(D.parents)
    # The parents array doubles as the ancestors array: Semi-NCA never looks at
    # the parents themselves again, and `idoms_dfs` holds its own copy.
    ancestors = D.parents

    # Phase 1: semidominators, visiting blocks in reverse preorder.
    for w in reverse(_drop(preorder(D), 1))
        # Only DFS numbers above `w` have been processed at this point.
        last_linked = DFSNumber(w + 1)
        # LLVM starts from the parent and the paper from `w` itself; neither is
        # needed, since the parent is a predecessor and will be found below.
        best = typemax(DFSNumber)
        for pred_bb in cfg.blocks[D.numbering[w]].preds
            # For the purpose of the domtree, ignore virtual predecessors
            # into catch blocks.
            pred_bb == 0 && continue
            pred = D.reverse[pred_bb]
            # Ignore unreachable predecessors.
            pred == 0 && continue
            # N.B.: This conditional is missing from the pseudocode in
            # Figure 2.8 of [LG05]. It corresponds to the `ancestor[v] != 0`
            # check in the `eval` implementation in Figure 2.6.
            if pred >= last_linked
                snca_compress!(state, ancestors, pred, last_linked)
            end
            best = min(best, state[pred].label)
        end
        state[w] = Node(best, best)
    end

    # Phase 2: in preorder, walk each block's candidate up the chain of already
    # final immediate dominators until it no longer exceeds the semidominator.
    for v in _drop(preorder(D), 1)
        candidate = idoms_dfs[v]
        bound = state[v].semi
        while candidate > bound
            candidate = idoms_dfs[candidate]
        end
        idoms_dfs[v] = candidate
    end

    # Phase 3: translate from DFS numbering back to BB numbering.
    nblocks = length(cfg.blocks)
    idoms_bb = Vector{Int}(undef, nblocks)
    for bb in 1:nblocks
        d = D.reverse[bb]
        idoms_bb[bb] = (bb == 1 || d == 0) ? 0 : D.numbering[idoms_dfs[d]]
    end
    return idoms_bb
end
275+ end
0 commit comments