1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
7 // This file defines the lifting pass which tries to "lift" Alloc
8 // cells (new/local variables) into SSA registers, replacing loads
9 // with the dominating stored value, eliminating loads and stores, and
10 // inserting φ-nodes as needed.
12 // Cited papers and resources:
14 // Ron Cytron et al. 1991. Efficiently computing SSA form...
15 // http://doi.acm.org/10.1145/115372.115320
17 // Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm.
18 // Software Practice and Experience 2001, 4:1-10.
19 // http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
21 // Daniel Berlin, llvmdev mailing list, 2012.
22 // http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
23 // (Be sure to expand the whole thread.)
25 // TODO(adonovan): opt: there are many optimizations worth evaluating, and
26 // the conventional wisdom for SSA construction is that a simple
27 // algorithm well engineered often beats those of better asymptotic
28 // complexity on all but the most egregious inputs.
30 // Danny Berlin suggests that the Cooper et al. algorithm for
31 // computing the dominance frontier is superior to Cytron et al.
32 // Furthermore he recommends that rather than computing the DF for the
33 // whole function then renaming all alloc cells, it may be cheaper to
34 // compute the DF for each alloc cell separately and throw it away.
36 // Consider exploiting liveness information to avoid creating dead
37 // φ-nodes which we then immediately remove.
39 // Also see many other "TODO: opt" suggestions in the code.
49 // If true, show diagnostic information at each step of lifting.
// (Diagnostics go to os.Stderr; see the fmt.Fprintf/Fprintln calls below.)
51 const debugLifting = false
53 // domFrontier maps each block to the set of blocks in its dominance
54 // frontier. The outer slice is conceptually a map keyed by
55 // Block.Index. The inner slice is conceptually a set, possibly
56 // containing duplicates.
58 // TODO(adonovan): opt: measure impact of dups; consider a packed bit
59 // representation, e.g. big.Int, and bitwise parallel operations for
60 // the union step in the Children loop.
62 // domFrontier's methods mutate the slice's elements but not its
63 // length, so their receivers needn't be pointers.
65 type domFrontier [][]*BasicBlock
67 func (df domFrontier) add(u, v *BasicBlock) {
72 // build builds the dominance frontier df for the dominator (sub)tree
73 // rooted at u, using the Cytron et al. algorithm.
75 // TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
76 // by pruning the entire IDF computation, rather than merely pruning
77 // the DF -> IDF step.
78 func (df domFrontier) build(u *BasicBlock) {
79 // Encounter each node u in postorder of dom tree.
80 for _, child := range u.dom.children {
83 for _, vb := range u.Succs {
84 if v := vb.dom; v.idom != u {
88 for _, w := range u.dom.children {
89 for _, vb := range df[w.Index] {
90 // TODO(adonovan): opt: use word-parallel bitwise union.
91 if v := vb.dom; v.idom != u {
98 func buildDomFrontier(fn *Function) domFrontier {
99 df := make(domFrontier, len(fn.Blocks))
100 df.build(fn.Blocks[0])
101 if fn.Recover != nil {
107 func removeInstr(refs []Instruction, instr Instruction) []Instruction {
109 for _, ref := range refs {
116 for j := i; j != len(refs); j++ {
117 refs[j] = nil // aid GC
122 // lift replaces local and new Allocs accessed only with
123 // load/store by SSA registers, inserting φ-nodes where necessary.
124 // The result is a program in classical pruned SSA form.
// Preconditions:
127 // - fn has no dead blocks (blockopt has run).
128 // - Def/use info (Operands and Referrers) is up-to-date.
129 // - The dominator tree is up-to-date.
131 func lift(fn *Function) {
132 // TODO(adonovan): opt: lots of little optimizations may be
133 // worthwhile here, especially if they cause us to avoid
134 // buildDomFrontier. For example:
136 // - Alloc never loaded? Eliminate.
137 // - Alloc never stored? Replace all loads with a zero constant.
138 // - Alloc stored once? Replace loads with dominating store;
139 // don't forget that an Alloc is itself an effective store
141 // - Alloc used only within a single block?
142 // Use degenerate algorithm avoiding φ-nodes.
143 // - Consider synergy with scalar replacement of aggregates (SRA).
144 // e.g. *(&x.f) where x is an Alloc.
145 // Perhaps we'd get better results if we generated this as x.f
146 // i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)).
149 // But we will start with the simplest correct code.
150 df := buildDomFrontier(fn)
// NOTE(review): the stderr dump below is presumably gated by the
// debugLifting constant — the guard line is elided here; confirm.
154 for i, blocks := range df {
157 fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
160 fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks)
165 newPhis := make(newPhiMap)
167 // During this pass we will replace some BasicBlock.Instrs
168 // (allocs, loads and stores) with nil, keeping a count in
169 // BasicBlock.gaps. At the end we will reset Instrs to the
170 // concatenation of all non-dead newPhis and non-nil Instrs
171 // for the block, reusing the original array if space permits.
173 // While we're here, we also eliminate 'rundefers'
174 // instructions in functions that contain no 'defer'
178 // A counter used to generate ~unique ids for Phi nodes, as an
179 // aid to debugging. We use large numbers to make them highly
180 // visible. All nodes are renumbered later.
183 // Determine which allocs we can lift and number them densely.
184 // The renaming phase uses this numbering for compact maps.
186 for _, b := range fn.Blocks {
189 for _, instr := range b.Instrs {
190 switch instr := instr.(type) {
// Each liftable Alloc gets its φ-nodes placed now; the
// Alloc/load/store instructions are nilled out (elided lines).
193 if liftAlloc(df, instr, newPhis, &fresh) {
206 // renaming maps an alloc (keyed by index) to its replacement
207 // value. Initially the renaming contains nil, signifying the
208 // zero constant of the appropriate type; we construct the
209 // Const lazily at most once on each path through the domtree.
210 // TODO(adonovan): opt: cache per-function not per subtree.
// NOTE(review): numAllocs is presumably the count of lifted allocs
// accumulated in the (elided) scan above — confirm.
211 renaming := make([]Value, numAllocs)
214 rename(fn.Blocks[0], renaming, newPhis)
216 // Eliminate dead φ-nodes.
217 removeDeadPhis(fn.Blocks, newPhis)
219 // Prepend remaining live φ-nodes to each block.
220 for _, b := range fn.Blocks {
224 rundefersToKill := b.rundefers
229 if j+b.gaps+rundefersToKill == 0 {
230 continue // fast path: no new phis or gaps
233 // Compact nps + non-nil Instrs into a new slice.
234 // TODO(adonovan): opt: compact in situ (rightwards)
235 // if Instrs has sufficient space or slack.
236 dst := make([]Instruction, len(b.Instrs)+j-b.gaps-rundefersToKill)
237 for i, np := range nps {
240 for _, instr := range b.Instrs {
245 if _, ok := instr.(*RunDefers); ok {
255 // Remove any fn.Locals that were lifted.
// Compacts fn.Locals in place, keeping only unlifted allocs.
257 for _, l := range fn.Locals {
263 // Nil out fn.Locals[j:] to aid GC.
264 for i := j; i < len(fn.Locals); i++ {
267 fn.Locals = fn.Locals[:j]
270 // removeDeadPhis removes φ-nodes not transitively needed by a
271 // non-Phi, non-DebugRef instruction.
272 func removeDeadPhis(blocks []*BasicBlock, newPhis newPhiMap) {
273 // First pass: find the set of "live" φ-nodes: those reachable
274 // from some non-Phi instruction.
276 // We compute reachability in reverse, starting from each φ,
277 // rather than forwards, starting from each live non-Phi
278 // instruction, because this way visits much less of the
280 livePhis := make(map[*Phi]bool)
// Seed the traversal from φ-nodes that already have a direct
// non-Phi referrer, then mark everything they transitively use.
281 for _, npList := range newPhis {
282 for _, np := range npList {
284 if !livePhis[phi] && phiHasDirectReferrer(phi) {
285 markLivePhi(livePhis, phi)
290 // Existing φ-nodes due to && and || operators
291 // are all considered live (see Go issue 19622).
292 for _, b := range blocks {
293 for _, phi := range b.phis() {
294 markLivePhi(livePhis, phi.(*Phi))
298 // Second pass: eliminate unused phis from newPhis.
// npList is filtered in place; j (initialized in an elided line)
// counts the surviving entries — confirm.
299 for block, npList := range newPhis {
301 for _, np := range npList {
302 if livePhis[np.phi] {
306 // discard it, first removing it from referrers
307 for _, val := range np.phi.Edges {
308 if refs := val.Referrers(); refs != nil {
309 *refs = removeInstr(*refs, np.phi)
315 newPhis[block] = npList[:j]
319 // markLivePhi marks phi, and all φ-nodes transitively reachable via
320 // its Operands, live.
321 func markLivePhi(livePhis map[*Phi]bool, phi *Phi) {
323 for _, rand := range phi.Operands(nil) {
324 if q, ok := (*rand).(*Phi); ok {
326 markLivePhi(livePhis, q)
332 // phiHasDirectReferrer reports whether phi is directly referred to by
333 // a non-Phi instruction. Such instructions are the
334 // roots of the liveness traversal.
335 func phiHasDirectReferrer(phi *Phi) bool {
336 for _, instr := range *phi.Referrers() {
337 if _, ok := instr.(*Phi); !ok {
344 type blockSet struct{ big.Int } // (inherit methods from Int)
346 // add adds b to the set and returns true if the set changed.
347 func (s *blockSet) add(b *BasicBlock) bool {
352 s.SetBit(&s.Int, i, 1)
356 // take removes an arbitrary element from a set s and
357 // returns its index, or returns -1 if empty.
358 func (s *blockSet) take() int {
360 for i := 0; i < l; i++ {
362 s.SetBit(&s.Int, i, 0)
369 // newPhi is a pair of a newly introduced φ-node and the lifted Alloc
376 // newPhiMap records for each basic block, the set of newPhis that
377 // must be prepended to the block.
378 type newPhiMap map[*BasicBlock][]newPhi
380 // liftAlloc determines whether alloc can be lifted into registers,
381 // and if so, it populates newPhis with all the φ-nodes it may require
// and returns true.
384 // fresh is a source of fresh ids for phi nodes.
386 func liftAlloc(df domFrontier, alloc *Alloc, newPhis newPhiMap, fresh *int) bool {
387 // Don't lift aggregates into registers, because we don't have
388 // a way to express their zero-constants.
389 switch deref(alloc.Type()).Underlying().(type) {
390 case *types.Array, *types.Struct:
394 // Don't lift named return values in functions that defer
395 // calls that may recover from panic.
396 if fn := alloc.Parent(); fn.Recover != nil {
397 for _, nr := range fn.namedResults {
404 // Compute defblocks, the set of blocks containing a
405 // definition of the alloc cell.
406 var defblocks blockSet
407 for _, instr := range *alloc.Referrers() {
408 // Bail out if we discover the alloc is not liftable;
409 // the only operations permitted to use the alloc are
410 // loads/stores into the cell, and DebugRef.
411 switch instr := instr.(type) {
413 if instr.Val == alloc {
414 return false // address used as value
416 if instr.Addr != alloc {
417 panic("Alloc.Referrers is inconsistent")
419 defblocks.add(instr.Block())
421 if instr.Op != token.MUL {
422 return false // not a load
424 if instr.X != alloc {
425 panic("Alloc.Referrers is inconsistent")
430 return false // some other instruction
433 // The Alloc itself counts as a (zero) definition of the cell.
434 defblocks.add(alloc.Block())
437 fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
444 // What follows is the body of the main loop of the insert-φ
445 // function described by Cytron et al, but instead of using
446 // counter tricks, we just reset the 'hasAlready' and 'work'
447 // sets each iteration. These are bitmaps so it's pretty cheap.
449 // TODO(adonovan): opt: recycle slice storage for W,
450 // hasAlready, defBlocks across liftAlloc calls.
451 var hasAlready blockSet
453 // Initialize W and work to defblocks.
// NOTE(review): the struct assignment to 'work' copies the big.Int
// header, which aliases defblocks' word storage, whereas W.Set makes
// an independent copy — confirm the aliasing is intentional.
454 var work blockSet = defblocks // blocks seen
455 var W blockSet // blocks to do
456 W.Set(&defblocks.Int)
458 // Traverse iterated dominance frontier, inserting φ-nodes.
459 for i := W.take(); i != -1; i = W.take() {
461 for _, v := range df[u.Index] {
// add reports true iff v was not already in hasAlready, so each
// frontier block receives at most one φ for this alloc.
462 if hasAlready.add(v) {
464 // It will be prepended to v.Instrs later, if needed.
466 Edges: make([]Value, len(v.Preds)),
467 Comment: alloc.Comment,
469 // This is merely a debugging aid:
473 phi.pos = alloc.Pos()
474 phi.setType(deref(alloc.Type()))
477 fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v)
479 newPhis[v] = append(newPhis[v], newPhi{phi, alloc})
491 // replaceAll replaces all intraprocedural uses of x with y,
492 // updating x.Referrers and y.Referrers.
493 // Precondition: x.Referrers() != nil, i.e. x must be local to some function.
495 func replaceAll(x, y Value) {
497 pxrefs := x.Referrers()
498 pyrefs := y.Referrers()
499 for _, instr := range *pxrefs {
500 rands = instr.Operands(rands[:0]) // recycle storage
501 for _, rand := range rands {
509 *pyrefs = append(*pyrefs, instr) // dups ok
512 *pxrefs = nil // x is now unreferenced
515 // renamed returns the value to which alloc is being renamed,
516 // constructing it lazily if it's the implicit zero initialization.
518 func renamed(renaming []Value, alloc *Alloc) Value {
519 v := renaming[alloc.index]
521 v = zeroConst(deref(alloc.Type()))
522 renaming[alloc.index] = v
527 // rename implements the (Cytron et al) SSA renaming algorithm, a
528 // preorder traversal of the dominator tree replacing all loads of
529 // Alloc cells with the value stored to that cell by the dominating
530 // store instruction. For lifting, we need only consider loads,
531 // stores and φ-nodes.
533 // renaming is a map from *Alloc (keyed by index number) to its
534 // dominating stored value; newPhis[x] is the set of new φ-nodes to be
535 // prepended to block x.
537 func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) {
538 // Each φ-node becomes the new name for its associated Alloc.
539 for _, np := range newPhis[u] {
542 renaming[alloc.index] = phi
545 // Rename loads and stores of allocs.
546 for i, instr := range u.Instrs {
547 switch instr := instr.(type) {
// Case: the Alloc itself — an implicit store of the zero value.
549 if instr.index >= 0 { // store of zero to Alloc cell
550 // Replace dominated loads by the zero value.
551 renaming[instr.index] = nil
553 fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr)
561 if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
562 // Replace dominated loads by the stored value.
563 renaming[alloc.index] = instr.Val
565 fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
566 instr, instr.Val.Name())
568 // Remove the store from the referrer list of the stored value.
569 if refs := instr.Val.Referrers(); refs != nil {
570 *refs = removeInstr(*refs, instr)
578 if instr.Op == token.MUL {
579 if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
580 newval := renamed(renaming, alloc)
582 fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
583 instr.Name(), instr, newval.Name())
585 // Replace all references to
586 // the loaded value by the
587 // dominating stored value.
588 replaceAll(instr, newval)
596 if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // ref of Alloc cell
598 instr.X = renamed(renaming, alloc)
601 // Add DebugRef to instr.X's referrers.
602 if refs := instr.X.Referrers(); refs != nil {
603 *refs = append(*refs, instr)
606 // A source expression denotes the address
607 // of an Alloc that was optimized away.
610 // Delete the DebugRef.
618 // For each φ-node in a CFG successor, rename the edge.
// The edge index i is u's position in v.Preds (computed in elided lines).
619 for _, v := range u.Succs {
625 for _, np := range phis {
628 newval := renamed(renaming, alloc)
630 fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
631 phi.Name(), u, v, i, alloc.Name(), newval.Name())
633 phi.Edges[i] = newval
634 if prefs := newval.Referrers(); prefs != nil {
635 *prefs = append(*prefs, phi)
640 // Continue depth-first recursion over domtree, pushing a
641 // fresh copy of the renaming map for each subtree.
642 for i, v := range u.dom.children {
644 if i < len(u.dom.children)-1 {
645 // On all but the final iteration, we must make
646 // a copy to avoid destructive update.
// The last child reuses 'renaming' directly, saving one allocation.
647 r = make([]Value, len(renaming))
650 rename(v, r, newPhis)