// Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package ssa // This file defines the lifting pass which tries to "lift" Alloc // cells (new/local variables) into SSA registers, replacing loads // with the dominating stored value, eliminating loads and stores, and // inserting φ-nodes as needed. // Cited papers and resources: // // Ron Cytron et al. 1991. Efficiently computing SSA form... // http://doi.acm.org/10.1145/115372.115320 // // Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm. // Software Practice and Experience 2001, 4:1-10. // http://www.hipersoft.rice.edu/grads/publications/dom14.pdf // // Daniel Berlin, llvmdev mailing list, 2012. // http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html // (Be sure to expand the whole thread.) // TODO(adonovan): opt: there are many optimizations worth evaluating, and // the conventional wisdom for SSA construction is that a simple // algorithm well engineered often beats those of better asymptotic // complexity on all but the most egregious inputs. // // Danny Berlin suggests that the Cooper et al. algorithm for // computing the dominance frontier is superior to Cytron et al. // Furthermore he recommends that rather than computing the DF for the // whole function then renaming all alloc cells, it may be cheaper to // compute the DF for each alloc cell separately and throw it away. // // Consider exploiting liveness information to avoid creating dead // φ-nodes which we then immediately remove. // // Also see many other "TODO: opt" suggestions in the code. import ( "fmt" "go/token" "go/types" "math/big" "os" ) // If true, show diagnostic information at each step of lifting. // Very verbose. const debugLifting = false // domFrontier maps each block to the set of blocks in its dominance // frontier. The outer slice is conceptually a map keyed by // Block.Index. The inner slice is conceptually a set, possibly // containing duplicates. // // TODO(adonovan): opt: measure impact of dups; consider a packed bit // representation, e.g. big.Int, and bitwise parallel operations for // the union step in the Children loop. // // domFrontier's methods mutate the slice's elements but not its // length, so their receivers needn't be pointers. // type domFrontier [][]*BasicBlock func (df domFrontier) add(u, v *BasicBlock) { p := &df[u.Index] *p = append(*p, v) } // build builds the dominance frontier df for the dominator (sub)tree // rooted at u, using the Cytron et al. algorithm. // // TODO(adonovan): opt: consider Berlin approach, computing pruned SSA // by pruning the entire IDF computation, rather than merely pruning // the DF -> IDF step. func (df domFrontier) build(u *BasicBlock) { // Encounter each node u in postorder of dom tree. for _, child := range u.dom.children { df.build(child) } for _, vb := range u.Succs { if v := vb.dom; v.idom != u { df.add(u, vb) } } for _, w := range u.dom.children { for _, vb := range df[w.Index] { // TODO(adonovan): opt: use word-parallel bitwise union. if v := vb.dom; v.idom != u { df.add(u, vb) } } } } func buildDomFrontier(fn *Function) domFrontier { df := make(domFrontier, len(fn.Blocks)) df.build(fn.Blocks[0]) if fn.Recover != nil { df.build(fn.Recover) } return df } func removeInstr(refs []Instruction, instr Instruction) []Instruction { i := 0 for _, ref := range refs { if ref == instr { continue } refs[i] = ref i++ } for j := i; j != len(refs); j++ { refs[j] = nil // aid GC } return refs[:i] } // lift replaces local and new Allocs accessed only with // load/store by SSA registers, inserting φ-nodes where necessary. // The result is a program in classical pruned SSA form. // // Preconditions: // - fn has no dead blocks (blockopt has run). // - Def/use info (Operands and Referrers) is up-to-date. // - The dominator tree is up-to-date. // func lift(fn *Function) { // TODO(adonovan): opt: lots of little optimizations may be // worthwhile here, especially if they cause us to avoid // buildDomFrontier. For example: // // - Alloc never loaded? Eliminate. // - Alloc never stored? Replace all loads with a zero constant. // - Alloc stored once? Replace loads with dominating store; // don't forget that an Alloc is itself an effective store // of zero. // - Alloc used only within a single block? // Use degenerate algorithm avoiding φ-nodes. // - Consider synergy with scalar replacement of aggregates (SRA). // e.g. *(&x.f) where x is an Alloc. // Perhaps we'd get better results if we generated this as x.f // i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)). // Unclear. // // But we will start with the simplest correct code. df := buildDomFrontier(fn) if debugLifting { title := false for i, blocks := range df { if blocks != nil { if !title { fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn) title = true } fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks) } } } newPhis := make(newPhiMap) // During this pass we will replace some BasicBlock.Instrs // (allocs, loads and stores) with nil, keeping a count in // BasicBlock.gaps. At the end we will reset Instrs to the // concatenation of all non-dead newPhis and non-nil Instrs // for the block, reusing the original array if space permits. // While we're here, we also eliminate 'rundefers' // instructions in functions that contain no 'defer' // instructions. usesDefer := false // A counter used to generate ~unique ids for Phi nodes, as an // aid to debugging. We use large numbers to make them highly // visible. All nodes are renumbered later. fresh := 1000 // Determine which allocs we can lift and number them densely. // The renaming phase uses this numbering for compact maps. numAllocs := 0 for _, b := range fn.Blocks { b.gaps = 0 b.rundefers = 0 for _, instr := range b.Instrs { switch instr := instr.(type) { case *Alloc: index := -1 if liftAlloc(df, instr, newPhis, &fresh) { index = numAllocs numAllocs++ } instr.index = index case *Defer: usesDefer = true case *RunDefers: b.rundefers++ } } } // renaming maps an alloc (keyed by index) to its replacement // value. Initially the renaming contains nil, signifying the // zero constant of the appropriate type; we construct the // Const lazily at most once on each path through the domtree. // TODO(adonovan): opt: cache per-function not per subtree. renaming := make([]Value, numAllocs) // Renaming. rename(fn.Blocks[0], renaming, newPhis) // Eliminate dead φ-nodes. removeDeadPhis(fn.Blocks, newPhis) // Prepend remaining live φ-nodes to each block. for _, b := range fn.Blocks { nps := newPhis[b] j := len(nps) rundefersToKill := b.rundefers if usesDefer { rundefersToKill = 0 } if j+b.gaps+rundefersToKill == 0 { continue // fast path: no new phis or gaps } // Compact nps + non-nil Instrs into a new slice. // TODO(adonovan): opt: compact in situ (rightwards) // if Instrs has sufficient space or slack. dst := make([]Instruction, len(b.Instrs)+j-b.gaps-rundefersToKill) for i, np := range nps { dst[i] = np.phi } for _, instr := range b.Instrs { if instr == nil { continue } if !usesDefer { if _, ok := instr.(*RunDefers); ok { continue } } dst[j] = instr j++ } b.Instrs = dst } // Remove any fn.Locals that were lifted. j := 0 for _, l := range fn.Locals { if l.index < 0 { fn.Locals[j] = l j++ } } // Nil out fn.Locals[j:] to aid GC. for i := j; i < len(fn.Locals); i++ { fn.Locals[i] = nil } fn.Locals = fn.Locals[:j] } // removeDeadPhis removes φ-nodes not transitively needed by a // non-Phi, non-DebugRef instruction. func removeDeadPhis(blocks []*BasicBlock, newPhis newPhiMap) { // First pass: find the set of "live" φ-nodes: those reachable // from some non-Phi instruction. // // We compute reachability in reverse, starting from each φ, // rather than forwards, starting from each live non-Phi // instruction, because this way visits much less of the // Value graph. livePhis := make(map[*Phi]bool) for _, npList := range newPhis { for _, np := range npList { phi := np.phi if !livePhis[phi] && phiHasDirectReferrer(phi) { markLivePhi(livePhis, phi) } } } // Existing φ-nodes due to && and || operators // are all considered live (see Go issue 19622). for _, b := range blocks { for _, phi := range b.phis() { markLivePhi(livePhis, phi.(*Phi)) } } // Second pass: eliminate unused phis from newPhis. for block, npList := range newPhis { j := 0 for _, np := range npList { if livePhis[np.phi] { npList[j] = np j++ } else { // discard it, first removing it from referrers for _, val := range np.phi.Edges { if refs := val.Referrers(); refs != nil { *refs = removeInstr(*refs, np.phi) } } np.phi.block = nil } } newPhis[block] = npList[:j] } } // markLivePhi marks phi, and all φ-nodes transitively reachable via // its Operands, live. func markLivePhi(livePhis map[*Phi]bool, phi *Phi) { livePhis[phi] = true for _, rand := range phi.Operands(nil) { if q, ok := (*rand).(*Phi); ok { if !livePhis[q] { markLivePhi(livePhis, q) } } } } // phiHasDirectReferrer reports whether phi is directly referred to by // a non-Phi instruction. Such instructions are the // roots of the liveness traversal. func phiHasDirectReferrer(phi *Phi) bool { for _, instr := range *phi.Referrers() { if _, ok := instr.(*Phi); !ok { return true } } return false } type blockSet struct{ big.Int } // (inherit methods from Int) // add adds b to the set and returns true if the set changed. func (s *blockSet) add(b *BasicBlock) bool { i := b.Index if s.Bit(i) != 0 { return false } s.SetBit(&s.Int, i, 1) return true } // take removes an arbitrary element from a set s and // returns its index, or returns -1 if empty. func (s *blockSet) take() int { l := s.BitLen() for i := 0; i < l; i++ { if s.Bit(i) == 1 { s.SetBit(&s.Int, i, 0) return i } } return -1 } // newPhi is a pair of a newly introduced φ-node and the lifted Alloc // it replaces. type newPhi struct { phi *Phi alloc *Alloc } // newPhiMap records for each basic block, the set of newPhis that // must be prepended to the block. type newPhiMap map[*BasicBlock][]newPhi // liftAlloc determines whether alloc can be lifted into registers, // and if so, it populates newPhis with all the φ-nodes it may require // and returns true. // // fresh is a source of fresh ids for phi nodes. // func liftAlloc(df domFrontier, alloc *Alloc, newPhis newPhiMap, fresh *int) bool { // Don't lift aggregates into registers, because we don't have // a way to express their zero-constants. switch deref(alloc.Type()).Underlying().(type) { case *types.Array, *types.Struct: return false } // Don't lift named return values in functions that defer // calls that may recover from panic. if fn := alloc.Parent(); fn.Recover != nil { for _, nr := range fn.namedResults { if nr == alloc { return false } } } // Compute defblocks, the set of blocks containing a // definition of the alloc cell. var defblocks blockSet for _, instr := range *alloc.Referrers() { // Bail out if we discover the alloc is not liftable; // the only operations permitted to use the alloc are // loads/stores into the cell, and DebugRef. switch instr := instr.(type) { case *Store: if instr.Val == alloc { return false // address used as value } if instr.Addr != alloc { panic("Alloc.Referrers is inconsistent") } defblocks.add(instr.Block()) case *UnOp: if instr.Op != token.MUL { return false // not a load } if instr.X != alloc { panic("Alloc.Referrers is inconsistent") } case *DebugRef: // ok default: return false // some other instruction } } // The Alloc itself counts as a (zero) definition of the cell. defblocks.add(alloc.Block()) if debugLifting { fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name()) } fn := alloc.Parent() // Φ-insertion. // // What follows is the body of the main loop of the insert-φ // function described by Cytron et al, but instead of using // counter tricks, we just reset the 'hasAlready' and 'work' // sets each iteration. These are bitmaps so it's pretty cheap. // // TODO(adonovan): opt: recycle slice storage for W, // hasAlready, defBlocks across liftAlloc calls. var hasAlready blockSet // Initialize W and work to defblocks. var work blockSet = defblocks // blocks seen var W blockSet // blocks to do W.Set(&defblocks.Int) // Traverse iterated dominance frontier, inserting φ-nodes. for i := W.take(); i != -1; i = W.take() { u := fn.Blocks[i] for _, v := range df[u.Index] { if hasAlready.add(v) { // Create φ-node. // It will be prepended to v.Instrs later, if needed. phi := &Phi{ Edges: make([]Value, len(v.Preds)), Comment: alloc.Comment, } // This is merely a debugging aid: phi.setNum(*fresh) *fresh++ phi.pos = alloc.Pos() phi.setType(deref(alloc.Type())) phi.block = v if debugLifting { fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v) } newPhis[v] = append(newPhis[v], newPhi{phi, alloc}) if work.add(v) { W.add(v) } } } } return true } // replaceAll replaces all intraprocedural uses of x with y, // updating x.Referrers and y.Referrers. // Precondition: x.Referrers() != nil, i.e. x must be local to some function. // func replaceAll(x, y Value) { var rands []*Value pxrefs := x.Referrers() pyrefs := y.Referrers() for _, instr := range *pxrefs { rands = instr.Operands(rands[:0]) // recycle storage for _, rand := range rands { if *rand != nil { if *rand == x { *rand = y } } } if pyrefs != nil { *pyrefs = append(*pyrefs, instr) // dups ok } } *pxrefs = nil // x is now unreferenced } // renamed returns the value to which alloc is being renamed, // constructing it lazily if it's the implicit zero initialization. // func renamed(renaming []Value, alloc *Alloc) Value { v := renaming[alloc.index] if v == nil { v = zeroConst(deref(alloc.Type())) renaming[alloc.index] = v } return v } // rename implements the (Cytron et al) SSA renaming algorithm, a // preorder traversal of the dominator tree replacing all loads of // Alloc cells with the value stored to that cell by the dominating // store instruction. For lifting, we need only consider loads, // stores and φ-nodes. // // renaming is a map from *Alloc (keyed by index number) to its // dominating stored value; newPhis[x] is the set of new φ-nodes to be // prepended to block x. // func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) { // Each φ-node becomes the new name for its associated Alloc. for _, np := range newPhis[u] { phi := np.phi alloc := np.alloc renaming[alloc.index] = phi } // Rename loads and stores of allocs. for i, instr := range u.Instrs { switch instr := instr.(type) { case *Alloc: if instr.index >= 0 { // store of zero to Alloc cell // Replace dominated loads by the zero value. renaming[instr.index] = nil if debugLifting { fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr) } // Delete the Alloc. u.Instrs[i] = nil u.gaps++ } case *Store: if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell // Replace dominated loads by the stored value. renaming[alloc.index] = instr.Val if debugLifting { fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n", instr, instr.Val.Name()) } // Remove the store from the referrer list of the stored value. if refs := instr.Val.Referrers(); refs != nil { *refs = removeInstr(*refs, instr) } // Delete the Store. u.Instrs[i] = nil u.gaps++ } case *UnOp: if instr.Op == token.MUL { if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell newval := renamed(renaming, alloc) if debugLifting { fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n", instr.Name(), instr, newval.Name()) } // Replace all references to // the loaded value by the // dominating stored value. replaceAll(instr, newval) // Delete the Load. u.Instrs[i] = nil u.gaps++ } } case *DebugRef: if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // ref of Alloc cell if instr.IsAddr { instr.X = renamed(renaming, alloc) instr.IsAddr = false // Add DebugRef to instr.X's referrers. if refs := instr.X.Referrers(); refs != nil { *refs = append(*refs, instr) } } else { // A source expression denotes the address // of an Alloc that was optimized away. instr.X = nil // Delete the DebugRef. u.Instrs[i] = nil u.gaps++ } } } } // For each φ-node in a CFG successor, rename the edge. for _, v := range u.Succs { phis := newPhis[v] if len(phis) == 0 { continue } i := v.predIndex(u) for _, np := range phis { phi := np.phi alloc := np.alloc newval := renamed(renaming, alloc) if debugLifting { fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n", phi.Name(), u, v, i, alloc.Name(), newval.Name()) } phi.Edges[i] = newval if prefs := newval.Referrers(); prefs != nil { *prefs = append(*prefs, phi) } } } // Continue depth-first recursion over domtree, pushing a // fresh copy of the renaming map for each subtree. for i, v := range u.dom.children { r := renaming if i < len(u.dom.children)-1 { // On all but the final iteration, we must make // a copy to avoid destructive update. r = make([]Value, len(renaming)) copy(r, renaming) } rename(v, r, newPhis) } }