--- /dev/null
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ir
+
+// This file defines the lifting pass which tries to "lift" Alloc
+// cells (new/local variables) into SSA registers, replacing loads
+// with the dominating stored value, eliminating loads and stores, and
+// inserting φ- and σ-nodes as needed.
+
+// Cited papers and resources:
+//
+// Ron Cytron et al. 1991. Efficiently computing SSA form...
+// http://doi.acm.org/10.1145/115372.115320
+//
+// Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm.
+// Software Practice and Experience 2001, 4:1-10.
+// http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
+//
+// Daniel Berlin, llvmdev mailing list, 2012.
+// http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
+// (Be sure to expand the whole thread.)
+//
+// C. Scott Ananian. 1997. The static single information form.
+//
+// Jeremy Singer. 2006. Static program analysis based on virtual register renaming.
+
+// TODO(adonovan): opt: there are many optimizations worth evaluating, and
+// the conventional wisdom for SSA construction is that a simple
+// algorithm well engineered often beats those of better asymptotic
+// complexity on all but the most egregious inputs.
+//
+// Danny Berlin suggests that the Cooper et al. algorithm for
+// computing the dominance frontier is superior to that of Cytron et
+// al. Furthermore, he recommends that rather than computing the DF
+// for the whole function and then renaming all alloc cells, it may
+// be cheaper to compute the DF for each alloc cell separately and
+// throw it away.
+//
+// Consider exploiting liveness information to avoid creating dead
+// φ-nodes which we then immediately remove.
+//
+// Also see many other "TODO: opt" suggestions in the code.
+
+import (
+ "fmt"
+ "go/types"
+ "os"
+)
+
+// If true, show diagnostic information at each step of lifting.
+// Very verbose.
+const debugLifting = false
+
+// domFrontier maps each block to the set of blocks in its dominance
+// frontier. The outer slice is conceptually a map keyed by
+// Block.Index. The inner slice is conceptually a set, possibly
+// containing duplicates.
+//
+// TODO(adonovan): opt: measure impact of dups; consider a packed bit
+// representation, e.g. big.Int, and bitwise parallel operations for
+// the union step in the Children loop.
+//
+// domFrontier's methods mutate the slice's elements but not its
+// length, so their receivers needn't be pointers.
+//
+type domFrontier [][]*BasicBlock
+
+func (df domFrontier) add(u, v *BasicBlock) {
+ df[u.Index] = append(df[u.Index], v)
+}
+
+// build builds the dominance frontier df for the dominator tree of
+// fn, using the algorithm found in A Simple, Fast Dominance
+// Algorithm, Figure 5.
+//
+// TODO(adonovan): opt: consider Berlin's approach, computing pruned
+// SSA by pruning the entire IDF computation, rather than merely
+// pruning the DF -> IDF step.
+func (df domFrontier) build(fn *Function) {
+ for _, b := range fn.Blocks {
+ if len(b.Preds) >= 2 {
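+			// For each join point b, walk up the dominator tree
+			// from each predecessor, stopping just short of b's
+			// immediate dominator; every block on the way has b
+			// in its dominance frontier (cf. Figure 5 of Cooper
+			// et al.).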
+ for _, p := range b.Preds {
+ runner := p
+ for runner != b.dom.idom {
+ df.add(runner, b)
+ runner = runner.dom.idom
+ }
+ }
+ }
+ }
+}
+
+func buildDomFrontier(fn *Function) domFrontier {
+ df := make(domFrontier, len(fn.Blocks))
+ df.build(fn)
+ return df
+}
+
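+// postDomFrontier is the analogue of domFrontier for the
+// postdominator tree: it maps each block to the set of blocks in its
+// post-dominance frontier (again keyed by Block.Index, possibly with
+// duplicates), and is used to place σ-nodes.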
+type postDomFrontier [][]*BasicBlock
+
+func (rdf postDomFrontier) add(u, v *BasicBlock) {
+ rdf[u.Index] = append(rdf[u.Index], v)
+}
+
+func (rdf postDomFrontier) build(fn *Function) {
+ for _, b := range fn.Blocks {
+ if len(b.Succs) >= 2 {
+ for _, s := range b.Succs {
+ runner := s
+ for runner != b.pdom.idom {
+ rdf.add(runner, b)
+ runner = runner.pdom.idom
+ }
+ }
+ }
+ }
+}
+
+func buildPostDomFrontier(fn *Function) postDomFrontier {
+ rdf := make(postDomFrontier, len(fn.Blocks))
+ rdf.build(fn)
+ return rdf
+}
+
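+// removeInstr removes instr from refs, compacting the slice in place
+// and nilling out the vacated tail to aid GC.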
+func removeInstr(refs []Instruction, instr Instruction) []Instruction {
+ i := 0
+ for _, ref := range refs {
+ if ref == instr {
+ continue
+ }
+ refs[i] = ref
+ i++
+ }
+ for j := i; j != len(refs); j++ {
+ refs[j] = nil // aid GC
+ }
+ return refs[:i]
+}
+
+func clearInstrs(instrs []Instruction) {
+ for i := range instrs {
+ instrs[i] = nil
+ }
+}
+
+// lift replaces local and new Allocs that are accessed only by loads
+// and stores with IR registers, inserting φ- and σ-nodes where
+// necessary. The result is a program in pruned SSI form.
+//
+// Preconditions:
+// - fn has no dead blocks (blockopt has run).
+// - Def/use info (Operands and Referrers) is up-to-date.
+// - The dominator tree is up-to-date.
+//
+func lift(fn *Function) {
+ // TODO(adonovan): opt: lots of little optimizations may be
+ // worthwhile here, especially if they cause us to avoid
+ // buildDomFrontier. For example:
+ //
+ // - Alloc never loaded? Eliminate.
+ // - Alloc never stored? Replace all loads with a zero constant.
+ // - Alloc stored once? Replace loads with dominating store;
+ // don't forget that an Alloc is itself an effective store
+ // of zero.
+ // - Alloc used only within a single block?
+ // Use degenerate algorithm avoiding φ-nodes.
+ // - Consider synergy with scalar replacement of aggregates (SRA).
+ // e.g. *(&x.f) where x is an Alloc.
+ // Perhaps we'd get better results if we generated this as x.f
+ // i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)).
+ // Unclear.
+ //
+ // But we will start with the simplest correct code.
+ var df domFrontier
+ var rdf postDomFrontier
+ var closure *closure
+ var newPhis newPhiMap
+ var newSigmas newSigmaMap
+
+ // During this pass we will replace some BasicBlock.Instrs
+ // (allocs, loads and stores) with nil, keeping a count in
+ // BasicBlock.gaps. At the end we will reset Instrs to the
+ // concatenation of all non-dead newPhis and non-nil Instrs
+ // for the block, reusing the original array if space permits.
+
+ // While we're here, we also eliminate 'rundefers'
+ // instructions in functions that contain no 'defer'
+ // instructions.
+ usesDefer := false
+
+ // Determine which allocs we can lift and number them densely.
+ // The renaming phase uses this numbering for compact maps.
+ numAllocs := 0
+ for _, b := range fn.Blocks {
+ b.gaps = 0
+ b.rundefers = 0
+ for _, instr := range b.Instrs {
+ switch instr := instr.(type) {
+ case *Alloc:
+ if !liftable(instr) {
+ instr.index = -1
+ continue
+ }
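+			// Lazily build the dominance data the first time we
+			// see a liftable alloc: the (post)dominance
+			// frontiers, the reachability closure, and the
+			// per-block φ/σ maps.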
+ if numAllocs == 0 {
+ df = buildDomFrontier(fn)
+ rdf = buildPostDomFrontier(fn)
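+				// Skip the reachability closure for tiny
+				// functions; liftAlloc treats a nil closure
+				// conservatively, considering every φ- and
+				// σ-node live.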
+ if len(fn.Blocks) > 2 {
+ closure = transitiveClosure(fn)
+ }
+ newPhis = make(newPhiMap, len(fn.Blocks))
+ newSigmas = make(newSigmaMap, len(fn.Blocks))
+
+ if debugLifting {
+ title := false
+ for i, blocks := range df {
+ if blocks != nil {
+ if !title {
+ fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
+ title = true
+ }
+ fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks)
+ }
+ }
+ }
+ }
+ liftAlloc(closure, df, rdf, instr, newPhis, newSigmas)
+			instr.index = numAllocs
+			numAllocs++
+ case *Defer:
+ usesDefer = true
+ case *RunDefers:
+ b.rundefers++
+ }
+ }
+ }
+
+ if numAllocs > 0 {
+ // renaming maps an alloc (keyed by index) to its replacement
+ // value. Initially the renaming contains nil, signifying the
+ // zero constant of the appropriate type; we construct the
+ // Const lazily at most once on each path through the domtree.
+ // TODO(adonovan): opt: cache per-function not per subtree.
+ renaming := make([]Value, numAllocs)
+
+ // Renaming.
+ rename(fn.Blocks[0], renaming, newPhis, newSigmas)
+
+ simplifyPhis(newPhis)
+
+ // Eliminate dead φ- and σ-nodes.
+ markLiveNodes(fn.Blocks, newPhis, newSigmas)
+ }
+
+ // Prepend remaining live φ-nodes to each block and possibly kill rundefers.
+ for _, b := range fn.Blocks {
+ var head []Instruction
+ if numAllocs > 0 {
+ nps := newPhis[b.Index]
+ head = make([]Instruction, 0, len(nps))
+ for _, pred := range b.Preds {
+ nss := newSigmas[pred.Index]
+ idx := pred.succIndex(b)
+ for _, newSigma := range nss {
+ if sigma := newSigma.sigmas[idx]; sigma != nil && sigma.live {
+ head = append(head, sigma)
+
+ // we didn't populate referrers before, as most
+ // sigma nodes will be killed
+ if refs := sigma.X.Referrers(); refs != nil {
+ *refs = append(*refs, sigma)
+ }
+ } else if sigma != nil {
+ sigma.block = nil
+ }
+ }
+ }
+ for _, np := range nps {
+ if np.phi.live {
+ head = append(head, np.phi)
+ } else {
+ for _, edge := range np.phi.Edges {
+ if refs := edge.Referrers(); refs != nil {
+ *refs = removeInstr(*refs, np.phi)
+ }
+ }
+ np.phi.block = nil
+ }
+ }
+ }
+
+ rundefersToKill := b.rundefers
+ if usesDefer {
+ rundefersToKill = 0
+ }
+
+ j := len(head)
+ if j+b.gaps+rundefersToKill == 0 {
+			continue // fast path: no new φ/σ-nodes, no gaps, no rundefers to kill
+ }
+
+ // We could do straight copies instead of element-wise copies
+ // when both b.gaps and rundefersToKill are zero. However,
+ // that seems to only be the case ~1% of the time, which
+ // doesn't seem worth the extra branch.
+
+ // Remove dead instructions, add phis and sigmas
+ ns := len(b.Instrs) + j - b.gaps - rundefersToKill
+ if ns <= cap(b.Instrs) {
+ // b.Instrs has enough capacity to store all instructions
+
+ // OPT(dh): check cap vs the actually required space; if
+ // there is a big enough difference, it may be worth
+ // allocating a new slice, to avoid pinning memory.
+ dst := b.Instrs[:cap(b.Instrs)]
+ i := len(dst) - 1
+ for n := len(b.Instrs) - 1; n >= 0; n-- {
+ instr := dst[n]
+ if instr == nil {
+ continue
+ }
+ if !usesDefer {
+ if _, ok := instr.(*RunDefers); ok {
+ continue
+ }
+ }
+ dst[i] = instr
+ i--
+ }
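+			// The surviving instructions now occupy dst[i+1:];
+			// reserve len(head) slots directly in front of them
+			// for the φ/σ prefix, and clear everything before
+			// that to aid GC.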
+ off := i + 1 - len(head)
+ // aid GC
+ clearInstrs(dst[:off])
+ dst = dst[off:]
+ copy(dst, head)
+ b.Instrs = dst
+ } else {
+ // not enough space, so allocate a new slice and copy
+ // over.
+ dst := make([]Instruction, ns)
+ copy(dst, head)
+
+ for _, instr := range b.Instrs {
+ if instr == nil {
+ continue
+ }
+ if !usesDefer {
+ if _, ok := instr.(*RunDefers); ok {
+ continue
+ }
+ }
+ dst[j] = instr
+ j++
+ }
+ b.Instrs = dst
+ }
+ }
+
+ // Remove any fn.Locals that were lifted.
+ j := 0
+ for _, l := range fn.Locals {
+ if l.index < 0 {
+ fn.Locals[j] = l
+ j++
+ }
+ }
+ // Nil out fn.Locals[j:] to aid GC.
+ for i := j; i < len(fn.Locals); i++ {
+ fn.Locals[i] = nil
+ }
+ fn.Locals = fn.Locals[:j]
+}
+
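+// hasDirectReferrer reports whether instr has at least one referrer
+// that is neither a φ- nor a σ-node.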
+func hasDirectReferrer(instr Instruction) bool {
+ for _, instr := range *instr.Referrers() {
+ switch instr.(type) {
+ case *Phi, *Sigma:
+ // ignore
+ default:
+ return true
+ }
+ }
+ return false
+}
+
+func markLiveNodes(blocks []*BasicBlock, newPhis newPhiMap, newSigmas newSigmaMap) {
+	// Phi and sigma nodes are considered live if a non-phi, non-sigma
+	// node uses them. Once we find a node that is live, we mark all
+	// of its operands as live, too.
+ for _, npList := range newPhis {
+ for _, np := range npList {
+ phi := np.phi
+ if !phi.live && hasDirectReferrer(phi) {
+ markLivePhi(phi)
+ }
+ }
+ }
+ for _, npList := range newSigmas {
+ for _, np := range npList {
+ for _, sigma := range np.sigmas {
+ if sigma != nil && !sigma.live && hasDirectReferrer(sigma) {
+ markLiveSigma(sigma)
+ }
+ }
+ }
+ }
+ // Existing φ-nodes due to && and || operators
+ // are all considered live (see Go issue 19622).
+ for _, b := range blocks {
+ for _, phi := range b.phis() {
+ markLivePhi(phi.(*Phi))
+ }
+ }
+}
+
+func markLivePhi(phi *Phi) {
+ phi.live = true
+ for _, rand := range phi.Edges {
+ switch rand := rand.(type) {
+ case *Phi:
+ if !rand.live {
+ markLivePhi(rand)
+ }
+ case *Sigma:
+ if !rand.live {
+ markLiveSigma(rand)
+ }
+ }
+ }
+}
+
+func markLiveSigma(sigma *Sigma) {
+ sigma.live = true
+ switch rand := sigma.X.(type) {
+ case *Phi:
+ if !rand.live {
+ markLivePhi(rand)
+ }
+ case *Sigma:
+ if !rand.live {
+ markLiveSigma(rand)
+ }
+ }
+}
+
+// simplifyPhis replaces trivial phis with non-phi alternatives. Phi
+// nodes where all edges are identical, or consist of only the phi
+// itself and one other value, may be replaced with the value.
+func simplifyPhis(newPhis newPhiMap) {
+ // find all phis that are trivial and can be replaced with a
+ // non-phi value. run until we reach a fixpoint, because replacing
+ // a phi may make other phis trivial.
+ for changed := true; changed; {
+ changed = false
+ for _, npList := range newPhis {
+ for _, np := range npList {
+ if np.phi.live {
+					// simplifyPhis temporarily repurposes the
+					// 'live' flag to mark phis that have already
+					// been replaced; all flags are reset below.
+ continue
+ }
+ if r, ok := isUselessPhi(np.phi); ok {
+ // useless phi, replace its uses with the
+ // replacement value. the dead phi pass will clean
+ // up the phi afterwards.
+ replaceAll(np.phi, r)
+ np.phi.live = true
+ changed = true
+ }
+ }
+ }
+ }
+
+ for _, npList := range newPhis {
+ for _, np := range npList {
+ np.phi.live = false
+ }
+ }
+}
+
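+// BlockSet is a set of basic blocks, represented as a boolean slice
+// indexed by Block.Index. idx caches the index of the most recently
+// added or taken element, giving Take a starting hint.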
+type BlockSet struct {
+ idx int
+ values []bool
+ count int
+}
+
+func NewBlockSet(size int) *BlockSet {
+ return &BlockSet{values: make([]bool, size)}
+}
+
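+// Set copies the contents of s2 into s and recomputes the element
+// count. Both sets must have been created with the same size.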
+func (s *BlockSet) Set(s2 *BlockSet) {
+ copy(s.values, s2.values)
+ s.count = 0
+ for _, v := range s.values {
+ if v {
+ s.count++
+ }
+ }
+}
+
+func (s *BlockSet) Num() int {
+ return s.count
+}
+
+func (s *BlockSet) Has(b *BasicBlock) bool {
+ if b.Index >= len(s.values) {
+ return false
+ }
+ return s.values[b.Index]
+}
+
+// Add adds b to the set and reports whether the set changed.
+func (s *BlockSet) Add(b *BasicBlock) bool {
+ if s.values[b.Index] {
+ return false
+ }
+ s.count++
+ s.values[b.Index] = true
+ s.idx = b.Index
+
+ return true
+}
+
+func (s *BlockSet) Clear() {
+ for j := range s.values {
+ s.values[j] = false
+ }
+ s.count = 0
+}
+
+// Take removes an arbitrary element from the set and returns its
+// index, or -1 if the set is empty.
+func (s *BlockSet) Take() int {
+ // [i, end]
+ for i := s.idx; i < len(s.values); i++ {
+ if s.values[i] {
+ s.values[i] = false
+ s.idx = i
+ s.count--
+ return i
+ }
+ }
+
+ // [start, i)
+ for i := 0; i < s.idx; i++ {
+ if s.values[i] {
+ s.values[i] = false
+ s.idx = i
+ s.count--
+ return i
+ }
+ }
+
+ return -1
+}
+
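+// closure represents the transitive closure of the CFG's
+// reachability relation. The intervals of block indices reachable
+// from block i are stored in reachables[span[i]:span[i+1]].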
+type closure struct {
+ span []uint32
+ reachables []interval
+}
+
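+// interval compactly encodes a closed range [start, end] of block
+// indices in one or two uint32 words. A small interval (bit 31
+// clear) fits in one word: bits 0-19 hold start and bits 20-30 hold
+// the length end-start. A large interval (bit 31 set) spans two
+// words: bits 0-19 of the first hold start, and the second word
+// holds end. For example:
+//
+//	[5, 9]    -> 4<<numBits | 5     (one word)
+//	[5, 5000] -> 1<<31 | 5, 5000    (two words)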
+type interval uint32
+
+const (
+ flagMask = 1 << 31
+ numBits = 20
+ lengthBits = 32 - numBits - 1
+ lengthMask = (1<<lengthBits - 1) << numBits
+ numMask = 1<<numBits - 1
+)
+
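+// has reports whether block v is reachable from block s. Blocks[1]
+// (the function's exit block) is treated as reachable from every
+// block, and a block is trivially considered to reach every block it
+// dominates.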
+func (c closure) has(s, v *BasicBlock) bool {
+ idx := uint32(v.Index)
+ if idx == 1 || s.Dominates(v) {
+ return true
+ }
+ r := c.reachable(s.Index)
+ for i := 0; i < len(r); i++ {
+ inv := r[i]
+ var start, end uint32
+ if inv&flagMask == 0 {
+ // small interval
+ start = uint32(inv & numMask)
+ end = start + uint32(inv&lengthMask)>>numBits
+ } else {
+ // large interval
+ i++
+ start = uint32(inv & numMask)
+ end = uint32(r[i])
+ }
+ if idx >= start && idx <= end {
+ return true
+ }
+ }
+ return false
+}
+
+func (c closure) reachable(id int) []interval {
+ return c.reachables[c.span[id]:c.span[id+1]]
+}
+
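+// walk marks as visited every block reachable from b, including b
+// itself.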
+func (c closure) walk(current *BasicBlock, b *BasicBlock, visited []bool) {
+ visited[b.Index] = true
+ for _, succ := range b.Succs {
+ if visited[succ.Index] {
+ continue
+ }
+ visited[succ.Index] = true
+ c.walk(current, succ, visited)
+ }
+}
+
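+// transitiveClosure computes, for every block of fn except the entry
+// block, the set of blocks reachable from it (including itself),
+// encoded as interval lists.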
+func transitiveClosure(fn *Function) *closure {
+ reachable := make([]bool, len(fn.Blocks))
+ c := &closure{}
+ c.span = make([]uint32, len(fn.Blocks)+1)
+
+ addInterval := func(start, end uint32) {
+ if l := end - start; l <= 1<<lengthBits-1 {
+ n := interval(l<<numBits | start)
+ c.reachables = append(c.reachables, n)
+ } else {
+ n1 := interval(1<<31 | start)
+ n2 := interval(end)
+ c.reachables = append(c.reachables, n1, n2)
+ }
+ }
+
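+	// Blocks[0] is skipped: the entry block dominates all blocks, so
+	// has answers for it via Dominates and never consults its
+	// (empty) interval list.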
+ for i, b := range fn.Blocks[1:] {
+ for i := range reachable {
+ reachable[i] = false
+ }
+
+ c.walk(b, b, reachable)
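+		// Convert the reachability bitmap into maximal
+		// [start, end] runs of consecutive block indices;
+		// ^uint32(0) marks "no run in progress".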
+ start := ^uint32(0)
+ for id, isReachable := range reachable {
+ if !isReachable {
+ if start != ^uint32(0) {
+ end := uint32(id) - 1
+ addInterval(start, end)
+ start = ^uint32(0)
+ }
+ continue
+ } else if start == ^uint32(0) {
+ start = uint32(id)
+ }
+ }
+ if start != ^uint32(0) {
+ addInterval(start, uint32(len(reachable))-1)
+ }
+
+ c.span[i+2] = uint32(len(c.reachables))
+ }
+
+ return c
+}
+
+// newPhi is a pair of a newly introduced φ-node and the lifted Alloc
+// it replaces.
+type newPhi struct {
+ phi *Phi
+ alloc *Alloc
+}
+
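+// newSigma is a pair of a lifted Alloc and the σ-nodes created for
+// it in one branch block; sigmas is indexed by successor index, with
+// nil entries for successors in which the value isn't live.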
+type newSigma struct {
+ alloc *Alloc
+ sigmas []*Sigma
+}
+
+// newPhiMap records, for each basic block, the set of newPhis that
+// must be prepended to the block.
+type newPhiMap [][]newPhi
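+
+// newSigmaMap records, for each basic block, the set of newSigmas
+// created for its outgoing edges; each σ-node is materialized at the
+// start of the corresponding successor block.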
+type newSigmaMap [][]newSigma
+
+func liftable(alloc *Alloc) bool {
+ // Don't lift aggregates into registers, because we don't have
+ // a way to express their zero-constants.
+ switch deref(alloc.Type()).Underlying().(type) {
+ case *types.Array, *types.Struct:
+ return false
+ }
+
+ fn := alloc.Parent()
+ // Don't lift named return values in functions that defer
+ // calls that may recover from panic.
+ if fn.hasDefer {
+ for _, nr := range fn.namedResults {
+ if nr == alloc {
+ return false
+ }
+ }
+ }
+
+ for _, instr := range *alloc.Referrers() {
+ switch instr := instr.(type) {
+ case *Store:
+ if instr.Val == alloc {
+ return false // address used as value
+ }
+ if instr.Addr != alloc {
+ panic("Alloc.Referrers is inconsistent")
+ }
+ case *Load:
+ if instr.X != alloc {
+ panic("Alloc.Referrers is inconsistent")
+ }
+
+ case *DebugRef:
+ // ok
+ default:
+ return false
+ }
+ }
+
+ return true
+}
+
+// liftAlloc lifts alloc into registers, populating newPhis and
+// newSigmas with all the φ- and σ-nodes it may require. The alloc
+// must already have been determined liftable by liftable.
+func liftAlloc(closure *closure, df domFrontier, rdf postDomFrontier, alloc *Alloc, newPhis newPhiMap, newSigmas newSigmaMap) {
+ fn := alloc.Parent()
+
+ defblocks := fn.blockset(0)
+ useblocks := fn.blockset(1)
+ Aphi := fn.blockset(2)
+ Asigma := fn.blockset(3)
+ W := fn.blockset(4)
+
+	// Compute defblocks, the set of blocks containing a
+	// definition of the alloc cell, and useblocks, the set of
+	// blocks containing a use.
+	for _, instr := range *alloc.Referrers() {
+		// liftable has already ensured that the only operations
+		// using the alloc are loads/stores into the cell, and
+		// DebugRef.
+ switch instr := instr.(type) {
+ case *Store:
+ defblocks.Add(instr.Block())
+ case *Load:
+ useblocks.Add(instr.Block())
+ for _, ref := range *instr.Referrers() {
+ useblocks.Add(ref.Block())
+ }
+ }
+ }
+ // The Alloc itself counts as a (zero) definition of the cell.
+ defblocks.Add(alloc.Block())
+
+ if debugLifting {
+ fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
+ }
+
+ // Φ-insertion.
+ //
+ // What follows is the body of the main loop of the insert-φ
+ // function described by Cytron et al, but instead of using
+ // counter tricks, we just reset the 'hasAlready' and 'work'
+ // sets each iteration. These are bitmaps so it's pretty cheap.
+
+	// Iterate to a fixed point: σ-insertion can introduce new
+	// definitions that require more φ-nodes, and vice versa.
+
+ for change := true; change; {
+ change = false
+ {
+ // Traverse iterated dominance frontier, inserting φ-nodes.
+ W.Set(defblocks)
+
+ for i := W.Take(); i != -1; i = W.Take() {
+ n := fn.Blocks[i]
+ for _, y := range df[n.Index] {
+ if Aphi.Add(y) {
+ if len(*alloc.Referrers()) == 0 {
+ continue
+ }
+ live := false
+ if closure == nil {
+ live = true
+ } else {
+ for _, ref := range *alloc.Referrers() {
+ if _, ok := ref.(*Load); ok {
+ if closure.has(y, ref.Block()) {
+ live = true
+ break
+ }
+ }
+ }
+ }
+ if !live {
+ continue
+ }
+
+ // Create φ-node.
+ // It will be prepended to v.Instrs later, if needed.
+ phi := &Phi{
+ Edges: make([]Value, len(y.Preds)),
+ }
+
+ phi.source = alloc.source
+ phi.setType(deref(alloc.Type()))
+ phi.block = y
+ if debugLifting {
+ fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, y)
+ }
+ newPhis[y.Index] = append(newPhis[y.Index], newPhi{phi, alloc})
+
+ for _, p := range y.Preds {
+ useblocks.Add(p)
+ }
+ change = true
+ if defblocks.Add(y) {
+ W.Add(y)
+ }
+ }
+ }
+ }
+ }
+
+ {
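+			// Traverse the iterated post-dominance frontier of
+			// the use blocks, inserting σ-nodes: for each branch
+			// block in the frontier, one σ per successor in
+			// which the alloc is still live.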
+ W.Set(useblocks)
+ for i := W.Take(); i != -1; i = W.Take() {
+ n := fn.Blocks[i]
+ for _, y := range rdf[n.Index] {
+ if Asigma.Add(y) {
+ sigmas := make([]*Sigma, 0, len(y.Succs))
+ anyLive := false
+ for _, succ := range y.Succs {
+ live := false
+ for _, ref := range *alloc.Referrers() {
+ if closure == nil || closure.has(succ, ref.Block()) {
+ live = true
+ anyLive = true
+ break
+ }
+ }
+ if live {
+ sigma := &Sigma{
+ From: y,
+ X: alloc,
+ }
+ sigma.source = alloc.source
+ sigma.setType(deref(alloc.Type()))
+ sigma.block = succ
+ sigmas = append(sigmas, sigma)
+ } else {
+ sigmas = append(sigmas, nil)
+ }
+ }
+
+ if anyLive {
+ newSigmas[y.Index] = append(newSigmas[y.Index], newSigma{alloc, sigmas})
+ for _, s := range y.Succs {
+ defblocks.Add(s)
+ }
+ change = true
+ if useblocks.Add(y) {
+ W.Add(y)
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+// replaceAll replaces all intraprocedural uses of x with y,
+// updating x.Referrers and y.Referrers.
+// Precondition: x.Referrers() != nil, i.e. x must be local to some function.
+//
+func replaceAll(x, y Value) {
+ var rands []*Value
+ pxrefs := x.Referrers()
+ pyrefs := y.Referrers()
+ for _, instr := range *pxrefs {
+ rands = instr.Operands(rands[:0]) // recycle storage
+ for _, rand := range rands {
+ if *rand != nil {
+ if *rand == x {
+ *rand = y
+ }
+ }
+ }
+ if pyrefs != nil {
+ *pyrefs = append(*pyrefs, instr) // dups ok
+ }
+ }
+ *pxrefs = nil // x is now unreferenced
+}
+
+// renamed returns the value to which alloc is being renamed,
+// constructing it lazily if it's the implicit zero initialization.
+//
+func renamed(fn *Function, renaming []Value, alloc *Alloc) Value {
+ v := renaming[alloc.index]
+ if v == nil {
+ v = emitConst(fn, zeroConst(deref(alloc.Type())))
+ renaming[alloc.index] = v
+ }
+ return v
+}
+
+// rename implements the Cytron et al-based SSI renaming algorithm, a
+// preorder traversal of the dominator tree replacing all loads of
+// Alloc cells with the value stored to that cell by the dominating
+// store instruction.
+//
+// renaming is a map from *Alloc (keyed by index number) to its
+// dominating stored value; newPhis[x] is the set of new φ-nodes to be
+// prepended to block x.
+//
+func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap, newSigmas newSigmaMap) {
+ // Each φ-node becomes the new name for its associated Alloc.
+ for _, np := range newPhis[u.Index] {
+ phi := np.phi
+ alloc := np.alloc
+ renaming[alloc.index] = phi
+ }
+
+ // Rename loads and stores of allocs.
+ for i, instr := range u.Instrs {
+ switch instr := instr.(type) {
+ case *Alloc:
+ if instr.index >= 0 { // store of zero to Alloc cell
+ // Replace dominated loads by the zero value.
+ renaming[instr.index] = nil
+ if debugLifting {
+ fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr)
+ }
+ // Delete the Alloc.
+ u.Instrs[i] = nil
+ u.gaps++
+ }
+
+ case *Store:
+ if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
+ // Replace dominated loads by the stored value.
+ renaming[alloc.index] = instr.Val
+ if debugLifting {
+ fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
+ instr, instr.Val.Name())
+ }
+ if refs := instr.Addr.Referrers(); refs != nil {
+ *refs = removeInstr(*refs, instr)
+ }
+ if refs := instr.Val.Referrers(); refs != nil {
+ *refs = removeInstr(*refs, instr)
+ }
+ // Delete the Store.
+ u.Instrs[i] = nil
+ u.gaps++
+ }
+
+ case *Load:
+ if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
+ // In theory, we wouldn't be able to replace loads
+ // directly, because a loaded value could be used in
+ // different branches, in which case it should be
+ // replaced with different sigma nodes. But we can't
+ // simply defer replacement, either, because then
+ // later stores might incorrectly affect this load.
+ //
+ // To avoid doing renaming on _all_ values (instead of
+ // just loads and stores like we're doing), we make
+ // sure during code generation that each load is only
+ // used in one block. For example, in constant switch
+ // statements, where the tag is only evaluated once,
+ // we store it in a temporary and load it for each
+ // comparison, so that we have individual loads to
+ // replace.
+ newval := renamed(u.Parent(), renaming, alloc)
+ if debugLifting {
+ fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
+ instr.Name(), instr, newval)
+ }
+ replaceAll(instr, newval)
+ u.Instrs[i] = nil
+ u.gaps++
+ }
+
+ case *DebugRef:
+ if x, ok := instr.X.(*Alloc); ok && x.index >= 0 {
+ if instr.IsAddr {
+ instr.X = renamed(u.Parent(), renaming, x)
+ instr.IsAddr = false
+
+ // Add DebugRef to instr.X's referrers.
+ if refs := instr.X.Referrers(); refs != nil {
+ *refs = append(*refs, instr)
+ }
+ } else {
+ // A source expression denotes the address
+ // of an Alloc that was optimized away.
+ instr.X = nil
+
+ // Delete the DebugRef.
+ u.Instrs[i] = nil
+ u.gaps++
+ }
+ }
+ }
+ }
+
+ // update all outgoing sigma nodes with the dominating store
+ for _, sigmas := range newSigmas[u.Index] {
+ for _, sigma := range sigmas.sigmas {
+ if sigma == nil {
+ continue
+ }
+ sigma.X = renamed(u.Parent(), renaming, sigmas.alloc)
+ }
+ }
+
+ // For each φ-node in a CFG successor, rename the edge.
+ for succi, v := range u.Succs {
+ phis := newPhis[v.Index]
+ if len(phis) == 0 {
+ continue
+ }
+ i := v.predIndex(u)
+ for _, np := range phis {
+ phi := np.phi
+ alloc := np.alloc
+ // if there's a sigma node, use it, else use the dominating value
+ var newval Value
+ for _, sigmas := range newSigmas[u.Index] {
+ if sigmas.alloc == alloc && sigmas.sigmas[succi] != nil {
+ newval = sigmas.sigmas[succi]
+ break
+ }
+ }
+ if newval == nil {
+ newval = renamed(u.Parent(), renaming, alloc)
+ }
+ if debugLifting {
+ fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
+ phi.Name(), u, v, i, alloc.Name(), newval.Name())
+ }
+ phi.Edges[i] = newval
+ if prefs := newval.Referrers(); prefs != nil {
+ *prefs = append(*prefs, phi)
+ }
+ }
+ }
+
+ // Continue depth-first recursion over domtree, pushing a
+ // fresh copy of the renaming map for each subtree.
+ r := make([]Value, len(renaming))
+ for _, v := range u.dom.children {
+ // XXX add debugging
+ copy(r, renaming)
+
+ // on entry to a block, the incoming sigma nodes become the new values for their alloc
+ if idx := u.succIndex(v); idx != -1 {
+ for _, sigma := range newSigmas[u.Index] {
+ if sigma.sigmas[idx] != nil {
+ r[sigma.alloc.index] = sigma.sigmas[idx]
+ }
+ }
+ }
+ rename(v, r, newPhis, newSigmas)
+ }
+}