--- /dev/null
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This package provides Rapid Type Analysis (RTA) for Go, a fast
+// algorithm for call graph construction and discovery of reachable code
+// (and hence dead code) and runtime types. The algorithm was first
+// described in:
+//
+// David F. Bacon and Peter F. Sweeney. 1996.
+// Fast static analysis of C++ virtual function calls. (OOPSLA '96)
+// http://doi.acm.org/10.1145/236337.236371
+//
+// The algorithm uses dynamic programming to tabulate the cross-product
+// of the set of known "address taken" functions with the set of known
+// dynamic calls of the same type. As each new address-taken function
+// is discovered, call graph edges are added from each known callsite,
+// and as each new call site is discovered, call graph edges are added
+// from it to each known address-taken function.
+//
+// A similar approach is used for dynamic calls via interfaces: it
+// tabulates the cross-product of the set of known "runtime types",
+// i.e. types that may appear in an interface value, or be derived from
+// one via reflection, with the set of known "invoke"-mode dynamic
+// calls. As each new "runtime type" is discovered, call edges are
+// added from the known call sites, and as each new call site is
+// discovered, call graph edges are added to each compatible
+// method.
+//
+// In addition, we must consider all exported methods of any runtime type
+// as reachable, since they may be called via reflection.
+//
+// Each time a newly added call edge causes a new function to become
+// reachable, the code of that function is analyzed for more call sites,
+// address-taken functions, and runtime types. The process continues
+// until a fixed point is achieved.
+//
+// The resulting call graph is less precise than one produced by pointer
+// analysis, but the algorithm is much faster. For example, running the
+// cmd/callgraph tool on its own source takes ~2.1s for RTA and ~5.4s
+// for points-to analysis.
+//
+package rta // import "honnef.co/go/tools/callgraph/rta"
+
+// TODO(adonovan): test it by connecting it to the interpreter and
+// replacing all "unreachable" functions by a special intrinsic, and
+// ensure that that intrinsic is never called.
+
+import (
+ "fmt"
+ "go/types"
+
+ "golang.org/x/tools/go/types/typeutil"
+ "honnef.co/go/tools/callgraph"
+ "honnef.co/go/tools/ir"
+)
+
+// A Result holds the results of Rapid Type Analysis, which includes the
+// set of reachable functions/methods, runtime types, and the call graph.
+//
+type Result struct {
+ // CallGraph is the discovered callgraph.
+ // It does not include edges for calls made via reflection.
+ CallGraph *callgraph.Graph
+
+ // Reachable contains the set of reachable functions and methods.
+ // This includes exported methods of runtime types, since
+ // they may be accessed via reflection.
+ // The value indicates whether the function is address-taken.
+ //
+ // (We wrap the bool in a struct to avoid inadvertent use of
+ // "if Reachable[f] {" to test for set membership.)
+ Reachable map[*ir.Function]struct{ AddrTaken bool }
+
+ // RuntimeTypes contains the set of types that are needed at
+ // runtime, for interfaces or reflection.
+ //
+ // The value indicates whether the type is inaccessible to reflection.
+ // Consider:
+ // type A struct{B}
+ // fmt.Println(new(A))
+ // Types *A, A and B are accessible to reflection, but the unnamed
+ // type struct{B} is not.
+ RuntimeTypes typeutil.Map
+}
+
+// Working state of the RTA algorithm.
+type rta struct {
+ result *Result
+
+ prog *ir.Program
+
+ worklist []*ir.Function // list of functions to visit
+
+ // addrTakenFuncsBySig contains all address-taken *Functions, grouped by signature.
+ // Keys are *types.Signature, values are map[*ir.Function]bool sets.
+ addrTakenFuncsBySig typeutil.Map
+
+ // dynCallSites contains all dynamic "call"-mode call sites, grouped by signature.
+ // Keys are *types.Signature, values are unordered []ir.CallInstruction.
+ dynCallSites typeutil.Map
+
+ // invokeSites contains all "invoke"-mode call sites, grouped by interface.
+ // Keys are *types.Interface (never *types.Named),
+ // Values are unordered []ir.CallInstruction sets.
+ invokeSites typeutil.Map
+
+ // The following two maps together define the subset of the
+ // m:n "implements" relation needed by the algorithm.
+
+ // concreteTypes maps each concrete type to the set of interfaces that it implements.
+ // Keys are types.Type, values are unordered []*types.Interface.
+ // Only concrete types used as MakeInterface operands are included.
+ concreteTypes typeutil.Map
+
+ // interfaceTypes maps each interface type to
+ // the set of concrete types that implement it.
+ // Keys are *types.Interface, values are unordered []types.Type.
+ // Only interfaces used in "invoke"-mode CallInstructions are included.
+ interfaceTypes typeutil.Map
+}
+
+// addReachable marks a function as potentially callable at run-time,
+// and ensures that it gets processed.
+func (r *rta) addReachable(f *ir.Function, addrTaken bool) {
+ reachable := r.result.Reachable
+ n := len(reachable)
+ v := reachable[f]
+ if addrTaken {
+ v.AddrTaken = true
+ }
+ reachable[f] = v
+ if len(reachable) > n {
+ // First time seeing f. Add it to the worklist.
+ r.worklist = append(r.worklist, f)
+ }
+}
+
+// addEdge adds the specified call graph edge, and marks it reachable.
+// addrTaken indicates whether to mark the callee as "address-taken".
+func (r *rta) addEdge(site ir.CallInstruction, callee *ir.Function, addrTaken bool) {
+ r.addReachable(callee, addrTaken)
+
+ if g := r.result.CallGraph; g != nil {
+ if site.Parent() == nil {
+ panic(site)
+ }
+ from := g.CreateNode(site.Parent())
+ to := g.CreateNode(callee)
+ callgraph.AddEdge(from, site, to)
+ }
+}
+
+// ---------- addrTakenFuncs × dynCallSites ----------
+
+// visitAddrTakenFunc is called each time we encounter an address-taken function f.
+func (r *rta) visitAddrTakenFunc(f *ir.Function) {
+ // Create two-level map (Signature -> Function -> bool).
+ S := f.Signature
+ funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ir.Function]bool)
+ if funcs == nil {
+ funcs = make(map[*ir.Function]bool)
+ r.addrTakenFuncsBySig.Set(S, funcs)
+ }
+ if !funcs[f] {
+ // First time seeing f.
+ funcs[f] = true
+
+ // If we've seen any dyncalls of this type, mark it reachable,
+ // and add call graph edges.
+ sites, _ := r.dynCallSites.At(S).([]ir.CallInstruction)
+ for _, site := range sites {
+ r.addEdge(site, f, true)
+ }
+ }
+}
+
+// visitDynCall is called each time we encounter a dynamic "call"-mode call.
+func (r *rta) visitDynCall(site ir.CallInstruction) {
+ S := site.Common().Signature()
+
+ // Record the call site.
+ sites, _ := r.dynCallSites.At(S).([]ir.CallInstruction)
+ r.dynCallSites.Set(S, append(sites, site))
+
+ // For each function of signature S that we know is address-taken,
+ // mark it reachable. We'll add the callgraph edges later.
+ funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ir.Function]bool)
+ for g := range funcs {
+ r.addEdge(site, g, true)
+ }
+}
+
+// ---------- concrete types × invoke sites ----------
+
+// addInvokeEdge is called for each new pair (site, C) in the matrix.
+func (r *rta) addInvokeEdge(site ir.CallInstruction, C types.Type) {
+ // Ascertain the concrete method of C to be called.
+ imethod := site.Common().Method
+ cmethod := r.prog.MethodValue(r.prog.MethodSets.MethodSet(C).Lookup(imethod.Pkg(), imethod.Name()))
+ r.addEdge(site, cmethod, true)
+}
+
+// visitInvoke is called each time the algorithm encounters an "invoke"-mode call.
+func (r *rta) visitInvoke(site ir.CallInstruction) {
+ I := site.Common().Value.Type().Underlying().(*types.Interface)
+
+ // Record the invoke site.
+ sites, _ := r.invokeSites.At(I).([]ir.CallInstruction)
+ r.invokeSites.Set(I, append(sites, site))
+
+ // Add callgraph edge for each existing
+ // address-taken concrete type implementing I.
+ for _, C := range r.implementations(I) {
+ r.addInvokeEdge(site, C)
+ }
+}
+
+// ---------- main algorithm ----------
+
+// visitFunc processes function f.
+func (r *rta) visitFunc(f *ir.Function) {
+ var space [32]*ir.Value // preallocate space for common case
+
+ for _, b := range f.Blocks {
+ for _, instr := range b.Instrs {
+ rands := instr.Operands(space[:0])
+
+ switch instr := instr.(type) {
+ case ir.CallInstruction:
+ call := instr.Common()
+ if call.IsInvoke() {
+ r.visitInvoke(instr)
+ } else if g := call.StaticCallee(); g != nil {
+ r.addEdge(instr, g, false)
+ } else if _, ok := call.Value.(*ir.Builtin); !ok {
+ r.visitDynCall(instr)
+ }
+
+ // Ignore the call-position operand when
+ // looking for address-taken Functions.
+ // Hack: assume this is rands[0].
+ rands = rands[1:]
+
+ case *ir.MakeInterface:
+ r.addRuntimeType(instr.X.Type(), false)
+ }
+
+ // Process all address-taken functions.
+ for _, op := range rands {
+ if g, ok := (*op).(*ir.Function); ok {
+ r.visitAddrTakenFunc(g)
+ }
+ }
+ }
+ }
+}
+
+// Analyze performs Rapid Type Analysis, starting at the specified root
+// functions. It returns nil if no roots were specified.
+//
+// If buildCallGraph is true, Result.CallGraph will contain a call
+// graph; otherwise, only the other fields (reachable functions) are
+// populated.
+//
+func Analyze(roots []*ir.Function, buildCallGraph bool) *Result {
+ if len(roots) == 0 {
+ return nil
+ }
+
+ r := &rta{
+ result: &Result{Reachable: make(map[*ir.Function]struct{ AddrTaken bool })},
+ prog: roots[0].Prog,
+ }
+
+ if buildCallGraph {
+ // TODO(adonovan): change callgraph API to eliminate the
+ // notion of a distinguished root node. Some callgraphs
+ // have many roots, or none.
+ r.result.CallGraph = callgraph.New(roots[0])
+ }
+
+ hasher := typeutil.MakeHasher()
+ r.result.RuntimeTypes.SetHasher(hasher)
+ r.addrTakenFuncsBySig.SetHasher(hasher)
+ r.dynCallSites.SetHasher(hasher)
+ r.invokeSites.SetHasher(hasher)
+ r.concreteTypes.SetHasher(hasher)
+ r.interfaceTypes.SetHasher(hasher)
+
+ // Visit functions, processing their instructions, and adding
+ // new functions to the worklist, until a fixed point is
+ // reached.
+ var shadow []*ir.Function // for efficiency, we double-buffer the worklist
+ r.worklist = append(r.worklist, roots...)
+ for len(r.worklist) > 0 {
+ shadow, r.worklist = r.worklist, shadow[:0]
+ for _, f := range shadow {
+ r.visitFunc(f)
+ }
+ }
+ return r.result
+}
+
+// interfaces(C) returns all currently known interfaces implemented by C.
+func (r *rta) interfaces(C types.Type) []*types.Interface {
+ // Ascertain set of interfaces C implements
+ // and update 'implements' relation.
+ var ifaces []*types.Interface
+ r.interfaceTypes.Iterate(func(I types.Type, concs interface{}) {
+ if I := I.(*types.Interface); types.Implements(C, I) {
+ concs, _ := concs.([]types.Type)
+ r.interfaceTypes.Set(I, append(concs, C))
+ ifaces = append(ifaces, I)
+ }
+ })
+ r.concreteTypes.Set(C, ifaces)
+ return ifaces
+}
+
+// implementations(I) returns all currently known concrete types that implement I.
+func (r *rta) implementations(I *types.Interface) []types.Type {
+ var concs []types.Type
+ if v := r.interfaceTypes.At(I); v != nil {
+ concs = v.([]types.Type)
+ } else {
+ // First time seeing this interface.
+ // Update the 'implements' relation.
+ r.concreteTypes.Iterate(func(C types.Type, ifaces interface{}) {
+ if types.Implements(C, I) {
+ ifaces, _ := ifaces.([]*types.Interface)
+ r.concreteTypes.Set(C, append(ifaces, I))
+ concs = append(concs, C)
+ }
+ })
+ r.interfaceTypes.Set(I, concs)
+ }
+ return concs
+}
+
+// addRuntimeType is called for each concrete type that can be the
+// dynamic type of some interface or reflect.Value.
+// Adapted from needMethods in go/ir/builder.go
+//
+func (r *rta) addRuntimeType(T types.Type, skip bool) {
+ if prev, ok := r.result.RuntimeTypes.At(T).(bool); ok {
+ if skip && !prev {
+ r.result.RuntimeTypes.Set(T, skip)
+ }
+ return
+ }
+ r.result.RuntimeTypes.Set(T, skip)
+
+ mset := r.prog.MethodSets.MethodSet(T)
+
+ if _, ok := T.Underlying().(*types.Interface); !ok {
+ // T is a new concrete type.
+ for i, n := 0, mset.Len(); i < n; i++ {
+ sel := mset.At(i)
+ m := sel.Obj()
+
+ if m.Exported() {
+ // Exported methods are always potentially callable via reflection.
+ r.addReachable(r.prog.MethodValue(sel), true)
+ }
+ }
+
+ // Add callgraph edge for each existing dynamic
+ // "invoke"-mode call via that interface.
+ for _, I := range r.interfaces(T) {
+ sites, _ := r.invokeSites.At(I).([]ir.CallInstruction)
+ for _, site := range sites {
+ r.addInvokeEdge(site, T)
+ }
+ }
+ }
+
+ // Precondition: T is not a method signature (*Signature with Recv()!=nil).
+ // Recursive case: skip => don't call makeMethods(T).
+ // Each package maintains its own set of types it has visited.
+
+ var n *types.Named
+ switch T := T.(type) {
+ case *types.Named:
+ n = T
+ case *types.Pointer:
+ n, _ = T.Elem().(*types.Named)
+ }
+ if n != nil {
+ owner := n.Obj().Pkg()
+ if owner == nil {
+ return // built-in error type
+ }
+ }
+
+ // Recursion over signatures of each exported method.
+ for i := 0; i < mset.Len(); i++ {
+ if mset.At(i).Obj().Exported() {
+ sig := mset.At(i).Type().(*types.Signature)
+ r.addRuntimeType(sig.Params(), true) // skip the Tuple itself
+ r.addRuntimeType(sig.Results(), true) // skip the Tuple itself
+ }
+ }
+
+ switch t := T.(type) {
+ case *types.Basic:
+ // nop
+
+ case *types.Interface:
+ // nop---handled by recursion over method set.
+
+ case *types.Pointer:
+ r.addRuntimeType(t.Elem(), false)
+
+ case *types.Slice:
+ r.addRuntimeType(t.Elem(), false)
+
+ case *types.Chan:
+ r.addRuntimeType(t.Elem(), false)
+
+ case *types.Map:
+ r.addRuntimeType(t.Key(), false)
+ r.addRuntimeType(t.Elem(), false)
+
+ case *types.Signature:
+ if t.Recv() != nil {
+ panic(fmt.Sprintf("Signature %s has Recv %s", t, t.Recv()))
+ }
+ r.addRuntimeType(t.Params(), true) // skip the Tuple itself
+ r.addRuntimeType(t.Results(), true) // skip the Tuple itself
+
+ case *types.Named:
+ // A pointer-to-named type can be derived from a named
+ // type via reflection. It may have methods too.
+ r.addRuntimeType(types.NewPointer(T), false)
+
+ // Consider 'type T struct{S}' where S has methods.
+ // Reflection provides no way to get from T to struct{S},
+ // only to S, so the method set of struct{S} is unwanted,
+ // so set 'skip' flag during recursion.
+ r.addRuntimeType(t.Underlying(), true)
+
+ case *types.Array:
+ r.addRuntimeType(t.Elem(), false)
+
+ case *types.Struct:
+ for i, n := 0, t.NumFields(); i < n; i++ {
+ r.addRuntimeType(t.Field(i).Type(), false)
+ }
+
+ case *types.Tuple:
+ for i, n := 0, t.Len(); i < n; i++ {
+ r.addRuntimeType(t.At(i).Type(), false)
+ }
+
+ default:
+ panic(T)
+ }
+}