// Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // This package provides Rapid Type Analysis (RTA) for Go, a fast // algorithm for call graph construction and discovery of reachable code // (and hence dead code) and runtime types. The algorithm was first // described in: // // David F. Bacon and Peter F. Sweeney. 1996. // Fast static analysis of C++ virtual function calls. (OOPSLA '96) // http://doi.acm.org/10.1145/236337.236371 // // The algorithm uses dynamic programming to tabulate the cross-product // of the set of known "address taken" functions with the set of known // dynamic calls of the same type. As each new address-taken function // is discovered, call graph edges are added from each known callsite, // and as each new call site is discovered, call graph edges are added // from it to each known address-taken function. // // A similar approach is used for dynamic calls via interfaces: it // tabulates the cross-product of the set of known "runtime types", // i.e. types that may appear in an interface value, or be derived from // one via reflection, with the set of known "invoke"-mode dynamic // calls. As each new "runtime type" is discovered, call edges are // added from the known call sites, and as each new call site is // discovered, call graph edges are added to each compatible // method. // // In addition, we must consider all exported methods of any runtime type // as reachable, since they may be called via reflection. // // Each time a newly added call edge causes a new function to become // reachable, the code of that function is analyzed for more call sites, // address-taken functions, and runtime types. The process continues // until a fixed point is achieved. // // The resulting call graph is less precise than one produced by pointer // analysis, but the algorithm is much faster. For example, running the // cmd/callgraph tool on its own source takes ~2.1s for RTA and ~5.4s // for points-to analysis. // package rta // import "honnef.co/go/tools/callgraph/rta" // TODO(adonovan): test it by connecting it to the interpreter and // replacing all "unreachable" functions by a special intrinsic, and // ensure that that intrinsic is never called. import ( "fmt" "go/types" "golang.org/x/tools/go/types/typeutil" "honnef.co/go/tools/callgraph" "honnef.co/go/tools/ir" ) // A Result holds the results of Rapid Type Analysis, which includes the // set of reachable functions/methods, runtime types, and the call graph. // type Result struct { // CallGraph is the discovered callgraph. // It does not include edges for calls made via reflection. CallGraph *callgraph.Graph // Reachable contains the set of reachable functions and methods. // This includes exported methods of runtime types, since // they may be accessed via reflection. // The value indicates whether the function is address-taken. // // (We wrap the bool in a struct to avoid inadvertent use of // "if Reachable[f] {" to test for set membership.) Reachable map[*ir.Function]struct{ AddrTaken bool } // RuntimeTypes contains the set of types that are needed at // runtime, for interfaces or reflection. // // The value indicates whether the type is inaccessible to reflection. // Consider: // type A struct{B} // fmt.Println(new(A)) // Types *A, A and B are accessible to reflection, but the unnamed // type struct{B} is not. RuntimeTypes typeutil.Map } // Working state of the RTA algorithm. type rta struct { result *Result prog *ir.Program worklist []*ir.Function // list of functions to visit // addrTakenFuncsBySig contains all address-taken *Functions, grouped by signature. // Keys are *types.Signature, values are map[*ir.Function]bool sets. addrTakenFuncsBySig typeutil.Map // dynCallSites contains all dynamic "call"-mode call sites, grouped by signature. // Keys are *types.Signature, values are unordered []ir.CallInstruction. dynCallSites typeutil.Map // invokeSites contains all "invoke"-mode call sites, grouped by interface. // Keys are *types.Interface (never *types.Named), // Values are unordered []ir.CallInstruction sets. invokeSites typeutil.Map // The following two maps together define the subset of the // m:n "implements" relation needed by the algorithm. // concreteTypes maps each concrete type to the set of interfaces that it implements. // Keys are types.Type, values are unordered []*types.Interface. // Only concrete types used as MakeInterface operands are included. concreteTypes typeutil.Map // interfaceTypes maps each interface type to // the set of concrete types that implement it. // Keys are *types.Interface, values are unordered []types.Type. // Only interfaces used in "invoke"-mode CallInstructions are included. interfaceTypes typeutil.Map } // addReachable marks a function as potentially callable at run-time, // and ensures that it gets processed. func (r *rta) addReachable(f *ir.Function, addrTaken bool) { reachable := r.result.Reachable n := len(reachable) v := reachable[f] if addrTaken { v.AddrTaken = true } reachable[f] = v if len(reachable) > n { // First time seeing f. Add it to the worklist. r.worklist = append(r.worklist, f) } } // addEdge adds the specified call graph edge, and marks it reachable. // addrTaken indicates whether to mark the callee as "address-taken". func (r *rta) addEdge(site ir.CallInstruction, callee *ir.Function, addrTaken bool) { r.addReachable(callee, addrTaken) if g := r.result.CallGraph; g != nil { if site.Parent() == nil { panic(site) } from := g.CreateNode(site.Parent()) to := g.CreateNode(callee) callgraph.AddEdge(from, site, to) } } // ---------- addrTakenFuncs × dynCallSites ---------- // visitAddrTakenFunc is called each time we encounter an address-taken function f. func (r *rta) visitAddrTakenFunc(f *ir.Function) { // Create two-level map (Signature -> Function -> bool). S := f.Signature funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ir.Function]bool) if funcs == nil { funcs = make(map[*ir.Function]bool) r.addrTakenFuncsBySig.Set(S, funcs) } if !funcs[f] { // First time seeing f. funcs[f] = true // If we've seen any dyncalls of this type, mark it reachable, // and add call graph edges. sites, _ := r.dynCallSites.At(S).([]ir.CallInstruction) for _, site := range sites { r.addEdge(site, f, true) } } } // visitDynCall is called each time we encounter a dynamic "call"-mode call. func (r *rta) visitDynCall(site ir.CallInstruction) { S := site.Common().Signature() // Record the call site. sites, _ := r.dynCallSites.At(S).([]ir.CallInstruction) r.dynCallSites.Set(S, append(sites, site)) // For each function of signature S that we know is address-taken, // mark it reachable. We'll add the callgraph edges later. funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ir.Function]bool) for g := range funcs { r.addEdge(site, g, true) } } // ---------- concrete types × invoke sites ---------- // addInvokeEdge is called for each new pair (site, C) in the matrix. func (r *rta) addInvokeEdge(site ir.CallInstruction, C types.Type) { // Ascertain the concrete method of C to be called. imethod := site.Common().Method cmethod := r.prog.MethodValue(r.prog.MethodSets.MethodSet(C).Lookup(imethod.Pkg(), imethod.Name())) r.addEdge(site, cmethod, true) } // visitInvoke is called each time the algorithm encounters an "invoke"-mode call. func (r *rta) visitInvoke(site ir.CallInstruction) { I := site.Common().Value.Type().Underlying().(*types.Interface) // Record the invoke site. sites, _ := r.invokeSites.At(I).([]ir.CallInstruction) r.invokeSites.Set(I, append(sites, site)) // Add callgraph edge for each existing // address-taken concrete type implementing I. for _, C := range r.implementations(I) { r.addInvokeEdge(site, C) } } // ---------- main algorithm ---------- // visitFunc processes function f. func (r *rta) visitFunc(f *ir.Function) { var space [32]*ir.Value // preallocate space for common case for _, b := range f.Blocks { for _, instr := range b.Instrs { rands := instr.Operands(space[:0]) switch instr := instr.(type) { case ir.CallInstruction: call := instr.Common() if call.IsInvoke() { r.visitInvoke(instr) } else if g := call.StaticCallee(); g != nil { r.addEdge(instr, g, false) } else if _, ok := call.Value.(*ir.Builtin); !ok { r.visitDynCall(instr) } // Ignore the call-position operand when // looking for address-taken Functions. // Hack: assume this is rands[0]. rands = rands[1:] case *ir.MakeInterface: r.addRuntimeType(instr.X.Type(), false) } // Process all address-taken functions. for _, op := range rands { if g, ok := (*op).(*ir.Function); ok { r.visitAddrTakenFunc(g) } } } } } // Analyze performs Rapid Type Analysis, starting at the specified root // functions. It returns nil if no roots were specified. // // If buildCallGraph is true, Result.CallGraph will contain a call // graph; otherwise, only the other fields (reachable functions) are // populated. // func Analyze(roots []*ir.Function, buildCallGraph bool) *Result { if len(roots) == 0 { return nil } r := &rta{ result: &Result{Reachable: make(map[*ir.Function]struct{ AddrTaken bool })}, prog: roots[0].Prog, } if buildCallGraph { // TODO(adonovan): change callgraph API to eliminate the // notion of a distinguished root node. Some callgraphs // have many roots, or none. r.result.CallGraph = callgraph.New(roots[0]) } hasher := typeutil.MakeHasher() r.result.RuntimeTypes.SetHasher(hasher) r.addrTakenFuncsBySig.SetHasher(hasher) r.dynCallSites.SetHasher(hasher) r.invokeSites.SetHasher(hasher) r.concreteTypes.SetHasher(hasher) r.interfaceTypes.SetHasher(hasher) // Visit functions, processing their instructions, and adding // new functions to the worklist, until a fixed point is // reached. var shadow []*ir.Function // for efficiency, we double-buffer the worklist r.worklist = append(r.worklist, roots...) for len(r.worklist) > 0 { shadow, r.worklist = r.worklist, shadow[:0] for _, f := range shadow { r.visitFunc(f) } } return r.result } // interfaces(C) returns all currently known interfaces implemented by C. func (r *rta) interfaces(C types.Type) []*types.Interface { // Ascertain set of interfaces C implements // and update 'implements' relation. var ifaces []*types.Interface r.interfaceTypes.Iterate(func(I types.Type, concs interface{}) { if I := I.(*types.Interface); types.Implements(C, I) { concs, _ := concs.([]types.Type) r.interfaceTypes.Set(I, append(concs, C)) ifaces = append(ifaces, I) } }) r.concreteTypes.Set(C, ifaces) return ifaces } // implementations(I) returns all currently known concrete types that implement I. func (r *rta) implementations(I *types.Interface) []types.Type { var concs []types.Type if v := r.interfaceTypes.At(I); v != nil { concs = v.([]types.Type) } else { // First time seeing this interface. // Update the 'implements' relation. r.concreteTypes.Iterate(func(C types.Type, ifaces interface{}) { if types.Implements(C, I) { ifaces, _ := ifaces.([]*types.Interface) r.concreteTypes.Set(C, append(ifaces, I)) concs = append(concs, C) } }) r.interfaceTypes.Set(I, concs) } return concs } // addRuntimeType is called for each concrete type that can be the // dynamic type of some interface or reflect.Value. // Adapted from needMethods in go/ir/builder.go // func (r *rta) addRuntimeType(T types.Type, skip bool) { if prev, ok := r.result.RuntimeTypes.At(T).(bool); ok { if skip && !prev { r.result.RuntimeTypes.Set(T, skip) } return } r.result.RuntimeTypes.Set(T, skip) mset := r.prog.MethodSets.MethodSet(T) if _, ok := T.Underlying().(*types.Interface); !ok { // T is a new concrete type. for i, n := 0, mset.Len(); i < n; i++ { sel := mset.At(i) m := sel.Obj() if m.Exported() { // Exported methods are always potentially callable via reflection. r.addReachable(r.prog.MethodValue(sel), true) } } // Add callgraph edge for each existing dynamic // "invoke"-mode call via that interface. for _, I := range r.interfaces(T) { sites, _ := r.invokeSites.At(I).([]ir.CallInstruction) for _, site := range sites { r.addInvokeEdge(site, T) } } } // Precondition: T is not a method signature (*Signature with Recv()!=nil). // Recursive case: skip => don't call makeMethods(T). // Each package maintains its own set of types it has visited. var n *types.Named switch T := T.(type) { case *types.Named: n = T case *types.Pointer: n, _ = T.Elem().(*types.Named) } if n != nil { owner := n.Obj().Pkg() if owner == nil { return // built-in error type } } // Recursion over signatures of each exported method. for i := 0; i < mset.Len(); i++ { if mset.At(i).Obj().Exported() { sig := mset.At(i).Type().(*types.Signature) r.addRuntimeType(sig.Params(), true) // skip the Tuple itself r.addRuntimeType(sig.Results(), true) // skip the Tuple itself } } switch t := T.(type) { case *types.Basic: // nop case *types.Interface: // nop---handled by recursion over method set. case *types.Pointer: r.addRuntimeType(t.Elem(), false) case *types.Slice: r.addRuntimeType(t.Elem(), false) case *types.Chan: r.addRuntimeType(t.Elem(), false) case *types.Map: r.addRuntimeType(t.Key(), false) r.addRuntimeType(t.Elem(), false) case *types.Signature: if t.Recv() != nil { panic(fmt.Sprintf("Signature %s has Recv %s", t, t.Recv())) } r.addRuntimeType(t.Params(), true) // skip the Tuple itself r.addRuntimeType(t.Results(), true) // skip the Tuple itself case *types.Named: // A pointer-to-named type can be derived from a named // type via reflection. It may have methods too. r.addRuntimeType(types.NewPointer(T), false) // Consider 'type T struct{S}' where S has methods. // Reflection provides no way to get from T to struct{S}, // only to S, so the method set of struct{S} is unwanted, // so set 'skip' flag during recursion. r.addRuntimeType(t.Underlying(), true) case *types.Array: r.addRuntimeType(t.Elem(), false) case *types.Struct: for i, n := 0, t.NumFields(); i < n; i++ { r.addRuntimeType(t.Field(i).Type(), false) } case *types.Tuple: for i, n := 0, t.Len(); i < n; i++ { r.addRuntimeType(t.At(i).Type(), false) } default: panic(T) } }