1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // This package provides Rapid Type Analysis (RTA) for Go, a fast
6 // algorithm for call graph construction and discovery of reachable code
7 // (and hence dead code) and runtime types. The algorithm was first
10 // David F. Bacon and Peter F. Sweeney. 1996.
11 // Fast static analysis of C++ virtual function calls. (OOPSLA '96)
12 // http://doi.acm.org/10.1145/236337.236371
14 // The algorithm uses dynamic programming to tabulate the cross-product
15 // of the set of known "address taken" functions with the set of known
16 // dynamic calls of the same type. As each new address-taken function
17 // is discovered, call graph edges are added from each known callsite,
18 // and as each new call site is discovered, call graph edges are added
19 // from it to each known address-taken function.
21 // A similar approach is used for dynamic calls via interfaces: it
22 // tabulates the cross-product of the set of known "runtime types",
23 // i.e. types that may appear in an interface value, or be derived from
24 // one via reflection, with the set of known "invoke"-mode dynamic
25 // calls. As each new "runtime type" is discovered, call edges are
26 // added from the known call sites, and as each new call site is
27 // discovered, call graph edges are added to each compatible
30 // In addition, we must consider all exported methods of any runtime type
31 // as reachable, since they may be called via reflection.
33 // Each time a newly added call edge causes a new function to become
34 // reachable, the code of that function is analyzed for more call sites,
35 // address-taken functions, and runtime types. The process continues
36 // until a fixed point is achieved.
38 // The resulting call graph is less precise than one produced by pointer
39 // analysis, but the algorithm is much faster. For example, running the
40 // cmd/callgraph tool on its own source takes ~2.1s for RTA and ~5.4s
41 // for points-to analysis.
45 // TODO(adonovan): test it by connecting it to the interpreter and
46 // replacing all "unreachable" functions by a special intrinsic, and
47 // ensure that that intrinsic is never called.
53 "honnef.co/go/tools/go/callgraph"
54 "honnef.co/go/tools/go/ir"
56 "golang.org/x/tools/go/types/typeutil"
59 // A Result holds the results of Rapid Type Analysis, which includes the
60 // set of reachable functions/methods, runtime types, and the call graph.
63 // CallGraph is the discovered callgraph.
64 // It does not include edges for calls made via reflection.
65 CallGraph *callgraph.Graph
67 // Reachable contains the set of reachable functions and methods.
68 // This includes exported methods of runtime types, since
69 // they may be accessed via reflection.
70 // The value indicates whether the function is address-taken.
72 // (We wrap the bool in a struct to avoid inadvertent use of
73 // "if Reachable[f] {" to test for set membership.)
74 Reachable map[*ir.Function]struct{ AddrTaken bool }
76 // RuntimeTypes contains the set of types that are needed at
77 // runtime, for interfaces or reflection.
79 // The value indicates whether the type is inaccessible to reflection.
82 // fmt.Println(new(A))
83 // Types *A, A and B are accessible to reflection, but the unnamed
84 // type struct{B} is not.
85 RuntimeTypes typeutil.Map
88 // Working state of the RTA algorithm.
94 worklist []*ir.Function // list of functions to visit
96 // addrTakenFuncsBySig contains all address-taken *Functions, grouped by signature.
97 // Keys are *types.Signature, values are map[*ir.Function]bool sets.
98 addrTakenFuncsBySig typeutil.Map
100 // dynCallSites contains all dynamic "call"-mode call sites, grouped by signature.
101 // Keys are *types.Signature, values are unordered []ir.CallInstruction.
102 dynCallSites typeutil.Map
104 // invokeSites contains all "invoke"-mode call sites, grouped by interface.
105 // Keys are *types.Interface (never *types.Named),
106 // Values are unordered []ir.CallInstruction sets.
107 invokeSites typeutil.Map
109 // The following two maps together define the subset of the
110 // m:n "implements" relation needed by the algorithm.
112 // concreteTypes maps each concrete type to the set of interfaces that it implements.
113 // Keys are types.Type, values are unordered []*types.Interface.
114 // Only concrete types used as MakeInterface operands are included.
115 concreteTypes typeutil.Map
117 // interfaceTypes maps each interface type to
118 // the set of concrete types that implement it.
119 // Keys are *types.Interface, values are unordered []types.Type.
120 // Only interfaces used in "invoke"-mode CallInstructions are included.
121 interfaceTypes typeutil.Map
124 // addReachable marks a function as potentially callable at run-time,
125 // and ensures that it gets processed.
126 func (r *rta) addReachable(f *ir.Function, addrTaken bool) {
127 reachable := r.result.Reachable
134 if len(reachable) > n {
135 // First time seeing f. Add it to the worklist.
136 r.worklist = append(r.worklist, f)
140 // addEdge adds the specified call graph edge, and marks it reachable.
141 // addrTaken indicates whether to mark the callee as "address-taken".
142 func (r *rta) addEdge(site ir.CallInstruction, callee *ir.Function, addrTaken bool) {
143 r.addReachable(callee, addrTaken)
145 if g := r.result.CallGraph; g != nil {
146 if site.Parent() == nil {
149 from := g.CreateNode(site.Parent())
150 to := g.CreateNode(callee)
151 callgraph.AddEdge(from, site, to)
155 // ---------- addrTakenFuncs × dynCallSites ----------
157 // visitAddrTakenFunc is called each time we encounter an address-taken function f.
158 func (r *rta) visitAddrTakenFunc(f *ir.Function) {
159 // Create two-level map (Signature -> Function -> bool).
161 funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ir.Function]bool)
163 funcs = make(map[*ir.Function]bool)
164 r.addrTakenFuncsBySig.Set(S, funcs)
167 // First time seeing f.
170 // If we've seen any dyncalls of this type, mark it reachable,
171 // and add call graph edges.
172 sites, _ := r.dynCallSites.At(S).([]ir.CallInstruction)
173 for _, site := range sites {
174 r.addEdge(site, f, true)
179 // visitDynCall is called each time we encounter a dynamic "call"-mode call.
180 func (r *rta) visitDynCall(site ir.CallInstruction) {
181 S := site.Common().Signature()
183 // Record the call site.
184 sites, _ := r.dynCallSites.At(S).([]ir.CallInstruction)
185 r.dynCallSites.Set(S, append(sites, site))
187 // For each function of signature S that we know is address-taken,
188 // mark it reachable. We'll add the callgraph edges later.
189 funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ir.Function]bool)
190 for g := range funcs {
191 r.addEdge(site, g, true)
195 // ---------- concrete types × invoke sites ----------
197 // addInvokeEdge is called for each new pair (site, C) in the matrix.
198 func (r *rta) addInvokeEdge(site ir.CallInstruction, C types.Type) {
199 // Ascertain the concrete method of C to be called.
200 imethod := site.Common().Method
201 cmethod := r.prog.MethodValue(r.prog.MethodSets.MethodSet(C).Lookup(imethod.Pkg(), imethod.Name()))
202 r.addEdge(site, cmethod, true)
205 // visitInvoke is called each time the algorithm encounters an "invoke"-mode call.
206 func (r *rta) visitInvoke(site ir.CallInstruction) {
207 I := site.Common().Value.Type().Underlying().(*types.Interface)
209 // Record the invoke site.
210 sites, _ := r.invokeSites.At(I).([]ir.CallInstruction)
211 r.invokeSites.Set(I, append(sites, site))
213 // Add callgraph edge for each existing
214 // address-taken concrete type implementing I.
215 for _, C := range r.implementations(I) {
216 r.addInvokeEdge(site, C)
220 // ---------- main algorithm ----------
222 // visitFunc processes function f.
223 func (r *rta) visitFunc(f *ir.Function) {
224 var space [32]*ir.Value // preallocate space for common case
226 for _, b := range f.Blocks {
227 for _, instr := range b.Instrs {
228 rands := instr.Operands(space[:0])
230 switch instr := instr.(type) {
231 case ir.CallInstruction:
232 call := instr.Common()
235 } else if g := call.StaticCallee(); g != nil {
236 r.addEdge(instr, g, false)
237 } else if _, ok := call.Value.(*ir.Builtin); !ok {
238 r.visitDynCall(instr)
241 // Ignore the call-position operand when
242 // looking for address-taken Functions.
243 // Hack: assume this is rands[0].
246 case *ir.MakeInterface:
247 r.addRuntimeType(instr.X.Type(), false)
250 // Process all address-taken functions.
251 for _, op := range rands {
252 if g, ok := (*op).(*ir.Function); ok {
253 r.visitAddrTakenFunc(g)
260 // Analyze performs Rapid Type Analysis, starting at the specified root
261 // functions. It returns nil if no roots were specified.
263 // If buildCallGraph is true, Result.CallGraph will contain a call
264 // graph; otherwise, only the other fields (reachable functions) are
267 func Analyze(roots []*ir.Function, buildCallGraph bool) *Result {
273 result: &Result{Reachable: make(map[*ir.Function]struct{ AddrTaken bool })},
278 // TODO(adonovan): change callgraph API to eliminate the
279 // notion of a distinguished root node. Some callgraphs
280 // have many roots, or none.
281 r.result.CallGraph = callgraph.New(roots[0])
284 hasher := typeutil.MakeHasher()
285 r.result.RuntimeTypes.SetHasher(hasher)
286 r.addrTakenFuncsBySig.SetHasher(hasher)
287 r.dynCallSites.SetHasher(hasher)
288 r.invokeSites.SetHasher(hasher)
289 r.concreteTypes.SetHasher(hasher)
290 r.interfaceTypes.SetHasher(hasher)
292 // Visit functions, processing their instructions, and adding
293 // new functions to the worklist, until a fixed point is
295 var shadow []*ir.Function // for efficiency, we double-buffer the worklist
296 r.worklist = append(r.worklist, roots...)
297 for len(r.worklist) > 0 {
298 shadow, r.worklist = r.worklist, shadow[:0]
299 for _, f := range shadow {
306 // interfaces(C) returns all currently known interfaces implemented by C.
307 func (r *rta) interfaces(C types.Type) []*types.Interface {
308 // Ascertain set of interfaces C implements
309 // and update 'implements' relation.
310 var ifaces []*types.Interface
311 r.interfaceTypes.Iterate(func(I types.Type, concs interface{}) {
312 if I := I.(*types.Interface); types.Implements(C, I) {
313 concs, _ := concs.([]types.Type)
314 r.interfaceTypes.Set(I, append(concs, C))
315 ifaces = append(ifaces, I)
318 r.concreteTypes.Set(C, ifaces)
322 // implementations(I) returns all currently known concrete types that implement I.
323 func (r *rta) implementations(I *types.Interface) []types.Type {
324 var concs []types.Type
325 if v := r.interfaceTypes.At(I); v != nil {
326 concs = v.([]types.Type)
328 // First time seeing this interface.
329 // Update the 'implements' relation.
330 r.concreteTypes.Iterate(func(C types.Type, ifaces interface{}) {
331 if types.Implements(C, I) {
332 ifaces, _ := ifaces.([]*types.Interface)
333 r.concreteTypes.Set(C, append(ifaces, I))
334 concs = append(concs, C)
337 r.interfaceTypes.Set(I, concs)
342 // addRuntimeType is called for each concrete type that can be the
343 // dynamic type of some interface or reflect.Value.
344 // Adapted from needMethods in go/ir/builder.go
346 func (r *rta) addRuntimeType(T types.Type, skip bool) {
347 if prev, ok := r.result.RuntimeTypes.At(T).(bool); ok {
349 r.result.RuntimeTypes.Set(T, skip)
353 r.result.RuntimeTypes.Set(T, skip)
355 mset := r.prog.MethodSets.MethodSet(T)
357 if _, ok := T.Underlying().(*types.Interface); !ok {
358 // T is a new concrete type.
359 for i, n := 0, mset.Len(); i < n; i++ {
364 // Exported methods are always potentially callable via reflection.
365 r.addReachable(r.prog.MethodValue(sel), true)
369 // Add callgraph edge for each existing dynamic
370 // "invoke"-mode call via that interface.
371 for _, I := range r.interfaces(T) {
372 sites, _ := r.invokeSites.At(I).([]ir.CallInstruction)
373 for _, site := range sites {
374 r.addInvokeEdge(site, T)
379 // Precondition: T is not a method signature (*Signature with Recv()!=nil).
380 // Recursive case: skip => don't call makeMethods(T).
381 // Each package maintains its own set of types it has visited.
384 switch T := T.(type) {
388 n, _ = T.Elem().(*types.Named)
391 owner := n.Obj().Pkg()
393 return // built-in error type
397 // Recursion over signatures of each exported method.
398 for i := 0; i < mset.Len(); i++ {
399 if mset.At(i).Obj().Exported() {
400 sig := mset.At(i).Type().(*types.Signature)
401 r.addRuntimeType(sig.Params(), true) // skip the Tuple itself
402 r.addRuntimeType(sig.Results(), true) // skip the Tuple itself
406 switch t := T.(type) {
410 case *types.Interface:
411 // nop---handled by recursion over method set.
414 r.addRuntimeType(t.Elem(), false)
417 r.addRuntimeType(t.Elem(), false)
420 r.addRuntimeType(t.Elem(), false)
423 r.addRuntimeType(t.Key(), false)
424 r.addRuntimeType(t.Elem(), false)
426 case *types.Signature:
428 panic(fmt.Sprintf("Signature %s has Recv %s", t, t.Recv()))
430 r.addRuntimeType(t.Params(), true) // skip the Tuple itself
431 r.addRuntimeType(t.Results(), true) // skip the Tuple itself
434 // A pointer-to-named type can be derived from a named
435 // type via reflection. It may have methods too.
436 r.addRuntimeType(types.NewPointer(T), false)
438 // Consider 'type T struct{S}' where S has methods.
439 // Reflection provides no way to get from T to struct{S},
440 // only to S, so the method set of struct{S} is unwanted,
441 // so set 'skip' flag during recursion.
442 r.addRuntimeType(t.Underlying(), true)
445 r.addRuntimeType(t.Elem(), false)
448 for i, n := 0, t.NumFields(); i < n; i++ {
449 r.addRuntimeType(t.Field(i).Type(), false)
453 for i, n := 0, t.Len(); i < n; i++ {
454 r.addRuntimeType(t.At(i).Type(), false)