--- /dev/null
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pointer
+
+// This file defines the main datatypes and Analyze function of the pointer analysis.
+
+import (
+ "fmt"
+ "go/token"
+ "go/types"
+ "io"
+ "os"
+ "reflect"
+ "runtime"
+ "runtime/debug"
+ "sort"
+
+ "golang.org/x/tools/go/callgraph"
+ "golang.org/x/tools/go/ssa"
+ "golang.org/x/tools/go/types/typeutil"
+)
+
+const (
+ // optimization options; enable all when committing
+ optRenumber = true // enable renumbering optimization (makes logs hard to read)
+ optHVN = true // enable pointer equivalence via Hash-Value Numbering
+
+ // debugging options; disable all when committing
+ debugHVN = false // enable assertions in HVN
+ debugHVNVerbose = false // enable extra HVN logging
+ debugHVNCrossCheck = false // run solver with/without HVN and compare (caveats below)
+ debugTimers = false // show running time of each phase
+)
+
+// object.flags bitmask values.
+const (
+ otTagged = 1 << iota // type-tagged object
+ otIndirect // type-tagged object with indirect payload
+ otFunction // function object
+)
+
+// An object represents a contiguous block of memory to which some
+// (generalized) pointer may point.
+//
+// (Note: most variables called 'obj' are not *objects but nodeids
+// such that a.nodes[obj].obj != nil.)
+//
+type object struct {
+ // flags is a bitset of the node type (ot*) flags defined above.
+ flags uint32
+
+ // Number of following nodes belonging to the same "object"
+ // allocation. Zero for all other nodes.
+ size uint32
+
+ // data describes this object; it has one of these types:
+ //
+ // ssa.Value for an object allocated by an SSA operation.
+ // types.Type for an rtype instance object or *rtype-tagged object.
+ // string for an instrinsic object, e.g. the array behind os.Args.
+ // nil for an object allocated by an instrinsic.
+ // (cgn provides the identity of the intrinsic.)
+ data interface{}
+
+ // The call-graph node (=context) in which this object was allocated.
+ // May be nil for global objects: Global, Const, some Functions.
+ cgn *cgnode
+}
+
+// nodeid denotes a node.
+// It is an index within analysis.nodes.
+// We use small integers, not *node pointers, for many reasons:
+// - they are smaller on 64-bit systems.
+// - sets of them can be represented compactly in bitvectors or BDDs.
+// - order matters; a field offset can be computed by simple addition.
+type nodeid uint32
+
+// A node is an equivalence class of memory locations.
+// Nodes may be pointers, pointed-to locations, neither, or both.
+//
+// Nodes that are pointed-to locations ("labels") have an enclosing
+// object (see analysis.enclosingObject).
+//
+type node struct {
+ // If non-nil, this node is the start of an object
+ // (addressable memory location).
+ // The following obj.size nodes implicitly belong to the object;
+ // they locate their object by scanning back.
+ obj *object
+
+ // The type of the field denoted by this node. Non-aggregate,
+ // unless this is an tagged.T node (i.e. the thing
+ // pointed to by an interface) in which case typ is that type.
+ typ types.Type
+
+ // subelement indicates which directly embedded subelement of
+ // an object of aggregate type (struct, tuple, array) this is.
+ subelement *fieldInfo // e.g. ".a.b[*].c"
+
+ // Solver state for the canonical node of this pointer-
+ // equivalence class. Each node is created with its own state
+ // but they become shared after HVN.
+ solve *solverState
+}
+
+// An analysis instance holds the state of a single pointer analysis problem.
+type analysis struct {
+ config *Config // the client's control/observer interface
+ prog *ssa.Program // the program being analyzed
+ log io.Writer // log stream; nil to disable
+ panicNode nodeid // sink for panic, source for recover
+ nodes []*node // indexed by nodeid
+ flattenMemo map[types.Type][]*fieldInfo // memoization of flatten()
+ trackTypes map[types.Type]bool // memoization of shouldTrack()
+ constraints []constraint // set of constraints
+ cgnodes []*cgnode // all cgnodes
+ genq []*cgnode // queue of functions to generate constraints for
+ intrinsics map[*ssa.Function]intrinsic // non-nil values are summaries for intrinsic fns
+ globalval map[ssa.Value]nodeid // node for each global ssa.Value
+ globalobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton
+ localval map[ssa.Value]nodeid // node for each local ssa.Value
+ localobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton
+ atFuncs map[*ssa.Function]bool // address-taken functions (for presolver)
+ mapValues []nodeid // values of makemap objects (indirect in HVN)
+ work nodeset // solver's worklist
+ result *Result // results of the analysis
+ track track // pointerlike types whose aliasing we track
+ deltaSpace []int // working space for iterating over PTS deltas
+
+ // Reflection & intrinsics:
+ hasher typeutil.Hasher // cache of type hashes
+ reflectValueObj types.Object // type symbol for reflect.Value (if present)
+ reflectValueCall *ssa.Function // (reflect.Value).Call
+ reflectRtypeObj types.Object // *types.TypeName for reflect.rtype (if present)
+ reflectRtypePtr *types.Pointer // *reflect.rtype
+ reflectType *types.Named // reflect.Type
+ rtypes typeutil.Map // nodeid of canonical *rtype-tagged object for type T
+ reflectZeros typeutil.Map // nodeid of canonical T-tagged object for zero value
+ runtimeSetFinalizer *ssa.Function // runtime.SetFinalizer
+}
+
+// enclosingObj returns the first node of the addressable memory
+// object that encloses node id. Panic ensues if that node does not
+// belong to any object.
+func (a *analysis) enclosingObj(id nodeid) nodeid {
+ // Find previous node with obj != nil.
+ for i := id; i >= 0; i-- {
+ n := a.nodes[i]
+ if obj := n.obj; obj != nil {
+ if i+nodeid(obj.size) <= id {
+ break // out of bounds
+ }
+ return i
+ }
+ }
+ panic("node has no enclosing object")
+}
+
+// labelFor returns the Label for node id.
+// Panic ensues if that node is not addressable.
+func (a *analysis) labelFor(id nodeid) *Label {
+ return &Label{
+ obj: a.nodes[a.enclosingObj(id)].obj,
+ subelement: a.nodes[id].subelement,
+ }
+}
+
+func (a *analysis) warnf(pos token.Pos, format string, args ...interface{}) {
+ msg := fmt.Sprintf(format, args...)
+ if a.log != nil {
+ fmt.Fprintf(a.log, "%s: warning: %s\n", a.prog.Fset.Position(pos), msg)
+ }
+ a.result.Warnings = append(a.result.Warnings, Warning{pos, msg})
+}
+
+// computeTrackBits sets a.track to the necessary 'track' bits for the pointer queries.
+func (a *analysis) computeTrackBits() {
+ if len(a.config.extendedQueries) != 0 {
+ // TODO(dh): only track the types necessary for the query.
+ a.track = trackAll
+ return
+ }
+ var queryTypes []types.Type
+ for v := range a.config.Queries {
+ queryTypes = append(queryTypes, v.Type())
+ }
+ for v := range a.config.IndirectQueries {
+ queryTypes = append(queryTypes, mustDeref(v.Type()))
+ }
+ for _, t := range queryTypes {
+ switch t.Underlying().(type) {
+ case *types.Chan:
+ a.track |= trackChan
+ case *types.Map:
+ a.track |= trackMap
+ case *types.Pointer:
+ a.track |= trackPtr
+ case *types.Slice:
+ a.track |= trackSlice
+ case *types.Interface:
+ a.track = trackAll
+ return
+ }
+ if rVObj := a.reflectValueObj; rVObj != nil && types.Identical(t, rVObj.Type()) {
+ a.track = trackAll
+ return
+ }
+ }
+}
+
+// Analyze runs the pointer analysis with the scope and options
+// specified by config, and returns the (synthetic) root of the callgraph.
+//
+// Pointer analysis of a transitively closed well-typed program should
+// always succeed. An error can occur only due to an internal bug.
+//
+func Analyze(config *Config) (result *Result, err error) {
+ if config.Mains == nil {
+ return nil, fmt.Errorf("no main/test packages to analyze (check $GOROOT/$GOPATH)")
+ }
+ defer func() {
+ if p := recover(); p != nil {
+ err = fmt.Errorf("internal error in pointer analysis: %v (please report this bug)", p)
+ fmt.Fprintln(os.Stderr, "Internal panic in pointer analysis:")
+ debug.PrintStack()
+ }
+ }()
+
+ a := &analysis{
+ config: config,
+ log: config.Log,
+ prog: config.prog(),
+ globalval: make(map[ssa.Value]nodeid),
+ globalobj: make(map[ssa.Value]nodeid),
+ flattenMemo: make(map[types.Type][]*fieldInfo),
+ trackTypes: make(map[types.Type]bool),
+ atFuncs: make(map[*ssa.Function]bool),
+ hasher: typeutil.MakeHasher(),
+ intrinsics: make(map[*ssa.Function]intrinsic),
+ result: &Result{
+ Queries: make(map[ssa.Value]Pointer),
+ IndirectQueries: make(map[ssa.Value]Pointer),
+ },
+ deltaSpace: make([]int, 0, 100),
+ }
+
+ if false {
+ a.log = os.Stderr // for debugging crashes; extremely verbose
+ }
+
+ if a.log != nil {
+ fmt.Fprintln(a.log, "==== Starting analysis")
+ }
+
+ // Pointer analysis requires a complete program for soundness.
+ // Check to prevent accidental misconfiguration.
+ for _, pkg := range a.prog.AllPackages() {
+ // (This only checks that the package scope is complete,
+ // not that func bodies exist, but it's a good signal.)
+ if !pkg.Pkg.Complete() {
+ return nil, fmt.Errorf(`pointer analysis requires a complete program yet package %q was incomplete`, pkg.Pkg.Path())
+ }
+ }
+
+ if reflect := a.prog.ImportedPackage("reflect"); reflect != nil {
+ rV := reflect.Pkg.Scope().Lookup("Value")
+ a.reflectValueObj = rV
+ a.reflectValueCall = a.prog.LookupMethod(rV.Type(), nil, "Call")
+ a.reflectType = reflect.Pkg.Scope().Lookup("Type").Type().(*types.Named)
+ a.reflectRtypeObj = reflect.Pkg.Scope().Lookup("rtype")
+ a.reflectRtypePtr = types.NewPointer(a.reflectRtypeObj.Type())
+
+ // Override flattening of reflect.Value, treating it like a basic type.
+ tReflectValue := a.reflectValueObj.Type()
+ a.flattenMemo[tReflectValue] = []*fieldInfo{{typ: tReflectValue}}
+
+ // Override shouldTrack of reflect.Value and *reflect.rtype.
+ // Always track pointers of these types.
+ a.trackTypes[tReflectValue] = true
+ a.trackTypes[a.reflectRtypePtr] = true
+
+ a.rtypes.SetHasher(a.hasher)
+ a.reflectZeros.SetHasher(a.hasher)
+ }
+ if runtime := a.prog.ImportedPackage("runtime"); runtime != nil {
+ a.runtimeSetFinalizer = runtime.Func("SetFinalizer")
+ }
+ a.computeTrackBits()
+
+ a.generate()
+ a.showCounts()
+
+ if optRenumber {
+ a.renumber()
+ }
+
+ N := len(a.nodes) // excludes solver-created nodes
+
+ if optHVN {
+ if debugHVNCrossCheck {
+ // Cross-check: run the solver once without
+ // optimization, once with, and compare the
+ // solutions.
+ savedConstraints := a.constraints
+
+ a.solve()
+ a.dumpSolution("A.pts", N)
+
+ // Restore.
+ a.constraints = savedConstraints
+ for _, n := range a.nodes {
+ n.solve = new(solverState)
+ }
+ a.nodes = a.nodes[:N]
+
+ // rtypes is effectively part of the solver state.
+ a.rtypes = typeutil.Map{}
+ a.rtypes.SetHasher(a.hasher)
+ }
+
+ a.hvn()
+ }
+
+ if debugHVNCrossCheck {
+ runtime.GC()
+ runtime.GC()
+ }
+
+ a.solve()
+
+ // Compare solutions.
+ if optHVN && debugHVNCrossCheck {
+ a.dumpSolution("B.pts", N)
+
+ if !diff("A.pts", "B.pts") {
+ return nil, fmt.Errorf("internal error: optimization changed solution")
+ }
+ }
+
+ // Create callgraph.Nodes in deterministic order.
+ if cg := a.result.CallGraph; cg != nil {
+ for _, caller := range a.cgnodes {
+ cg.CreateNode(caller.fn)
+ }
+ }
+
+ // Add dynamic edges to call graph.
+ var space [100]int
+ for _, caller := range a.cgnodes {
+ for _, site := range caller.sites {
+ for _, callee := range a.nodes[site.targets].solve.pts.AppendTo(space[:0]) {
+ a.callEdge(caller, site, nodeid(callee))
+ }
+ }
+ }
+
+ return a.result, nil
+}
+
+// callEdge is called for each edge in the callgraph.
+// calleeid is the callee's object node (has otFunction flag).
+//
+func (a *analysis) callEdge(caller *cgnode, site *callsite, calleeid nodeid) {
+ obj := a.nodes[calleeid].obj
+ if obj.flags&otFunction == 0 {
+ panic(fmt.Sprintf("callEdge %s -> n%d: not a function object", site, calleeid))
+ }
+ callee := obj.cgn
+
+ if cg := a.result.CallGraph; cg != nil {
+ // TODO(adonovan): opt: I would expect duplicate edges
+ // (to wrappers) to arise due to the elimination of
+ // context information, but I haven't observed any.
+ // Understand this better.
+ callgraph.AddEdge(cg.CreateNode(caller.fn), site.instr, cg.CreateNode(callee.fn))
+ }
+
+ if a.log != nil {
+ fmt.Fprintf(a.log, "\tcall edge %s -> %s\n", site, callee)
+ }
+
+ // Warn about calls to non-intrinsic external functions.
+ // TODO(adonovan): de-dup these messages.
+ if fn := callee.fn; fn.Blocks == nil && a.findIntrinsic(fn) == nil {
+ a.warnf(site.pos(), "unsound call to unknown intrinsic: %s", fn)
+ a.warnf(fn.Pos(), " (declared here)")
+ }
+}
+
+// dumpSolution writes the PTS solution to the specified file.
+//
+// It only dumps the nodes that existed before solving. The order in
+// which solver-created nodes are created depends on pre-solver
+// optimization, so we can't include them in the cross-check.
+//
+func (a *analysis) dumpSolution(filename string, N int) {
+ f, err := os.Create(filename)
+ if err != nil {
+ panic(err)
+ }
+ for id, n := range a.nodes[:N] {
+ if _, err := fmt.Fprintf(f, "pts(n%d) = {", id); err != nil {
+ panic(err)
+ }
+ var sep string
+ for _, l := range n.solve.pts.AppendTo(a.deltaSpace) {
+ if l >= N {
+ break
+ }
+ fmt.Fprintf(f, "%s%d", sep, l)
+ sep = " "
+ }
+ fmt.Fprintf(f, "} : %s\n", n.typ)
+ }
+ if err := f.Close(); err != nil {
+ panic(err)
+ }
+}
+
+// showCounts logs the size of the constraint system. A typical
+// optimized distribution is 65% copy, 13% load, 11% addr, 5%
+// offsetAddr, 4% store, 2% others.
+//
+func (a *analysis) showCounts() {
+ if a.log != nil {
+ counts := make(map[reflect.Type]int)
+ for _, c := range a.constraints {
+ counts[reflect.TypeOf(c)]++
+ }
+ fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints))
+ var lines []string
+ for t, n := range counts {
+ line := fmt.Sprintf("%7d (%2d%%)\t%s", n, 100*n/len(a.constraints), t)
+ lines = append(lines, line)
+ }
+ sort.Sort(sort.Reverse(sort.StringSlice(lines)))
+ for _, line := range lines {
+ fmt.Fprintf(a.log, "\t%s\n", line)
+ }
+
+ fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes))
+
+ // Show number of pointer equivalence classes.
+ m := make(map[*solverState]bool)
+ for _, n := range a.nodes {
+ m[n.solve] = true
+ }
+ fmt.Fprintf(a.log, "# ptsets:\t%d\n", len(m))
+ }
+}