.config/coc/extensions/coc-go-data/tools/pkg/mod/golang.org/x/tools@v0.0.0-20201105173854-bc9fc8d8c4bc/godoc/index.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // This file contains the infrastructure to create an
   6 // identifier and full-text index for a set of Go files.
   7 //
   8 // Algorithm for identifier index:
   9 // - traverse all .go files of the file tree specified by root
  10 // - for each identifier (word) encountered, collect all occurrences (spots)
  11 //   into a list; this produces a list of spots for each word
  12 // - reduce the lists: from a list of spots to a list of FileRuns,
  13 //   and from a list of FileRuns into a list of PakRuns
  14 // - make a HitList from the PakRuns
  15 //
  16 // Details:
  17 // - keep two lists per word: one containing package-level declarations
  18 //   that have snippets, and one containing all other spots
  19 // - keep the snippets in a separate table indexed by snippet index
  20 //   and store the snippet index in place of the line number in a SpotInfo
  21 //   (the line number for spots with snippets is stored in the snippet)
  22 // - at the end, create lists of alternative spellings for a given
  23 //   word
  24 //
  25 // Algorithm for full text index:
  26 // - concatenate all source code in a byte buffer (in memory)
  27 // - add the files to a file set in lockstep as they are added to the byte
  28 //   buffer such that a byte buffer offset corresponds to the Pos value for
  29 //   that file location
  30 // - create a suffix array from the concatenated sources
  31 //
  32 // String lookup in full text index:
  33 // - use the suffix array to lookup a string's offsets - the offsets
  34 //   correspond to the Pos values relative to the file set
  35 // - translate the Pos values back into file and line information and
  36 //   sort the result
  37
  38 package godoc
  39
  40 import (
  41         "bufio"
  42         "bytes"
  43         "encoding/gob"
  44         "errors"
  45         "fmt"
  46         "go/ast"
  47         "go/doc"
  48         "go/parser"
  49         "go/token"
  50         "index/suffixarray"
  51         "io"
  52         "log"
  53         "os"
  54         pathpkg "path"
  55         "path/filepath"
  56         "regexp"
  57         "runtime"
  58         "sort"
  59         "strconv"
  60         "strings"
  61         "sync"
  62         "time"
  63         "unicode"
  64
  65         "golang.org/x/tools/godoc/util"
  66         "golang.org/x/tools/godoc/vfs"
  67 )
  68
  69 // ----------------------------------------------------------------------------
// interfaceSlice is a helper type for sorting interface
// slices according to some slice-specific sort criteria.

// A comparer reports whether x must be ordered before y.
type comparer func(x, y interface{}) bool

// An interfaceSlice bundles a slice with the comparer that orders it.
type interfaceSlice struct {
	slice []interface{} // entries being sorted
	less  comparer      // ordering applied to pairs of entries
}
  79
  80 // ----------------------------------------------------------------------------
  81 // RunList
  82
// A RunList is a list of entries that can be sorted according to some
// criteria. A RunList may be compressed by grouping "runs" of entries
// which are equal (according to the sort criteria) into a new RunList of
// runs. For instance, a RunList containing pairs (x, y) may be compressed
// into a RunList containing pair runs (x, {y}) where each run consists of
// a list of y's with the same x.
//
// Entries are stored as interface{} values; the comparer passed to sort
// or reduce decides how they are interpreted.
type RunList []interface{}
  90
  91 func (h RunList) sort(less comparer) {
  92         sort.Sort(&interfaceSlice{h, less})
  93 }
  94
  95 func (p *interfaceSlice) Len() int           { return len(p.slice) }
  96 func (p *interfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) }
  97 func (p *interfaceSlice) Swap(i, j int)      { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] }
  98
  99 // Compress entries which are the same according to a sort criteria
 100 // (specified by less) into "runs".
 101 func (h RunList) reduce(less comparer, newRun func(h RunList) interface{}) RunList {
 102         if len(h) == 0 {
 103                 return nil
 104         }
 105         // len(h) > 0
 106
 107         // create runs of entries with equal values
 108         h.sort(less)
 109
 110         // for each run, make a new run object and collect them in a new RunList
 111         var hh RunList
 112         i, x := 0, h[0]
 113         for j, y := range h {
 114                 if less(x, y) {
 115                         hh = append(hh, newRun(h[i:j]))
 116                         i, x = j, h[j] // start a new run
 117                 }
 118         }
 119         // add final run, if any
 120         if i < len(h) {
 121                 hh = append(hh, newRun(h[i:]))
 122         }
 123
 124         return hh
 125 }
 126
 127 // ----------------------------------------------------------------------------
 128 // KindRun
 129
// Debugging support. Disable to see multiple entries per line.
// When true, newKindRun drops identical SpotInfos from each run.
const removeDuplicates = true
 132
// A KindRun is a run of SpotInfos of the same kind in a given file.
// The kind (3 bits) is stored in each SpotInfo element; to find the
// kind of a KindRun, look at any of its elements.
type KindRun []SpotInfo
 137
 138 // KindRuns are sorted by line number or index. Since the isIndex bit
 139 // is always the same for all infos in one list we can compare lori's.
 140 func (k KindRun) Len() int           { return len(k) }
 141 func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() }
 142 func (k KindRun) Swap(i, j int)      { k[i], k[j] = k[j], k[i] }
 143
 144 // FileRun contents are sorted by Kind for the reduction into KindRuns.
 145 func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() }
 146
 147 // newKindRun allocates a new KindRun from the SpotInfo run h.
 148 func newKindRun(h RunList) interface{} {
 149         run := make(KindRun, len(h))
 150         for i, x := range h {
 151                 run[i] = x.(SpotInfo)
 152         }
 153
 154         // Spots were sorted by file and kind to create this run.
 155         // Within this run, sort them by line number or index.
 156         sort.Sort(run)
 157
 158         if removeDuplicates {
 159                 // Since both the lori and kind field must be
 160                 // same for duplicates, and since the isIndex
 161                 // bit is always the same for all infos in one
 162                 // list we can simply compare the entire info.
 163                 k := 0
 164                 prev := SpotInfo(1<<32 - 1) // an unlikely value
 165                 for _, x := range run {
 166                         if x != prev {
 167                                 run[k] = x
 168                                 k++
 169                                 prev = x
 170                         }
 171                 }
 172                 run = run[0:k]
 173         }
 174
 175         return run
 176 }
 177
 178 // ----------------------------------------------------------------------------
 179 // FileRun
 180
// A Pak describes a Go package. Pak values are comparable and are used
// directly as map keys when packages are interned by the Indexer.
type Pak struct {
	Path string // path of directory containing the package
	Name string // package name as declared by package clause
}
 186
 187 // Paks are sorted by name (primary key) and by import path (secondary key).
 188 func (p *Pak) less(q *Pak) bool {
 189         return p.Name < q.Name || p.Name == q.Name && p.Path < q.Path
 190 }
 191
// A File describes a Go file by its name within a package directory
// and the package it belongs to.
type File struct {
	Name string // directory-local file name
	Pak  *Pak   // the package to which the file belongs
}
 197
 198 // Path returns the file path of f.
 199 func (f *File) Path() string {
 200         return pathpkg.Join(f.Pak.Path, f.Name)
 201 }
 202
// A Spot describes a single occurrence of a word.
type Spot struct {
	File *File    // file containing the occurrence
	Info SpotInfo // kind and line number (or snippet index) of the occurrence
}
 208
// A FileRun is a list of KindRuns belonging to the same file.
type FileRun struct {
	File   *File     // the file all groups refer to
	Groups []KindRun // one KindRun per kind found in File
}
 214
 215 // Spots are sorted by file path for the reduction into FileRuns.
 216 func lessSpot(x, y interface{}) bool {
 217         fx := x.(Spot).File
 218         fy := y.(Spot).File
 219         // same as "return fx.Path() < fy.Path()" but w/o computing the file path first
 220         px := fx.Pak.Path
 221         py := fy.Pak.Path
 222         return px < py || px == py && fx.Name < fy.Name
 223 }
 224
 225 // newFileRun allocates a new FileRun from the Spot run h.
 226 func newFileRun(h RunList) interface{} {
 227         file := h[0].(Spot).File
 228
 229         // reduce the list of Spots into a list of KindRuns
 230         h1 := make(RunList, len(h))
 231         for i, x := range h {
 232                 h1[i] = x.(Spot).Info
 233         }
 234         h2 := h1.reduce(lessKind, newKindRun)
 235
 236         // create the FileRun
 237         groups := make([]KindRun, len(h2))
 238         for i, x := range h2 {
 239                 groups[i] = x.(KindRun)
 240         }
 241         return &FileRun{file, groups}
 242 }
 243
 244 // ----------------------------------------------------------------------------
 245 // PakRun
 246
// A PakRun describes a run of *FileRuns of a package.
type PakRun struct {
	Pak   *Pak       // the package all files belong to
	Files []*FileRun // per-file results within Pak
}
 252
 253 // Sorting support for files within a PakRun.
 254 func (p *PakRun) Len() int           { return len(p.Files) }
 255 func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name }
 256 func (p *PakRun) Swap(i, j int)      { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] }
 257
 258 // FileRuns are sorted by package for the reduction into PakRuns.
 259 func lessFileRun(x, y interface{}) bool {
 260         return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak)
 261 }
 262
 263 // newPakRun allocates a new PakRun from the *FileRun run h.
 264 func newPakRun(h RunList) interface{} {
 265         pak := h[0].(*FileRun).File.Pak
 266         files := make([]*FileRun, len(h))
 267         for i, x := range h {
 268                 files[i] = x.(*FileRun)
 269         }
 270         run := &PakRun{pak, files}
 271         sort.Sort(run) // files were sorted by package; sort them by file now
 272         return run
 273 }
 274
 275 // ----------------------------------------------------------------------------
 276 // HitList
 277
// A HitList describes a list of PakRuns, one per package with hits.
type HitList []*PakRun
 280
 281 // PakRuns are sorted by package.
 282 func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) }
 283
 284 func reduce(h0 RunList) HitList {
 285         // reduce a list of Spots into a list of FileRuns
 286         h1 := h0.reduce(lessSpot, newFileRun)
 287         // reduce a list of FileRuns into a list of PakRuns
 288         h2 := h1.reduce(lessFileRun, newPakRun)
 289         // sort the list of PakRuns by package
 290         h2.sort(lessPakRun)
 291         // create a HitList
 292         h := make(HitList, len(h2))
 293         for i, p := range h2 {
 294                 h[i] = p.(*PakRun)
 295         }
 296         return h
 297 }
 298
 299 // filter returns a new HitList created by filtering
 300 // all PakRuns from h that have a matching pakname.
 301 func (h HitList) filter(pakname string) HitList {
 302         var hh HitList
 303         for _, p := range h {
 304                 if p.Pak.Name == pakname {
 305                         hh = append(hh, p)
 306                 }
 307         }
 308         return hh
 309 }
 310
 311 // ----------------------------------------------------------------------------
 312 // AltWords
 313
// A wordPair associates one alternative spelling of a word with its
// canonical spelling.
type wordPair struct {
	canon string // canonical word spelling (all lowercase)
	alt   string // alternative spelling
}
 318
// An AltWords describes a list of alternative spellings for a
// canonical (all lowercase) spelling of a word.
type AltWords struct {
	Canon string   // canonical word spelling (all lowercase)
	Alts  []string // alternative spellings for the same word
}
 325
 326 // wordPairs are sorted by their canonical spelling.
 327 func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon }
 328
 329 // newAltWords allocates a new AltWords from the *wordPair run h.
 330 func newAltWords(h RunList) interface{} {
 331         canon := h[0].(*wordPair).canon
 332         alts := make([]string, len(h))
 333         for i, x := range h {
 334                 alts[i] = x.(*wordPair).alt
 335         }
 336         return &AltWords{canon, alts}
 337 }
 338
 339 func (a *AltWords) filter(s string) *AltWords {
 340         var alts []string
 341         for _, w := range a.Alts {
 342                 if w != s {
 343                         alts = append(alts, w)
 344                 }
 345         }
 346         if len(alts) > 0 {
 347                 return &AltWords{a.Canon, alts}
 348         }
 349         return nil
 350 }
 351
// Ident stores information about external identifiers in order to create
// links to package documentation.
type Ident struct {
	Path    string // e.g. "net/http"
	Package string // e.g. "http"
	Name    string // e.g. "NewRequest"
	Doc     string // e.g. "NewRequest returns a new Request..."
}
 360
// byImportCount sorts the given slice of Idents by the import
// counts of the packages to which they belong.
type byImportCount struct {
	Idents      []Ident        // identifiers to order
	ImportCount map[string]int // import count, looked up by Ident.Path
}
 367
 368 func (ic byImportCount) Len() int {
 369         return len(ic.Idents)
 370 }
 371
 372 func (ic byImportCount) Less(i, j int) bool {
 373         ri := ic.ImportCount[ic.Idents[i].Path]
 374         rj := ic.ImportCount[ic.Idents[j].Path]
 375         if ri == rj {
 376                 return ic.Idents[i].Path < ic.Idents[j].Path
 377         }
 378         return ri > rj
 379 }
 380
 381 func (ic byImportCount) Swap(i, j int) {
 382         ic.Idents[i], ic.Idents[j] = ic.Idents[j], ic.Idents[i]
 383 }
 384
 385 func (ic byImportCount) String() string {
 386         buf := bytes.NewBuffer([]byte("["))
 387         for _, v := range ic.Idents {
 388                 buf.WriteString(fmt.Sprintf("\n\t%s, %s (%d)", v.Path, v.Name, ic.ImportCount[v.Path]))
 389         }
 390         buf.WriteString("\n]")
 391         return buf.String()
 392 }
 393
 394 // filter creates a new Ident list where the results match the given
 395 // package name.
 396 func (ic byImportCount) filter(pakname string) []Ident {
 397         if ic.Idents == nil {
 398                 return nil
 399         }
 400         var res []Ident
 401         for _, i := range ic.Idents {
 402                 if i.Package == pakname {
 403                         res = append(res, i)
 404                 }
 405         }
 406         return res
 407 }
 408
 409 // top returns the top n identifiers.
 410 func (ic byImportCount) top(n int) []Ident {
 411         if len(ic.Idents) > n {
 412                 return ic.Idents[:n]
 413         }
 414         return ic.Idents
 415 }
 416
 417 // ----------------------------------------------------------------------------
 418 // Indexer
 419
// An IndexResult holds the occurrences recorded for a single word,
// split into declarations that carry snippets and everything else.
type IndexResult struct {
	Decls  RunList // package-level declarations (with snippets)
	Others RunList // all other occurrences
}
 424
// Statistics provides statistics information for an index.
type Statistics struct {
	Bytes int // total size of indexed source files
	Files int // number of indexed source files
	Lines int // number of lines (all files)
	Words int // number of different identifiers
	Spots int // number of identifier occurrences
}
 433
// An Indexer maintains the data structures and provides the machinery
// for indexing .go files under a file tree. It implements the
// ast.Visitor interface (see Visit) for walking Go ASTs.
//
// NOTE(review): this comment used to say that Indexer also implements
// the path.Visitor interface for walking file trees; no such methods
// are visible in this part of the file - confirm before relying on it.
type Indexer struct {
	c          *Corpus
	fset       *token.FileSet // file set for all indexed files
	fsOpenGate chan bool      // send pre fs.Open; receive on close

	mu            sync.Mutex              // guards all the following
	sources       bytes.Buffer            // concatenated sources
	strings       map[string]string       // interned string
	packages      map[Pak]*Pak            // interned *Paks
	words         map[string]*IndexResult // RunLists of Spots
	snippets      []*Snippet              // indices are stored in SpotInfos
	current       *token.File             // last file added to file set
	file          *File                   // descriptor (name and package) of the file being walked
	decl          ast.Decl                // AST for current decl; nil if no snippet is wanted
	stats         Statistics
	throttle      *util.Throttle
	importCount   map[string]int                 // package path ("net/http") => count
	packagePath   map[string]map[string]bool     // "template" => "text/template" => true
	exports       map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl
	curPkgExports map[string]SpotKind
	idents        map[SpotKind]map[string][]Ident // kind => name => list of Idents
}
 460
 461 func (x *Indexer) intern(s string) string {
 462         if s, ok := x.strings[s]; ok {
 463                 return s
 464         }
 465         x.strings[s] = s
 466         return s
 467 }
 468
 469 func (x *Indexer) lookupPackage(path, name string) *Pak {
 470         // In the source directory tree, more than one package may
 471         // live in the same directory. For the packages map, construct
 472         // a key that includes both the directory path and the package
 473         // name.
 474         key := Pak{Path: x.intern(path), Name: x.intern(name)}
 475         pak := x.packages[key]
 476         if pak == nil {
 477                 pak = &key
 478                 x.packages[key] = pak
 479         }
 480         return pak
 481 }
 482
 483 func (x *Indexer) addSnippet(s *Snippet) int {
 484         index := len(x.snippets)
 485         x.snippets = append(x.snippets, s)
 486         return index
 487 }
 488
 489 func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) {
 490         if id == nil {
 491                 return
 492         }
 493         name := x.intern(id.Name)
 494
 495         switch kind {
 496         case TypeDecl, FuncDecl, ConstDecl, VarDecl:
 497                 x.curPkgExports[name] = kind
 498         }
 499
 500         lists, found := x.words[name]
 501         if !found {
 502                 lists = new(IndexResult)
 503                 x.words[name] = lists
 504         }
 505
 506         if kind == Use || x.decl == nil {
 507                 if x.c.IndexGoCode {
 508                         // not a declaration or no snippet required
 509                         info := makeSpotInfo(kind, x.current.Line(id.Pos()), false)
 510                         lists.Others = append(lists.Others, Spot{x.file, info})
 511                 }
 512         } else {
 513                 // a declaration with snippet
 514                 index := x.addSnippet(NewSnippet(x.fset, x.decl, id))
 515                 info := makeSpotInfo(kind, index, true)
 516                 lists.Decls = append(lists.Decls, Spot{x.file, info})
 517         }
 518
 519         x.stats.Spots++
 520 }
 521
 522 func (x *Indexer) visitFieldList(kind SpotKind, flist *ast.FieldList) {
 523         for _, f := range flist.List {
 524                 x.decl = nil // no snippets for fields
 525                 for _, name := range f.Names {
 526                         x.visitIdent(kind, name)
 527                 }
 528                 ast.Walk(x, f.Type)
 529                 // ignore tag - not indexed at the moment
 530         }
 531 }
 532
 533 func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) {
 534         switch n := spec.(type) {
 535         case *ast.ImportSpec:
 536                 x.visitIdent(ImportDecl, n.Name)
 537                 if n.Path != nil {
 538                         if imp, err := strconv.Unquote(n.Path.Value); err == nil {
 539                                 x.importCount[x.intern(imp)]++
 540                         }
 541                 }
 542
 543         case *ast.ValueSpec:
 544                 for _, n := range n.Names {
 545                         x.visitIdent(kind, n)
 546                 }
 547                 ast.Walk(x, n.Type)
 548                 for _, v := range n.Values {
 549                         ast.Walk(x, v)
 550                 }
 551
 552         case *ast.TypeSpec:
 553                 x.visitIdent(TypeDecl, n.Name)
 554                 ast.Walk(x, n.Type)
 555         }
 556 }
 557
 558 func (x *Indexer) visitGenDecl(decl *ast.GenDecl) {
 559         kind := VarDecl
 560         if decl.Tok == token.CONST {
 561                 kind = ConstDecl
 562         }
 563         x.decl = decl
 564         for _, s := range decl.Specs {
 565                 x.visitSpec(kind, s)
 566         }
 567 }
 568
 569 func (x *Indexer) Visit(node ast.Node) ast.Visitor {
 570         switch n := node.(type) {
 571         case nil:
 572                 // nothing to do
 573
 574         case *ast.Ident:
 575                 x.visitIdent(Use, n)
 576
 577         case *ast.FieldList:
 578                 x.visitFieldList(VarDecl, n)
 579
 580         case *ast.InterfaceType:
 581                 x.visitFieldList(MethodDecl, n.Methods)
 582
 583         case *ast.DeclStmt:
 584                 // local declarations should only be *ast.GenDecls;
 585                 // ignore incorrect ASTs
 586                 if decl, ok := n.Decl.(*ast.GenDecl); ok {
 587                         x.decl = nil // no snippets for local declarations
 588                         x.visitGenDecl(decl)
 589                 }
 590
 591         case *ast.GenDecl:
 592                 x.decl = n
 593                 x.visitGenDecl(n)
 594
 595         case *ast.FuncDecl:
 596                 kind := FuncDecl
 597                 if n.Recv != nil {
 598                         kind = MethodDecl
 599                         ast.Walk(x, n.Recv)
 600                 }
 601                 x.decl = n
 602                 x.visitIdent(kind, n.Name)
 603                 ast.Walk(x, n.Type)
 604                 if n.Body != nil {
 605                         ast.Walk(x, n.Body)
 606                 }
 607
 608         case *ast.File:
 609                 x.decl = nil
 610                 x.visitIdent(PackageClause, n.Name)
 611                 for _, d := range n.Decls {
 612                         ast.Walk(x, d)
 613                 }
 614
 615         default:
 616                 return x
 617         }
 618
 619         return nil
 620 }
 621
// addFile adds a file to the index if possible and returns the file set file
// and the file's AST if it was successfully parsed as a Go file. If addFile
// failed (that is, if the file was not added), it returns file == nil.
//
// The contents of f are appended to x.sources, and f is closed before
// addFile returns. For a Go file with parse errors the file is still
// added, but the returned AST is nil. If goFile is false, the file is
// only added when util.IsText accepts its contents.
//
// Note that the named result ast shadows the go/ast package inside the
// function body.
func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) {
	defer f.Close()

	// The file set's base offset and x.sources size must be in lock-step;
	// this permits the direct mapping of suffix array lookup results to
	// corresponding Pos values.
	//
	// When a file is added to the file set, its offset base increases by
	// the size of the file + 1; and the initial base offset is 1. Add an
	// extra byte to the sources here.
	x.sources.WriteByte(0)

	// If the sources length doesn't match the file set base at this point
	// the file set implementation changed or we have another error.
	base := x.fset.Base()
	if x.sources.Len() != base {
		panic("internal error: file base incorrect")
	}

	// append file contents (src) to x.sources
	if _, err := x.sources.ReadFrom(f); err == nil {
		// src aliases the tail of x.sources that holds this file's bytes
		src := x.sources.Bytes()[base:]

		if goFile {
			// parse the file and in the process add it to the file set
			if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil {
				file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file
				return
			}
			// file has parse errors, and the AST may be incorrect -
			// set lines information explicitly and index as ordinary
			// text file (cannot fall through to the text case below
			// because the file has already been added to the file set
			// by the parser)
			file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file
			file.SetLinesForContent(src)
			ast = nil
			return
		}

		if util.IsText(src) {
			// only add the file to the file set (for the full text index)
			file = x.fset.AddFile(filename, x.fset.Base(), len(src))
			file.SetLinesForContent(src)
			return
		}
	}

	// Reached on a read error or for a non-Go file that is not text:
	// discard possibly added data
	x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added
	return
}
 677
 678 // Design note: Using an explicit white list of permitted files for indexing
 679 // makes sure that the important files are included and massively reduces the
 680 // number of files to index. The advantage over a blacklist is that unexpected
 681 // (non-blacklisted) files won't suddenly explode the index.
 682
// Files are whitelisted if they have a file name or extension
// present as key in whitelisted. Keys starting with a dot are file
// extensions; the remaining keys are complete file names, which
// isWhitelisted only consults for files without an extension.
var whitelisted = map[string]bool{
	".bash":        true,
	".c":           true,
	".cc":          true,
	".cpp":         true,
	".cxx":         true,
	".css":         true,
	".go":          true,
	".goc":         true,
	".h":           true,
	".hh":          true,
	".hpp":         true,
	".hxx":         true,
	".html":        true,
	".js":          true,
	".out":         true,
	".py":          true,
	".s":           true,
	".sh":          true,
	".txt":         true,
	".xml":         true,
	"AUTHORS":      true,
	"CONTRIBUTORS": true,
	"LICENSE":      true,
	"Makefile":     true,
	"PATENTS":      true,
	"README":       true,
}
 713
 714 // isWhitelisted returns true if a file is on the list
 715 // of "permitted" files for indexing. The filename must
 716 // be the directory-local name of the file.
 717 func isWhitelisted(filename string) bool {
 718         key := pathpkg.Ext(filename)
 719         if key == "" {
 720                 // file has no extension - use entire filename
 721                 key = filename
 722         }
 723         return whitelisted[key]
 724 }
 725
// indexDocs records the documented identifiers of the single Go file
// astFile (named filename, found in directory dirname) in x.idents.
// Files of package main are skipped.
//
// The package itself is stored under PackageClause, keyed by its name
// and by each component of the directory containing it. Constants,
// types, variables, functions and methods reported by go/doc for the
// file are stored under their respective kinds, keyed by name.
func (x *Indexer) indexDocs(dirname string, filename string, astFile *ast.File) {
	pkgName := x.intern(astFile.Name.Name)
	if pkgName == "main" {
		return
	}
	// The package path is dirname without a leading "/src/" and, after
	// that, without a leading "pkg/".
	pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
	// Build a one-file package so that go/doc can process this file alone.
	astPkg := ast.Package{
		Name: pkgName,
		Files: map[string]*ast.File{
			filename: astFile,
		},
	}
	var m doc.Mode
	docPkg := doc.New(&astPkg, dirname, m)
	// addIdent appends an Ident for name to x.idents[sk][name], creating
	// the per-kind map on first use. Only the synopsis of docstr is kept.
	addIdent := func(sk SpotKind, name string, docstr string) {
		if x.idents[sk] == nil {
			x.idents[sk] = make(map[string][]Ident)
		}
		name = x.intern(name)
		x.idents[sk][name] = append(x.idents[sk][name], Ident{
			Path:    pkgPath,
			Package: pkgName,
			Name:    name,
			Doc:     doc.Synopsis(docstr),
		})
	}

	if x.idents[PackageClause] == nil {
		x.idents[PackageClause] = make(map[string][]Ident)
	}
	// List of words under which the package identifier will be stored.
	// This includes the package name and the components of the directory
	// in which it resides.
	words := strings.Split(pathpkg.Dir(pkgPath), "/")
	if words[0] == "." {
		// pkgPath has no parent directory; only the package name is used.
		words = []string{}
	}
	name := x.intern(docPkg.Name)
	synopsis := doc.Synopsis(docPkg.Doc)
	words = append(words, name)
	pkgIdent := Ident{
		Path:    pkgPath,
		Package: pkgName,
		Name:    name,
		Doc:     synopsis,
	}
	for _, word := range words {
		word = x.intern(word)
		found := false
		pkgs := x.idents[PackageClause][word]
		for i, p := range pkgs {
			if p.Path == pkgPath {
				// The package is already listed under this word (from an
				// earlier file); only update its synopsis if this file
				// provides package documentation.
				if docPkg.Doc != "" {
					p.Doc = synopsis
					pkgs[i] = p
				}
				found = true
				break
			}
		}
		if !found {
			x.idents[PackageClause][word] = append(x.idents[PackageClause][word], pkgIdent)
		}
	}

	for _, c := range docPkg.Consts {
		for _, name := range c.Names {
			addIdent(ConstDecl, name, c.Doc)
		}
	}
	for _, t := range docPkg.Types {
		addIdent(TypeDecl, t.Name, t.Doc)
		// Constants, variables, functions and methods that go/doc has
		// associated with the type t.
		for _, c := range t.Consts {
			for _, name := range c.Names {
				addIdent(ConstDecl, name, c.Doc)
			}
		}
		for _, v := range t.Vars {
			for _, name := range v.Names {
				addIdent(VarDecl, name, v.Doc)
			}
		}
		for _, f := range t.Funcs {
			addIdent(FuncDecl, f.Name, f.Doc)
		}
		for _, f := range t.Methods {
			addIdent(MethodDecl, f.Name, f.Doc)
			// Change the name of methods to be "<typename>.<methodname>".
			// They will still be indexed as <methodname>.
			// (The entry just appended by addIdent is the last one in the list.)
			idents := x.idents[MethodDecl][f.Name]
			idents[len(idents)-1].Name = x.intern(t.Name + "." + f.Name)
		}
	}
	for _, v := range docPkg.Vars {
		for _, name := range v.Names {
			addIdent(VarDecl, name, v.Doc)
		}
	}
	for _, f := range docPkg.Funcs {
		addIdent(FuncDecl, f.Name, f.Doc)
	}
}
 828
 829 func (x *Indexer) indexGoFile(dirname string, filename string, file *token.File, astFile *ast.File) {
 830         pkgName := astFile.Name.Name
 831
 832         if x.c.IndexGoCode {
 833                 x.current = file
 834                 pak := x.lookupPackage(dirname, pkgName)
 835                 x.file = &File{filename, pak}
 836                 ast.Walk(x, astFile)
 837         }
 838
 839         if x.c.IndexDocs {
 840                 // Test files are already filtered out in visitFile if IndexGoCode and
 841                 // IndexFullText are false.  Otherwise, check here.
 842                 isTestFile := (x.c.IndexGoCode || x.c.IndexFullText) &&
 843                         (strings.HasSuffix(filename, "_test.go") || strings.HasPrefix(dirname, "/test/"))
 844                 if !isTestFile {
 845                         x.indexDocs(dirname, filename, astFile)
 846                 }
 847         }
 848
 849         ppKey := x.intern(pkgName)
 850         if _, ok := x.packagePath[ppKey]; !ok {
 851                 x.packagePath[ppKey] = make(map[string]bool)
 852         }
 853         pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
 854         x.packagePath[ppKey][pkgPath] = true
 855
 856         // Merge in exported symbols found walking this file into
 857         // the map for that package.
 858         if len(x.curPkgExports) > 0 {
 859                 dest, ok := x.exports[pkgPath]
 860                 if !ok {
 861                         dest = make(map[string]SpotKind)
 862                         x.exports[pkgPath] = dest
 863                 }
 864                 for k, v := range x.curPkgExports {
 865                         dest[k] = v
 866                 }
 867         }
 868 }
 869
 870 func (x *Indexer) visitFile(dirname string, fi os.FileInfo) {
 871         if fi.IsDir() || !x.c.IndexEnabled {
 872                 return
 873         }
 874
 875         filename := pathpkg.Join(dirname, fi.Name())
 876         goFile := isGoFile(fi)
 877
 878         switch {
 879         case x.c.IndexFullText:
 880                 if !isWhitelisted(fi.Name()) {
 881                         return
 882                 }
 883         case x.c.IndexGoCode:
 884                 if !goFile {
 885                         return
 886                 }
 887         case x.c.IndexDocs:
 888                 if !goFile ||
 889                         strings.HasSuffix(fi.Name(), "_test.go") ||
 890                         strings.HasPrefix(dirname, "/test/") {
 891                         return
 892                 }
 893         default:
 894                 // No indexing turned on.
 895                 return
 896         }
 897
 898         x.fsOpenGate <- true
 899         defer func() { <-x.fsOpenGate }()
 900
 901         // open file
 902         f, err := x.c.fs.Open(filename)
 903         if err != nil {
 904                 return
 905         }
 906
 907         x.mu.Lock()
 908         defer x.mu.Unlock()
 909
 910         x.throttle.Throttle()
 911
 912         x.curPkgExports = make(map[string]SpotKind)
 913         file, fast := x.addFile(f, filename, goFile)
 914         if file == nil {
 915                 return // addFile failed
 916         }
 917
 918         if fast != nil {
 919                 x.indexGoFile(dirname, fi.Name(), file, fast)
 920         }
 921
 922         // update statistics
 923         x.stats.Bytes += file.Size()
 924         x.stats.Files++
 925         x.stats.Lines += file.LineCount()
 926 }
 927
// indexOptions contains information that affects the contents of an index.
// It is stored in Index.opts, written out as part of fileIndex, and compared
// against the corpus settings by Index.CompatibleWith. Because fileIndex is
// gob-encoded, the field names are part of the serialized format.
type indexOptions struct {
	// Docs provides documentation search results.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	Docs bool

	// GoCode provides Go source code search results.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	GoCode bool

	// FullText provides search results from all files.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	FullText bool

	// MaxResults optionally specifies the maximum results for indexing.
	// The default is 1000.
	MaxResults int
}
 949
 950 // ----------------------------------------------------------------------------
 951 // Index
 952
// A LookupResult holds the hit lists recorded for a single word, split
// into package-level declarations and all remaining occurrences.
type LookupResult struct {
	Decls  HitList // package-level declarations (with snippets)
	Others HitList // all other occurrences
}
 957
// An Index is the searchable result of an indexing run. It is produced by
// Corpus.NewIndex or filled in from a serialized form by ReadFrom.
// fset and suffixes are only set when a full text index is present.
type Index struct {
	fset        *token.FileSet           // file set used during indexing; nil if no textindex
	suffixes    *suffixarray.Index       // suffixes for concatenated sources; nil if no textindex
	words       map[string]*LookupResult // maps words to hit lists
	alts        map[string]*AltWords     // maps canonical(words) to lists of alternative spellings
	snippets    []*Snippet               // all snippets, indexed by snippet index
	stats       Statistics
	importCount map[string]int                 // package path ("net/http") => count
	packagePath map[string]map[string]bool     // "template" => "text/template" => true
	exports     map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl
	idents      map[SpotKind]map[string][]Ident
	opts        indexOptions
}
 971
 972 func canonical(w string) string { return strings.ToLower(w) }
 973
// Limits on concurrently open files and directories during indexing.
//
// Somewhat arbitrary, but I figure low enough to not hurt disk-based filesystems
// consuming file descriptors, where some systems have low 256 or 512 limits.
// Go should have a built-in way to cap fd usage under the ulimit.
const (
	// maxOpenFiles is the capacity of the Indexer's fsOpenGate channel.
	maxOpenFiles = 200
	// maxOpenDirs is the capacity of the directory gate used by NewIndex.
	maxOpenDirs  = 50
)
 981
 982 func (c *Corpus) throttle() float64 {
 983         if c.IndexThrottle <= 0 {
 984                 return 0.9
 985         }
 986         if c.IndexThrottle > 1.0 {
 987                 return 1.0
 988         }
 989         return c.IndexThrottle
 990 }
 991
// NewIndex creates a new index for the .go files provided by the corpus.
//
// It visits every directory delivered by c.fsDirnames (optionally filtered
// by c.IndexDirectory), hands each directory entry to Indexer.visitFile in
// its own goroutine, and waits for all of them to finish. The collected
// per-word spot lists are then reduced to LookupResults, the table of
// alternative spellings is built, and, if c.IndexFullText is set, a suffix
// array over the concatenated sources is created.
func (c *Corpus) NewIndex() *Index {
	// initialize Indexer
	// (use some reasonably sized maps to start)
	x := &Indexer{
		c:           c,
		fset:        token.NewFileSet(),
		fsOpenGate:  make(chan bool, maxOpenFiles),
		strings:     make(map[string]string),
		packages:    make(map[Pak]*Pak, 256),
		words:       make(map[string]*IndexResult, 8192),
		throttle:    util.NewThrottle(c.throttle(), 100*time.Millisecond), // run at least 0.1s at a time
		importCount: make(map[string]int),
		packagePath: make(map[string]map[string]bool),
		exports:     make(map[string]map[string]SpotKind),
		idents:      make(map[SpotKind]map[string][]Ident, 4),
	}

	// index all files in the directories given by dirnames
	var wg sync.WaitGroup // outstanding ReadDir + visitFile
	// dirGate bounds the number of directory goroutines in flight: a slot
	// is taken before a goroutine is started and released when it returns.
	dirGate := make(chan bool, maxOpenDirs)
	for dirname := range c.fsDirnames() {
		if c.IndexDirectory != nil && !c.IndexDirectory(dirname) {
			continue
		}
		dirGate <- true
		wg.Add(1)
		go func(dirname string) {
			defer func() { <-dirGate }()
			defer wg.Done()

			list, err := c.fs.ReadDir(dirname)
			if err != nil {
				log.Printf("ReadDir(%q): %v; skipping directory", dirname, err)
				return // ignore this directory
			}
			for _, fi := range list {
				// wg.Add happens before the directory goroutine calls
				// wg.Done, so wg.Wait below cannot return early.
				wg.Add(1)
				go func(fi os.FileInfo) {
					defer wg.Done()
					x.visitFile(dirname, fi)
				}(fi)
			}
		}(dirname)
	}
	wg.Wait()

	if !c.IndexFullText {
		// the file set, the current file, and the sources are
		// not needed after indexing if no text index is built -
		// help GC and clear them
		x.fset = nil
		x.sources.Reset()
		x.current = nil // contains reference to fset!
	}

	// for each word, reduce the RunLists into a LookupResult;
	// also collect the word with its canonical spelling in a
	// word list for later computation of alternative spellings
	words := make(map[string]*LookupResult)
	var wlist RunList
	for w, h := range x.words {
		decls := reduce(h.Decls)
		others := reduce(h.Others)
		words[w] = &LookupResult{
			Decls:  decls,
			Others: others,
		}
		wlist = append(wlist, &wordPair{canonical(w), w})
		x.throttle.Throttle()
	}
	x.stats.Words = len(words)

	// reduce the word list {canonical(w), w} into
	// a list of AltWords runs {canonical(w), {w}}
	alist := wlist.reduce(lessWordPair, newAltWords)

	// convert alist into a map of alternative spellings
	alts := make(map[string]*AltWords)
	for i := 0; i < len(alist); i++ {
		a := alist[i].(*AltWords)
		alts[a.Canon] = a
	}

	// create text index
	var suffixes *suffixarray.Index
	if c.IndexFullText {
		suffixes = suffixarray.New(x.sources.Bytes())
	}

	// sort idents by the number of imports of their respective packages
	for _, idMap := range x.idents {
		for _, ir := range idMap {
			sort.Sort(byImportCount{ir, x.importCount})
		}
	}

	// The Index takes over the Indexer's maps and slices without copying.
	return &Index{
		fset:        x.fset,
		suffixes:    suffixes,
		words:       words,
		alts:        alts,
		snippets:    x.snippets,
		stats:       x.stats,
		importCount: x.importCount,
		packagePath: x.packagePath,
		exports:     x.exports,
		idents:      x.idents,
		opts: indexOptions{
			Docs:       x.c.IndexDocs,
			GoCode:     x.c.IndexGoCode,
			FullText:   x.c.IndexFullText,
			MaxResults: x.c.MaxResults,
		},
	}
}
1108
// ErrFileIndexVersion is returned by Index.ReadFrom when the version stored
// in a serialized index differs from fileIndexVersion.
var ErrFileIndexVersion = errors.New("file index version out of date")

// fileIndexVersion is the version number written into every serialized
// index (fileIndex.Version) and required by Index.ReadFrom.
const fileIndexVersion = 3
1112
// fileIndex is the subset of Index that's gob-encoded for use by
// Index.Write and Index.Read.
//
// Version is set to fileIndexVersion when writing and checked against it
// when reading. Fulltext records whether a file set and a suffix array
// follow the gob-encoded fileIndex in the stream. The remaining fields
// mirror the Index fields of the same (lower-case) name.
type fileIndex struct {
	Version     int
	Words       map[string]*LookupResult
	Alts        map[string]*AltWords
	Snippets    []*Snippet
	Fulltext    bool
	Stats       Statistics
	ImportCount map[string]int
	PackagePath map[string]map[string]bool
	Exports     map[string]map[string]SpotKind
	Idents      map[SpotKind]map[string][]Ident
	Opts        indexOptions
}
1128
// Write gob-encodes x to w using a fresh gob.Encoder and returns the
// encoder's error, if any.
func (x *fileIndex) Write(w io.Writer) error {
	return gob.NewEncoder(w).Encode(x)
}
1132
// Read gob-decodes a fileIndex from r into x using a fresh gob.Decoder and
// returns the decoder's error, if any.
func (x *fileIndex) Read(r io.Reader) error {
	return gob.NewDecoder(r).Decode(x)
}
1136
1137 // WriteTo writes the index x to w.
1138 func (x *Index) WriteTo(w io.Writer) (n int64, err error) {
1139         w = countingWriter{&n, w}
1140         fulltext := false
1141         if x.suffixes != nil {
1142                 fulltext = true
1143         }
1144         fx := fileIndex{
1145                 Version:     fileIndexVersion,
1146                 Words:       x.words,
1147                 Alts:        x.alts,
1148                 Snippets:    x.snippets,
1149                 Fulltext:    fulltext,
1150                 Stats:       x.stats,
1151                 ImportCount: x.importCount,
1152                 PackagePath: x.packagePath,
1153                 Exports:     x.exports,
1154                 Idents:      x.idents,
1155                 Opts:        x.opts,
1156         }
1157         if err := fx.Write(w); err != nil {
1158                 return 0, err
1159         }
1160         if fulltext {
1161                 encode := func(x interface{}) error {
1162                         return gob.NewEncoder(w).Encode(x)
1163                 }
1164                 if err := x.fset.Write(encode); err != nil {
1165                         return 0, err
1166                 }
1167                 if err := x.suffixes.Write(w); err != nil {
1168                         return 0, err
1169                 }
1170         }
1171         return n, nil
1172 }
1173
1174 // ReadFrom reads the index from r into x; x must not be nil.
1175 // If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader.
1176 // If the index is from an old version, the error is ErrFileIndexVersion.
1177 func (x *Index) ReadFrom(r io.Reader) (n int64, err error) {
1178         // We use the ability to read bytes as a plausible surrogate for buffering.
1179         if _, ok := r.(io.ByteReader); !ok {
1180                 r = bufio.NewReader(r)
1181         }
1182         r = countingReader{&n, r.(byteReader)}
1183         var fx fileIndex
1184         if err := fx.Read(r); err != nil {
1185                 return n, err
1186         }
1187         if fx.Version != fileIndexVersion {
1188                 return 0, ErrFileIndexVersion
1189         }
1190         x.words = fx.Words
1191         x.alts = fx.Alts
1192         x.snippets = fx.Snippets
1193         x.stats = fx.Stats
1194         x.importCount = fx.ImportCount
1195         x.packagePath = fx.PackagePath
1196         x.exports = fx.Exports
1197         x.idents = fx.Idents
1198         x.opts = fx.Opts
1199         if fx.Fulltext {
1200                 x.fset = token.NewFileSet()
1201                 decode := func(x interface{}) error {
1202                         return gob.NewDecoder(r).Decode(x)
1203                 }
1204                 if err := x.fset.Read(decode); err != nil {
1205                         return n, err
1206                 }
1207                 x.suffixes = new(suffixarray.Index)
1208                 if err := x.suffixes.Read(r); err != nil {
1209                         return n, err
1210                 }
1211         }
1212         return n, nil
1213 }
1214
// Stats returns index statistics.
// The Statistics value is returned by value.
func (x *Index) Stats() Statistics {
	return x.stats
}
1219
// ImportCount returns a map from import paths to how many times they were seen.
// The returned map is the index's own map, not a copy.
func (x *Index) ImportCount() map[string]int {
	return x.importCount
}
1224
// PackagePath returns a map from short package name to a set
// of full package path names that use that short package name.
// The returned map is the index's own map, not a copy.
func (x *Index) PackagePath() map[string]map[string]bool {
	return x.packagePath
}
1230
// Exports returns a map from full package path to exported
// symbol name to its SpotKind.
// The returned map is the index's own map, not a copy.
func (x *Index) Exports() map[string]map[string]SpotKind {
	return x.exports
}
1236
// Idents returns a map from identifier type to exported
// symbol name to the list of identifiers matching that name.
// The returned map is the index's own map, not a copy.
func (x *Index) Idents() map[SpotKind]map[string][]Ident {
	return x.idents
}
1242
1243 func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) {
1244         match = x.words[w]
1245         alt = x.alts[canonical(w)]
1246         // remove current spelling from alternatives
1247         // (if there is no match, the alternatives do
1248         // not contain the current spelling)
1249         if match != nil && alt != nil {
1250                 alt = alt.filter(w)
1251         }
1252         return
1253 }
1254
// isIdentifier reports whether s is a Go identifier: a non-empty sequence
// of letters, underscores and digits that does not start with a digit.
func isIdentifier(s string) bool {
	if len(s) == 0 {
		return false
	}
	for pos, r := range s {
		switch {
		case r == '_', unicode.IsLetter(r):
			// always permitted
		case pos > 0 && unicode.IsDigit(r):
			// digits are permitted anywhere but in first position
		default:
			return false
		}
	}
	return true
}
1265
1266 // For a given query, which is either a single identifier or a qualified
1267 // identifier, Lookup returns a SearchResult containing packages, a LookupResult, a
1268 // list of alternative spellings, and identifiers, if any. Any and all results
1269 // may be nil.  If the query syntax is wrong, an error is reported.
1270 func (x *Index) Lookup(query string) (*SearchResult, error) {
1271         ss := strings.Split(query, ".")
1272
1273         // check query syntax
1274         for _, s := range ss {
1275                 if !isIdentifier(s) {
1276                         return nil, errors.New("all query parts must be identifiers")
1277                 }
1278         }
1279         rslt := &SearchResult{
1280                 Query:  query,
1281                 Idents: make(map[SpotKind][]Ident, 5),
1282         }
1283         // handle simple and qualified identifiers
1284         switch len(ss) {
1285         case 1:
1286                 ident := ss[0]
1287                 rslt.Hit, rslt.Alt = x.lookupWord(ident)
1288                 if rslt.Hit != nil {
1289                         // found a match - filter packages with same name
1290                         // for the list of packages called ident, if any
1291                         rslt.Pak = rslt.Hit.Others.filter(ident)
1292                 }
1293                 for k, v := range x.idents {
1294                         const rsltLimit = 50
1295                         ids := byImportCount{v[ident], x.importCount}
1296                         rslt.Idents[k] = ids.top(rsltLimit)
1297                 }
1298
1299         case 2:
1300                 pakname, ident := ss[0], ss[1]
1301                 rslt.Hit, rslt.Alt = x.lookupWord(ident)
1302                 if rslt.Hit != nil {
1303                         // found a match - filter by package name
1304                         // (no paks - package names are not qualified)
1305                         decls := rslt.Hit.Decls.filter(pakname)
1306                         others := rslt.Hit.Others.filter(pakname)
1307                         rslt.Hit = &LookupResult{decls, others}
1308                 }
1309                 for k, v := range x.idents {
1310                         ids := byImportCount{v[ident], x.importCount}
1311                         rslt.Idents[k] = ids.filter(pakname)
1312                 }
1313
1314         default:
1315                 return nil, errors.New("query is not a (qualified) identifier")
1316         }
1317
1318         return rslt, nil
1319 }
1320
1321 func (x *Index) Snippet(i int) *Snippet {
1322         // handle illegal snippet indices gracefully
1323         if 0 <= i && i < len(x.snippets) {
1324                 return x.snippets[i]
1325         }
1326         return nil
1327 }
1328
// A positionList is a list of (filename, line) positions. It implements
// sort.Interface, ordering entries by filename only.
type positionList []struct {
	filename string
	line     int
}

// Len returns the number of positions in the list.
func (list positionList) Len() int {
	return len(list)
}

// Less reports whether the filename of entry i sorts before the filename
// of entry j; line numbers are not compared.
func (list positionList) Less(i, j int) bool {
	a, b := list[i].filename, list[j].filename
	return a < b
}

// Swap exchanges entries i and j.
func (list positionList) Swap(i, j int) {
	tmp := list[i]
	list[i] = list[j]
	list[j] = tmp
}
1337
// unique returns the list sorted and with duplicate entries removed.
// The work is done in place: list is sorted, and the result shares its
// backing array.
func unique(list []int) []int {
	sort.Ints(list)
	kept := 0
	for _, v := range list {
		// After sorting, duplicates are adjacent: compare against the
		// most recently kept value.
		if kept > 0 && list[kept-1] == v {
			continue
		}
		list[kept] = v
		kept++
	}
	return list[:kept]
}
1352
// A FileLines value specifies a file and line numbers within that file.
// In the values produced by Index.LookupRegexp, Lines is sorted in
// increasing order and contains no duplicates.
type FileLines struct {
	Filename string
	Lines    []int
}
1358
// LookupRegexp returns the number of matches and the matches where a regular
// expression r is found in the full text index. At most n matches are
// returned (thus found <= n).
//
// The matches are grouped by file and the groups are ordered by filename;
// within a group the line numbers are sorted and duplicates are removed,
// so several matches on one line count individually in found but
// contribute a single line to result. If there is no full text index or
// n <= 0, LookupRegexp returns 0 and a nil result.
func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) {
	if x.suffixes == nil || n <= 0 {
		return
	}
	// n > 0

	var list positionList
	// FindAllIndex may return matches that span across file boundaries.
	// Such matches are unlikely, but after eliminating them we may end up
	// with fewer than n matches. If we don't have enough at the end, redo
	// the search with an increased value n1, but only if FindAllIndex
	// returned all the requested matches in the first place (if it
	// returned fewer than that there cannot be more).
	for n1 := n; found < n; n1 += n - found {
		// Every round starts from scratch with a larger request size.
		found = 0
		matches := x.suffixes.FindAllIndex(r, n1)
		// compute files, exclude matches that span file boundaries,
		// and map offsets to file-local offsets
		list = make(positionList, len(matches))
		for _, m := range matches {
			// by construction, an offset corresponds to the Pos value
			// for the file set - use it to get the file and line
			p := token.Pos(m[0])
			if file := x.fset.File(p); file != nil {
				if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() {
					// match [m[0], m[1]) is within the file boundaries
					list[found].filename = file.Name()
					list[found].line = file.Line(p)
					found++
				}
			}
		}
		if found == n || len(matches) < n1 {
			// found all matches or there's no chance to find more
			break
		}
	}
	// Only the first found entries of list were filled in.
	list = list[0:found]
	sort.Sort(list) // sort by filename

	// collect matches belonging to the same file
	var last string
	var lines []int
	// addLines appends the lines gathered for the file named last, if any,
	// to result and starts a new group.
	addLines := func() {
		if len(lines) > 0 {
			// remove duplicate lines
			result = append(result, FileLines{last, unique(lines)})
			lines = nil
		}
	}
	for _, m := range list {
		if m.filename != last {
			addLines()
			last = m.filename
		}
		lines = append(lines, m.line)
	}
	addLines()

	return
}
1424
// invalidateIndex should be called whenever any of the file systems
// under godoc's observation change so that the indexer is kicked on.
// It calls c.fsModified.Set(nil) and then refreshes the corpus metadata.
func (c *Corpus) invalidateIndex() {
	c.fsModified.Set(nil)
	c.refreshMetadata()
}
1431
1432 // feedDirnames feeds the directory names of all directories
1433 // under the file system given by root to channel c.
1434 //
1435 func (c *Corpus) feedDirnames(ch chan<- string) {
1436         if dir, _ := c.fsTree.Get(); dir != nil {
1437                 for d := range dir.(*Directory).iter(false) {
1438                         ch <- d.Path
1439                 }
1440         }
1441 }
1442
1443 // fsDirnames() returns a channel sending all directory names
1444 // of all the file systems under godoc's observation.
1445 //
1446 func (c *Corpus) fsDirnames() <-chan string {
1447         ch := make(chan string, 256) // buffered for fewer context switches
1448         go func() {
1449                 c.feedDirnames(ch)
1450                 close(ch)
1451         }()
1452         return ch
1453 }
1454
1455 // CompatibleWith reports whether the Index x is compatible with the corpus
1456 // indexing options set in c.
1457 func (x *Index) CompatibleWith(c *Corpus) bool {
1458         return x.opts.Docs == c.IndexDocs &&
1459                 x.opts.GoCode == c.IndexGoCode &&
1460                 x.opts.FullText == c.IndexFullText &&
1461                 x.opts.MaxResults == c.MaxResults
1462 }
1463
1464 func (c *Corpus) readIndex(filenames string) error {
1465         matches, err := filepath.Glob(filenames)
1466         if err != nil {
1467                 return err
1468         } else if matches == nil {
1469                 return fmt.Errorf("no index files match %q", filenames)
1470         }
1471         sort.Strings(matches) // make sure files are in the right order
1472         files := make([]io.Reader, 0, len(matches))
1473         for _, filename := range matches {
1474                 f, err := os.Open(filename)
1475                 if err != nil {
1476                         return err
1477                 }
1478                 defer f.Close()
1479                 files = append(files, f)
1480         }
1481         return c.ReadIndexFrom(io.MultiReader(files...))
1482 }
1483
1484 // ReadIndexFrom sets the current index from the serialized version found in r.
1485 func (c *Corpus) ReadIndexFrom(r io.Reader) error {
1486         x := new(Index)
1487         if _, err := x.ReadFrom(r); err != nil {
1488                 return err
1489         }
1490         if !x.CompatibleWith(c) {
1491                 return fmt.Errorf("index file options are incompatible: %v", x.opts)
1492         }
1493         c.searchIndex.Set(x)
1494         return nil
1495 }
1496
1497 func (c *Corpus) UpdateIndex() {
1498         if c.Verbose {
1499                 log.Printf("updating index...")
1500         }
1501         start := time.Now()
1502         index := c.NewIndex()
1503         stop := time.Now()
1504         c.searchIndex.Set(index)
1505         if c.Verbose {
1506                 secs := stop.Sub(start).Seconds()
1507                 stats := index.Stats()
1508                 log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)",
1509                         secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots)
1510         }
1511         memstats := new(runtime.MemStats)
1512         runtime.ReadMemStats(memstats)
1513         if c.Verbose {
1514                 log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
1515         }
1516         runtime.GC()
1517         runtime.ReadMemStats(memstats)
1518         if c.Verbose {
1519                 log.Printf("after  GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
1520         }
1521 }
1522
1523 // RunIndexer runs forever, indexing.
1524 func (c *Corpus) RunIndexer() {
1525         // initialize the index from disk if possible
1526         if c.IndexFiles != "" {
1527                 c.initFSTree()
1528                 if err := c.readIndex(c.IndexFiles); err != nil {
1529                         log.Printf("error reading index from file %s: %v", c.IndexFiles, err)
1530                 }
1531                 return
1532         }
1533
1534         // Repeatedly update the package directory tree and index.
1535         for {
1536                 c.initFSTree()
1537                 c.UpdateIndex()
1538                 if c.IndexInterval < 0 {
1539                         return
1540                 }
1541                 delay := 5 * time.Minute // by default, reindex every 5 minutes
1542                 if c.IndexInterval > 0 {
1543                         delay = c.IndexInterval
1544                 }
1545                 time.Sleep(delay)
1546         }
1547 }
1548
// A countingWriter wraps the io.Writer w and adds the number of bytes
// written through it to *n.
type countingWriter struct {
	n *int64
	w io.Writer
}

// Write passes p on to the wrapped writer and adds the number of bytes it
// reports as written to the counter, also when it reports an error.
func (c countingWriter) Write(p []byte) (n int, err error) {
	written, err := c.w.Write(p)
	*c.n += int64(written)
	return written, err
}
1559
// A byteReader is an io.Reader that can also deliver single bytes.
type byteReader interface {
	io.Reader
	io.ByteReader
}

// A countingReader wraps the byteReader r and adds the number of bytes
// read through it to *n.
type countingReader struct {
	n *int64
	r byteReader
}

// Read reads from the wrapped reader into p and adds the number of bytes
// it reports as read to the counter, also when it reports an error.
func (c countingReader) Read(p []byte) (n int, err error) {
	n, err = c.r.Read(p)
	*c.n += int64(n)
	return n, err
}

// ReadByte reads a single byte from the wrapped reader. The counter is
// advanced only if a byte was actually delivered: io.ByteReader specifies
// that no input is consumed when ReadByte returns an error.
func (c countingReader) ReadByte() (b byte, err error) {
	b, err = c.r.ReadByte()
	if err == nil {
		*c.n++
	}
	return b, err
}