1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // This file contains the infrastructure to create an
6 // identifier and full-text index for a set of Go files.
8 // Algorithm for identifier index:
9 // - traverse all .go files of the file tree specified by root
10 // - for each identifier (word) encountered, collect all occurrences (spots)
11 // into a list; this produces a list of spots for each word
12 // - reduce the lists: from a list of spots to a list of FileRuns,
13 // and from a list of FileRuns into a list of PakRuns
14 // - make a HitList from the PakRuns
17 // - keep two lists per word: one containing package-level declarations
18 // that have snippets, and one containing all other spots
19 // - keep the snippets in a separate table indexed by snippet index
20 // and store the snippet index in place of the line number in a SpotInfo
21 // (the line number for spots with snippets is stored in the snippet)
22 // - at the end, create lists of alternative spellings for a given
25 // Algorithm for full text index:
26 // - concatenate all source code in a byte buffer (in memory)
27 // - add the files to a file set in lockstep as they are added to the byte
28 // buffer such that a byte buffer offset corresponds to the Pos value for
30 // - create a suffix array from the concatenated sources
32 // String lookup in full text index:
33 // - use the suffix array to lookup a string's offsets - the offsets
34 // correspond to the Pos values relative to the file set
35 // - translate the Pos values back into file and line information and
65 "golang.org/x/tools/godoc/util"
66 "golang.org/x/tools/godoc/vfs"
69 // ----------------------------------------------------------------------------
70 // interfaceSlice is a helper type for sorting interface
71 // slices according to some slice-specific sort criteria.
// comparer reports whether x sorts before y; interfaceSlice.Less calls it
// to order the elements of the slice being sorted.
73 type comparer func(x, y interface{}) bool
75 type interfaceSlice struct {
80 // ----------------------------------------------------------------------------
83 // A RunList is a list of entries that can be sorted according to some
84 // criteria. A RunList may be compressed by grouping "runs" of entries
85 // which are equal (according to the sort criteria) into a new RunList of
86 // runs. For instance, a RunList containing pairs (x, y) may be compressed
87 // into a RunList containing pair runs (x, {y}) where each run consists of
88 // a list of y's with the same x.
89 type RunList []interface{}
91 func (h RunList) sort(less comparer) {
92 sort.Sort(&interfaceSlice{h, less})
95 func (p *interfaceSlice) Len() int { return len(p.slice) }
96 func (p *interfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) }
97 func (p *interfaceSlice) Swap(i, j int) { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] }
99 // Compress entries which are the same according to a sort criterion
100 // (specified by less) into "runs".
101 func (h RunList) reduce(less comparer, newRun func(h RunList) interface{}) RunList {
107 // create runs of entries with equal values
110 // for each run, make a new run object and collect them in a new RunList
113 for j, y := range h {
115 hh = append(hh, newRun(h[i:j]))
116 i, x = j, h[j] // start a new run
119 // add final run, if any
121 hh = append(hh, newRun(h[i:]))
127 // ----------------------------------------------------------------------------
130 // Debugging support. Disable to see multiple entries per line.
131 const removeDuplicates = true
133 // A KindRun is a run of SpotInfos of the same kind in a given file.
134 // The kind (3 bits) is stored in each SpotInfo element; to find the
135 // kind of a KindRun, look at any of its elements.
136 type KindRun []SpotInfo
138 // KindRuns are sorted by line number or index. Since the isIndex bit
139 // is always the same for all infos in one list we can compare lori's.
140 func (k KindRun) Len() int { return len(k) }
141 func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() }
142 func (k KindRun) Swap(i, j int) { k[i], k[j] = k[j], k[i] }
144 // FileRun contents are sorted by Kind for the reduction into KindRuns.
145 func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() }
147 // newKindRun allocates a new KindRun from the SpotInfo run h.
148 func newKindRun(h RunList) interface{} {
149 run := make(KindRun, len(h))
150 for i, x := range h {
151 run[i] = x.(SpotInfo)
154 // Spots were sorted by file and kind to create this run.
155 // Within this run, sort them by line number or index.
158 if removeDuplicates {
159 // Since both the lori and kind field must be
160 // same for duplicates, and since the isIndex
161 // bit is always the same for all infos in one
162 // list we can simply compare the entire info.
164 prev := SpotInfo(1<<32 - 1) // an unlikely value
165 for _, x := range run {
178 // ----------------------------------------------------------------------------
181 // A Pak describes a Go package.
183 Path string // path of directory containing the package
184 Name string // package name as declared by package clause
187 // Paks are sorted by name (primary key) and by import path (secondary key).
188 func (p *Pak) less(q *Pak) bool {
189 return p.Name < q.Name || p.Name == q.Name && p.Path < q.Path
192 // A File describes a Go file.
194 Name string // directory-local file name
195 Pak *Pak // the package to which the file belongs
198 // Path returns the file path of f.
199 func (f *File) Path() string {
200 return pathpkg.Join(f.Pak.Path, f.Name)
203 // A Spot describes a single occurrence of a word.
209 // A FileRun is a list of KindRuns belonging to the same file.
210 type FileRun struct {
215 // Spots are sorted by file path for the reduction into FileRuns.
216 func lessSpot(x, y interface{}) bool {
219 // same as "return fx.Path() < fy.Path()" but w/o computing the file path first
222 return px < py || px == py && fx.Name < fy.Name
225 // newFileRun allocates a new FileRun from the Spot run h.
226 func newFileRun(h RunList) interface{} {
227 file := h[0].(Spot).File
229 // reduce the list of Spots into a list of KindRuns
230 h1 := make(RunList, len(h))
231 for i, x := range h {
232 h1[i] = x.(Spot).Info
234 h2 := h1.reduce(lessKind, newKindRun)
236 // create the FileRun
237 groups := make([]KindRun, len(h2))
238 for i, x := range h2 {
239 groups[i] = x.(KindRun)
241 return &FileRun{file, groups}
244 // ----------------------------------------------------------------------------
247 // A PakRun describes a run of *FileRuns of a package.
253 // Sorting support for files within a PakRun.
254 func (p *PakRun) Len() int { return len(p.Files) }
255 func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name }
256 func (p *PakRun) Swap(i, j int) { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] }
258 // FileRuns are sorted by package for the reduction into PakRuns.
259 func lessFileRun(x, y interface{}) bool {
260 return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak)
263 // newPakRun allocates a new PakRun from the *FileRun run h.
264 func newPakRun(h RunList) interface{} {
265 pak := h[0].(*FileRun).File.Pak
266 files := make([]*FileRun, len(h))
267 for i, x := range h {
268 files[i] = x.(*FileRun)
270 run := &PakRun{pak, files}
271 sort.Sort(run) // files were sorted by package; sort them by file now
275 // ----------------------------------------------------------------------------
278 // A HitList describes a list of PakRuns.
279 type HitList []*PakRun
281 // PakRuns are sorted by package.
282 func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) }
284 func reduce(h0 RunList) HitList {
285 // reduce a list of Spots into a list of FileRuns
286 h1 := h0.reduce(lessSpot, newFileRun)
287 // reduce a list of FileRuns into a list of PakRuns
288 h2 := h1.reduce(lessFileRun, newPakRun)
289 // sort the list of PakRuns by package
292 h := make(HitList, len(h2))
293 for i, p := range h2 {
299 // filter returns a new HitList created by filtering
300 // all PakRuns from h that have a matching pakname.
301 func (h HitList) filter(pakname string) HitList {
303 for _, p := range h {
304 if p.Pak.Name == pakname {
311 // ----------------------------------------------------------------------------
314 type wordPair struct {
315 canon string // canonical word spelling (all lowercase)
316 alt string // alternative spelling
319 // An AltWords describes a list of alternative spellings for a
320 // canonical (all lowercase) spelling of a word.
321 type AltWords struct {
322 Canon string // canonical word spelling (all lowercase)
323 Alts []string // alternative spelling for the same word
326 // wordPairs are sorted by their canonical spelling.
327 func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon }
329 // newAltWords allocates a new AltWords from the *wordPair run h.
330 func newAltWords(h RunList) interface{} {
331 canon := h[0].(*wordPair).canon
332 alts := make([]string, len(h))
333 for i, x := range h {
334 alts[i] = x.(*wordPair).alt
336 return &AltWords{canon, alts}
339 func (a *AltWords) filter(s string) *AltWords {
341 for _, w := range a.Alts {
343 alts = append(alts, w)
347 return &AltWords{a.Canon, alts}
352 // Ident stores information about external identifiers in order to create
353 // links to package documentation.
355 Path string // e.g. "net/http"
356 Package string // e.g. "http"
357 Name string // e.g. "NewRequest"
358 Doc string // e.g. "NewRequest returns a new Request..."
361 // byImportCount sorts the given slice of Idents by the import
362 // counts of the packages to which they belong.
363 type byImportCount struct {
365 ImportCount map[string]int
368 func (ic byImportCount) Len() int {
369 return len(ic.Idents)
372 func (ic byImportCount) Less(i, j int) bool {
373 ri := ic.ImportCount[ic.Idents[i].Path]
374 rj := ic.ImportCount[ic.Idents[j].Path]
376 return ic.Idents[i].Path < ic.Idents[j].Path
381 func (ic byImportCount) Swap(i, j int) {
382 ic.Idents[i], ic.Idents[j] = ic.Idents[j], ic.Idents[i]
385 func (ic byImportCount) String() string {
386 buf := bytes.NewBuffer([]byte("["))
387 for _, v := range ic.Idents {
388 buf.WriteString(fmt.Sprintf("\n\t%s, %s (%d)", v.Path, v.Name, ic.ImportCount[v.Path]))
390 buf.WriteString("\n]")
394 // filter creates a new Ident list where the results match the given
396 func (ic byImportCount) filter(pakname string) []Ident {
397 if ic.Idents == nil {
401 for _, i := range ic.Idents {
402 if i.Package == pakname {
409 // top returns the top n identifiers.
410 func (ic byImportCount) top(n int) []Ident {
411 if len(ic.Idents) > n {
417 // ----------------------------------------------------------------------------
420 type IndexResult struct {
421 Decls RunList // package-level declarations (with snippets)
422 Others RunList // all other occurrences
425 // Statistics provides statistics information for an index.
426 type Statistics struct {
427 Bytes int // total size of indexed source files
428 Files int // number of indexed source files
429 Lines int // number of lines (all files)
430 Words int // number of different identifiers
431 Spots int // number of identifier occurrences
434 // An Indexer maintains the data structures and provides the machinery
435 // for indexing .go files under a file tree. It implements the path.Visitor
436 // interface for walking file trees, and the ast.Visitor interface for
438 type Indexer struct {
440 fset *token.FileSet // file set for all indexed files
441 fsOpenGate chan bool // send pre fs.Open; receive on close
443 mu sync.Mutex // guards all the following
444 sources bytes.Buffer // concatenated sources
445 strings map[string]string // interned string
446 packages map[Pak]*Pak // interned *Paks
447 words map[string]*IndexResult // RunLists of Spots
448 snippets []*Snippet // indices are stored in SpotInfos
449 current *token.File // last file added to file set
450 file *File // AST for current file
451 decl ast.Decl // AST for current decl
453 throttle *util.Throttle
454 importCount map[string]int // package path ("net/http") => count
455 packagePath map[string]map[string]bool // "template" => "text/template" => true
456 exports map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl
457 curPkgExports map[string]SpotKind
458 idents map[SpotKind]map[string][]Ident // kind => name => list of Idents
461 func (x *Indexer) intern(s string) string {
462 if s, ok := x.strings[s]; ok {
469 func (x *Indexer) lookupPackage(path, name string) *Pak {
470 // In the source directory tree, more than one package may
471 // live in the same directory. For the packages map, construct
472 // a key that includes both the directory path and the package
474 key := Pak{Path: x.intern(path), Name: x.intern(name)}
475 pak := x.packages[key]
478 x.packages[key] = pak
483 func (x *Indexer) addSnippet(s *Snippet) int {
484 index := len(x.snippets)
485 x.snippets = append(x.snippets, s)
489 func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) {
493 name := x.intern(id.Name)
496 case TypeDecl, FuncDecl, ConstDecl, VarDecl:
497 x.curPkgExports[name] = kind
500 lists, found := x.words[name]
502 lists = new(IndexResult)
503 x.words[name] = lists
506 if kind == Use || x.decl == nil {
508 // not a declaration or no snippet required
509 info := makeSpotInfo(kind, x.current.Line(id.Pos()), false)
510 lists.Others = append(lists.Others, Spot{x.file, info})
513 // a declaration with snippet
514 index := x.addSnippet(NewSnippet(x.fset, x.decl, id))
515 info := makeSpotInfo(kind, index, true)
516 lists.Decls = append(lists.Decls, Spot{x.file, info})
522 func (x *Indexer) visitFieldList(kind SpotKind, flist *ast.FieldList) {
523 for _, f := range flist.List {
524 x.decl = nil // no snippets for fields
525 for _, name := range f.Names {
526 x.visitIdent(kind, name)
529 // ignore tag - not indexed at the moment
533 func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) {
534 switch n := spec.(type) {
535 case *ast.ImportSpec:
536 x.visitIdent(ImportDecl, n.Name)
538 if imp, err := strconv.Unquote(n.Path.Value); err == nil {
539 x.importCount[x.intern(imp)]++
544 for _, n := range n.Names {
545 x.visitIdent(kind, n)
548 for _, v := range n.Values {
553 x.visitIdent(TypeDecl, n.Name)
558 func (x *Indexer) visitGenDecl(decl *ast.GenDecl) {
560 if decl.Tok == token.CONST {
564 for _, s := range decl.Specs {
569 func (x *Indexer) Visit(node ast.Node) ast.Visitor {
570 switch n := node.(type) {
578 x.visitFieldList(VarDecl, n)
580 case *ast.InterfaceType:
581 x.visitFieldList(MethodDecl, n.Methods)
584 // local declarations should only be *ast.GenDecls;
585 // ignore incorrect ASTs
586 if decl, ok := n.Decl.(*ast.GenDecl); ok {
587 x.decl = nil // no snippets for local declarations
602 x.visitIdent(kind, n.Name)
610 x.visitIdent(PackageClause, n.Name)
611 for _, d := range n.Decls {
622 // addFile adds a file to the index if possible and returns the file set file
623 // and the file's AST if it was successfully parsed as a Go file. If addFile
624 // failed (that is, if the file was not added), it returns file == nil.
625 func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) {
628 // The file set's base offset and x.sources size must be in lock-step;
629 // this permits the direct mapping of suffix array lookup results to
630 // corresponding Pos values.
632 // When a file is added to the file set, its offset base increases by
633 // the size of the file + 1; and the initial base offset is 1. Add an
634 // extra byte to the sources here.
635 x.sources.WriteByte(0)
637 // If the sources length doesn't match the file set base at this point
638 // the file set implementation changed or we have another error.
639 base := x.fset.Base()
640 if x.sources.Len() != base {
641 panic("internal error: file base incorrect")
644 // append file contents (src) to x.sources
645 if _, err := x.sources.ReadFrom(f); err == nil {
646 src := x.sources.Bytes()[base:]
649 // parse the file and in the process add it to the file set
650 if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil {
651 file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file
654 // file has parse errors, and the AST may be incorrect -
655 // set lines information explicitly and index as ordinary
656 // text file (cannot fall through to the text case below
657 // because the file has already been added to the file set
659 file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file
660 file.SetLinesForContent(src)
665 if util.IsText(src) {
666 // only add the file to the file set (for the full text index)
667 file = x.fset.AddFile(filename, x.fset.Base(), len(src))
668 file.SetLinesForContent(src)
673 // discard possibly added data
674 x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added
678 // Design note: Using an explicit white list of permitted files for indexing
679 // makes sure that the important files are included and massively reduces the
680 // number of files to index. The advantage over a blacklist is that unexpected
681 // (non-blacklisted) files won't suddenly explode the index.
683 // Files are whitelisted if they have a file name or extension
684 // present as key in whitelisted.
685 var whitelisted = map[string]bool{
707 "CONTRIBUTORS": true,
714 // isWhitelisted returns true if a file is on the list
715 // of "permitted" files for indexing. The filename must
716 // be the directory-local name of the file.
717 func isWhitelisted(filename string) bool {
718 key := pathpkg.Ext(filename)
720 // file has no extension - use entire filename
723 return whitelisted[key]
726 func (x *Indexer) indexDocs(dirname string, filename string, astFile *ast.File) {
727 pkgName := x.intern(astFile.Name.Name)
728 if pkgName == "main" {
731 pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
732 astPkg := ast.Package{
734 Files: map[string]*ast.File{
739 docPkg := doc.New(&astPkg, dirname, m)
740 addIdent := func(sk SpotKind, name string, docstr string) {
741 if x.idents[sk] == nil {
742 x.idents[sk] = make(map[string][]Ident)
744 name = x.intern(name)
745 x.idents[sk][name] = append(x.idents[sk][name], Ident{
749 Doc: doc.Synopsis(docstr),
753 if x.idents[PackageClause] == nil {
754 x.idents[PackageClause] = make(map[string][]Ident)
756 // List of words under which the package identifier will be stored.
757 // This includes the package name and the components of the directory
758 // in which it resides.
759 words := strings.Split(pathpkg.Dir(pkgPath), "/")
763 name := x.intern(docPkg.Name)
764 synopsis := doc.Synopsis(docPkg.Doc)
765 words = append(words, name)
772 for _, word := range words {
773 word = x.intern(word)
775 pkgs := x.idents[PackageClause][word]
776 for i, p := range pkgs {
777 if p.Path == pkgPath {
778 if docPkg.Doc != "" {
787 x.idents[PackageClause][word] = append(x.idents[PackageClause][word], pkgIdent)
791 for _, c := range docPkg.Consts {
792 for _, name := range c.Names {
793 addIdent(ConstDecl, name, c.Doc)
796 for _, t := range docPkg.Types {
797 addIdent(TypeDecl, t.Name, t.Doc)
798 for _, c := range t.Consts {
799 for _, name := range c.Names {
800 addIdent(ConstDecl, name, c.Doc)
803 for _, v := range t.Vars {
804 for _, name := range v.Names {
805 addIdent(VarDecl, name, v.Doc)
808 for _, f := range t.Funcs {
809 addIdent(FuncDecl, f.Name, f.Doc)
811 for _, f := range t.Methods {
812 addIdent(MethodDecl, f.Name, f.Doc)
813 // Change the name of methods to be "<typename>.<methodname>".
814 // They will still be indexed as <methodname>.
815 idents := x.idents[MethodDecl][f.Name]
816 idents[len(idents)-1].Name = x.intern(t.Name + "." + f.Name)
819 for _, v := range docPkg.Vars {
820 for _, name := range v.Names {
821 addIdent(VarDecl, name, v.Doc)
824 for _, f := range docPkg.Funcs {
825 addIdent(FuncDecl, f.Name, f.Doc)
829 func (x *Indexer) indexGoFile(dirname string, filename string, file *token.File, astFile *ast.File) {
830 pkgName := astFile.Name.Name
834 pak := x.lookupPackage(dirname, pkgName)
835 x.file = &File{filename, pak}
840 // Test files are already filtered out in visitFile if IndexGoCode and
841 // IndexFullText are false. Otherwise, check here.
842 isTestFile := (x.c.IndexGoCode || x.c.IndexFullText) &&
843 (strings.HasSuffix(filename, "_test.go") || strings.HasPrefix(dirname, "/test/"))
845 x.indexDocs(dirname, filename, astFile)
849 ppKey := x.intern(pkgName)
850 if _, ok := x.packagePath[ppKey]; !ok {
851 x.packagePath[ppKey] = make(map[string]bool)
853 pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
854 x.packagePath[ppKey][pkgPath] = true
856 // Merge in exported symbols found walking this file into
857 // the map for that package.
858 if len(x.curPkgExports) > 0 {
859 dest, ok := x.exports[pkgPath]
861 dest = make(map[string]SpotKind)
862 x.exports[pkgPath] = dest
864 for k, v := range x.curPkgExports {
870 func (x *Indexer) visitFile(dirname string, fi os.FileInfo) {
871 if fi.IsDir() || !x.c.IndexEnabled {
875 filename := pathpkg.Join(dirname, fi.Name())
876 goFile := isGoFile(fi)
879 case x.c.IndexFullText:
880 if !isWhitelisted(fi.Name()) {
883 case x.c.IndexGoCode:
889 strings.HasSuffix(fi.Name(), "_test.go") ||
890 strings.HasPrefix(dirname, "/test/") {
894 // No indexing turned on.
899 defer func() { <-x.fsOpenGate }()
902 f, err := x.c.fs.Open(filename)
910 x.throttle.Throttle()
912 x.curPkgExports = make(map[string]SpotKind)
913 file, fast := x.addFile(f, filename, goFile)
915 return // addFile failed
919 x.indexGoFile(dirname, fi.Name(), file, fast)
923 x.stats.Bytes += file.Size()
925 x.stats.Lines += file.LineCount()
928 // indexOptions contains information that affects the contents of an index.
929 type indexOptions struct {
930 // Docs provides documentation search results.
931 // It is only consulted if IndexEnabled is true.
932 // The default value is true.
935 // GoCode provides Go source code search results.
936 // It is only consulted if IndexEnabled is true.
937 // The default value is true.
940 // FullText provides search results from all files.
941 // It is only consulted if IndexEnabled is true.
942 // The default value is true.
945 // MaxResults optionally specifies the maximum results for indexing.
946 // The default is 1000.
950 // ----------------------------------------------------------------------------
953 type LookupResult struct {
954 Decls HitList // package-level declarations (with snippets)
955 Others HitList // all other occurrences
959 fset *token.FileSet // file set used during indexing; nil if no textindex
960 suffixes *suffixarray.Index // suffixes for concatenated sources; nil if no textindex
961 words map[string]*LookupResult // maps words to hit lists
962 alts map[string]*AltWords // maps canonical(words) to lists of alternative spellings
963 snippets []*Snippet // all snippets, indexed by snippet index
965 importCount map[string]int // package path ("net/http") => count
966 packagePath map[string]map[string]bool // "template" => "text/template" => true
967 exports map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl
968 idents map[SpotKind]map[string][]Ident
// canonical returns the canonical spelling of w, which is w converted to
// lower case.
func canonical(w string) string {
	lower := strings.ToLower(w)
	return lower
}
974 // Somewhat arbitrary, but I figure low enough to not hurt disk-based filesystems
975 // consuming file descriptors, where some systems have low 256 or 512 limits.
976 // Go should have a built-in way to cap fd usage under the ulimit.
982 func (c *Corpus) throttle() float64 {
983 if c.IndexThrottle <= 0 {
986 if c.IndexThrottle > 1.0 {
989 return c.IndexThrottle
992 // NewIndex creates a new index for the .go files provided by the corpus.
993 func (c *Corpus) NewIndex() *Index {
994 // initialize Indexer
995 // (use some reasonably sized maps to start)
998 fset: token.NewFileSet(),
999 fsOpenGate: make(chan bool, maxOpenFiles),
1000 strings: make(map[string]string),
1001 packages: make(map[Pak]*Pak, 256),
1002 words: make(map[string]*IndexResult, 8192),
1003 throttle: util.NewThrottle(c.throttle(), 100*time.Millisecond), // run at least 0.1s at a time
1004 importCount: make(map[string]int),
1005 packagePath: make(map[string]map[string]bool),
1006 exports: make(map[string]map[string]SpotKind),
1007 idents: make(map[SpotKind]map[string][]Ident, 4),
1010 // index all files in the directories given by dirnames
1011 var wg sync.WaitGroup // outstanding ReadDir + visitFile
1012 dirGate := make(chan bool, maxOpenDirs)
1013 for dirname := range c.fsDirnames() {
1014 if c.IndexDirectory != nil && !c.IndexDirectory(dirname) {
1019 go func(dirname string) {
1020 defer func() { <-dirGate }()
1023 list, err := c.fs.ReadDir(dirname)
1025 log.Printf("ReadDir(%q): %v; skipping directory", dirname, err)
1026 return // ignore this directory
1028 for _, fi := range list {
1030 go func(fi os.FileInfo) {
1032 x.visitFile(dirname, fi)
1039 if !c.IndexFullText {
1040 // the file set, the current file, and the sources are
1041 // not needed after indexing if no text index is built -
1042 // help GC and clear them
1045 x.current = nil // contains reference to fset!
1048 // for each word, reduce the RunLists into a LookupResult;
1049 // also collect the word with its canonical spelling in a
1050 // word list for later computation of alternative spellings
1051 words := make(map[string]*LookupResult)
1053 for w, h := range x.words {
1054 decls := reduce(h.Decls)
1055 others := reduce(h.Others)
1056 words[w] = &LookupResult{
1060 wlist = append(wlist, &wordPair{canonical(w), w})
1061 x.throttle.Throttle()
1063 x.stats.Words = len(words)
1065 // reduce the word list {canonical(w), w} into
1066 // a list of AltWords runs {canonical(w), {w}}
1067 alist := wlist.reduce(lessWordPair, newAltWords)
1069 // convert alist into a map of alternative spellings
1070 alts := make(map[string]*AltWords)
1071 for i := 0; i < len(alist); i++ {
1072 a := alist[i].(*AltWords)
1076 // create text index
1077 var suffixes *suffixarray.Index
1078 if c.IndexFullText {
1079 suffixes = suffixarray.New(x.sources.Bytes())
1082 // sort idents by the number of imports of their respective packages
1083 for _, idMap := range x.idents {
1084 for _, ir := range idMap {
1085 sort.Sort(byImportCount{ir, x.importCount})
1094 snippets: x.snippets,
1096 importCount: x.importCount,
1097 packagePath: x.packagePath,
1101 Docs: x.c.IndexDocs,
1102 GoCode: x.c.IndexGoCode,
1103 FullText: x.c.IndexFullText,
1104 MaxResults: x.c.MaxResults,
// ErrFileIndexVersion is the error returned by Index.ReadFrom when the
// version stored in the serialized index differs from fileIndexVersion.
1109 var ErrFileIndexVersion = errors.New("file index version out of date")
// fileIndexVersion is the version number recorded in a serialized index by
// Index.WriteTo and required by Index.ReadFrom.
1111 const fileIndexVersion = 3
1113 // fileIndex is the subset of Index that's gob-encoded for use by
1114 // Index.Write and Index.Read.
1115 type fileIndex struct {
1117 Words map[string]*LookupResult
1118 Alts map[string]*AltWords
1122 ImportCount map[string]int
1123 PackagePath map[string]map[string]bool
1124 Exports map[string]map[string]SpotKind
1125 Idents map[SpotKind]map[string][]Ident
1129 func (x *fileIndex) Write(w io.Writer) error {
1130 return gob.NewEncoder(w).Encode(x)
1133 func (x *fileIndex) Read(r io.Reader) error {
1134 return gob.NewDecoder(r).Decode(x)
1137 // WriteTo writes the index x to w.
1138 func (x *Index) WriteTo(w io.Writer) (n int64, err error) {
1139 w = countingWriter{&n, w}
1141 if x.suffixes != nil {
1145 Version: fileIndexVersion,
1148 Snippets: x.snippets,
1151 ImportCount: x.importCount,
1152 PackagePath: x.packagePath,
1157 if err := fx.Write(w); err != nil {
1161 encode := func(x interface{}) error {
1162 return gob.NewEncoder(w).Encode(x)
1164 if err := x.fset.Write(encode); err != nil {
1167 if err := x.suffixes.Write(w); err != nil {
1174 // ReadFrom reads the index from r into x; x must not be nil.
1175 // If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader.
1176 // If the index is from an old version, the error is ErrFileIndexVersion.
1177 func (x *Index) ReadFrom(r io.Reader) (n int64, err error) {
1178 // We use the ability to read bytes as a plausible surrogate for buffering.
1179 if _, ok := r.(io.ByteReader); !ok {
1180 r = bufio.NewReader(r)
1182 r = countingReader{&n, r.(byteReader)}
1184 if err := fx.Read(r); err != nil {
1187 if fx.Version != fileIndexVersion {
1188 return 0, ErrFileIndexVersion
1192 x.snippets = fx.Snippets
1194 x.importCount = fx.ImportCount
1195 x.packagePath = fx.PackagePath
1196 x.exports = fx.Exports
1197 x.idents = fx.Idents
1200 x.fset = token.NewFileSet()
1201 decode := func(x interface{}) error {
1202 return gob.NewDecoder(r).Decode(x)
1204 if err := x.fset.Read(decode); err != nil {
1207 x.suffixes = new(suffixarray.Index)
1208 if err := x.suffixes.Read(r); err != nil {
1215 // Stats returns index statistics.
1216 func (x *Index) Stats() Statistics {
1220 // ImportCount returns a map from import paths to how many times they were seen.
1221 func (x *Index) ImportCount() map[string]int {
1222 return x.importCount
1225 // PackagePath returns a map from short package name to a set
1226 // of full package path names that use that short package name.
1227 func (x *Index) PackagePath() map[string]map[string]bool {
1228 return x.packagePath
1231 // Exports returns a map from full package path to exported
1232 // symbol name to its type.
1233 func (x *Index) Exports() map[string]map[string]SpotKind {
1237 // Idents returns a map from identifier type to exported
1238 // symbol name to the list of identifiers matching that name.
1239 func (x *Index) Idents() map[SpotKind]map[string][]Ident {
1243 func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) {
1245 alt = x.alts[canonical(w)]
1246 // remove current spelling from alternatives
1247 // (if there is no match, the alternatives do
1248 // not contain the current spelling)
1249 if match != nil && alt != nil {
1255 // isIdentifier reports whether s is a Go identifier.
1256 func isIdentifier(s string) bool {
1257 for i, ch := range s {
1258 if unicode.IsLetter(ch) || ch == '_' || i > 0 && unicode.IsDigit(ch) {
1266 // For a given query, which is either a single identifier or a qualified
1267 // identifier, Lookup returns a SearchResult containing packages, a LookupResult, a
1268 // list of alternative spellings, and identifiers, if any. Any and all results
1269 // may be nil. If the query syntax is wrong, an error is reported.
1270 func (x *Index) Lookup(query string) (*SearchResult, error) {
1271 ss := strings.Split(query, ".")
1273 // check query syntax
1274 for _, s := range ss {
1275 if !isIdentifier(s) {
1276 return nil, errors.New("all query parts must be identifiers")
1279 rslt := &SearchResult{
1281 Idents: make(map[SpotKind][]Ident, 5),
1283 // handle simple and qualified identifiers
1287 rslt.Hit, rslt.Alt = x.lookupWord(ident)
1288 if rslt.Hit != nil {
1289 // found a match - filter packages with same name
1290 // for the list of packages called ident, if any
1291 rslt.Pak = rslt.Hit.Others.filter(ident)
1293 for k, v := range x.idents {
1294 const rsltLimit = 50
1295 ids := byImportCount{v[ident], x.importCount}
1296 rslt.Idents[k] = ids.top(rsltLimit)
1300 pakname, ident := ss[0], ss[1]
1301 rslt.Hit, rslt.Alt = x.lookupWord(ident)
1302 if rslt.Hit != nil {
1303 // found a match - filter by package name
1304 // (no paks - package names are not qualified)
1305 decls := rslt.Hit.Decls.filter(pakname)
1306 others := rslt.Hit.Others.filter(pakname)
1307 rslt.Hit = &LookupResult{decls, others}
1309 for k, v := range x.idents {
1310 ids := byImportCount{v[ident], x.importCount}
1311 rslt.Idents[k] = ids.filter(pakname)
1315 return nil, errors.New("query is not a (qualified) identifier")
1321 func (x *Index) Snippet(i int) *Snippet {
1322 // handle illegal snippet indices gracefully
1323 if 0 <= i && i < len(x.snippets) {
1324 return x.snippets[i]
// positionList is a list of match positions. LookupRegexp fills in the
// filename and line of each entry and sorts the list with sort.Sort; the
// three methods below implement sort.Interface for that purpose.
1329 type positionList []struct {
// Len returns the number of entries in the list.
1334 func (list positionList) Len() int { return len(list) }
// Less orders entries by filename only; line numbers are not compared.
1335 func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename }
// Swap exchanges the entries at indices i and j.
1336 func (list positionList) Swap(i, j int) { list[i], list[j] = list[j], list[i] }
1338 // unique returns the list sorted and with duplicate entries removed.
1339 func unique(list []int) []int {
1343 for _, x := range list {
// x is accepted when it is the first element (i == 0) or differs from
// last. NOTE(review): comparing only against last removes duplicates
// only if equal values are adjacent, i.e. the list is sorted before
// this loop - confirm.
1344 if i == 0 || x != last {
1353 // A FileLines value specifies a file and line numbers within that file.
// LookupRegexp builds one FileLines value per file, passing the collected
// line numbers through unique first.
1354 type FileLines struct {
1359 // LookupRegexp returns the number of matches and the matches where a regular
1360 // expression r is found in the full text index. At most n matches are
1361 // returned (thus found <= n).
1363 func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) {
// Without a suffix array there is no full text index to search, and a
// non-positive n asks for no matches.
1364 if x.suffixes == nil || n <= 0 {
1369 var list positionList
1370 // FindAllIndex may return matches that span across file boundaries.
1371 // Such matches are unlikely, but after eliminating them we may end up
1372 // with fewer than n matches. If we don't have enough at the end, redo
1373 // the search with an increased value n1, but only if FindAllIndex
1374 // returned all the requested matches in the first place (if it
1375 // returned fewer than that there cannot be more).
// Each retry enlarges the request by the number of matches still missing.
1376 for n1 := n; found < n; n1 += n - found {
1378 matches := x.suffixes.FindAllIndex(r, n1)
1379 // compute files, exclude matches that span file boundaries,
1380 // and map offsets to file-local offsets
// list is sized for every match; only the first found entries get filled.
1381 list = make(positionList, len(matches))
1382 for _, m := range matches {
1383 // by construction, an offset corresponds to the Pos value
1384 // for the file set - use it to get the file and line
1385 p := token.Pos(m[0])
1386 if file := x.fset.File(p); file != nil {
// file contains the match start m[0]; make sure the end m[1] lies
// in the same file as well.
1387 if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() {
1388 // match [m[0], m[1]) is within the file boundaries
1389 list[found].filename = file.Name()
1390 list[found].line = file.Line(p)
1395 if found == n || len(matches) < n1 {
1396 // found all matches or there's no chance to find more
// drop the entries that were allocated but never filled
1400 list = list[0:found]
1401 sort.Sort(list) // sort by filename
1403 // collect matches belonging to the same file
// addLines appends the lines gathered for the file named last to result.
1406 addLines := func() {
1408 // remove duplicate lines
1409 result = append(result, FileLines{last, unique(lines)})
1413 for _, m := range list {
// list is sorted by filename, so a different filename marks the start
// of the next file's matches.
1414 if m.filename != last {
1418 lines = append(lines, m.line)
1425 // invalidateIndex should be called whenever any of the file systems
1426 // under godoc's observation change so that the indexer is kicked on.
1427 func (c *Corpus) invalidateIndex() {
// NOTE(review): Set(nil) presumably only marks fsModified as changed (the
// stored value itself is nil) - confirm how the indexer observes it.
1428 c.fsModified.Set(nil)
1432 // feedDirnames feeds the directory names of all directories
1433 // under the directory tree stored in c.fsTree to channel ch.
1435 func (c *Corpus) feedDirnames(ch chan<- string) {
// Only walk the tree when c.fsTree holds a non-nil value; the type
// assertion below would panic on a nil interface. The second result of
// Get is ignored.
1436 if dir, _ := c.fsTree.Get(); dir != nil {
1437 for d := range dir.(*Directory).iter(false) {
1443 // fsDirnames returns a channel sending all directory names
1444 // of all the file systems under godoc's observation.
1446 func (c *Corpus) fsDirnames() <-chan string {
1447 ch := make(chan string, 256) // buffered for fewer context switches
1455 // CompatibleWith reports whether the Index x is compatible with the corpus
1456 // indexing options set in c. The two are compatible when the index's Docs,
// GoCode, FullText and MaxResults options all equal the corresponding
// IndexDocs, IndexGoCode, IndexFullText and MaxResults settings of c.
1457 func (x *Index) CompatibleWith(c *Corpus) bool {
1458 return x.opts.Docs == c.IndexDocs &&
1459 x.opts.GoCode == c.IndexGoCode &&
1460 x.opts.FullText == c.IndexFullText &&
1461 x.opts.MaxResults == c.MaxResults
// readIndex reads the index from the files matching the glob pattern
// filenames. The matching files are opened in sorted order and handed to
// ReadIndexFrom as one concatenated stream. It is an error if no file
// matches the pattern.
1464 func (c *Corpus) readIndex(filenames string) error {
1465 matches, err := filepath.Glob(filenames)
1468 } else if matches == nil {
1469 return fmt.Errorf("no index files match %q", filenames)
1471 sort.Strings(matches) // make sure files are in the right order
1472 files := make([]io.Reader, 0, len(matches))
1473 for _, filename := range matches {
1474 f, err := os.Open(filename)
1479 files = append(files, f)
// io.MultiReader presents the files back to back as a single reader.
1481 return c.ReadIndexFrom(io.MultiReader(files...))
1484 // ReadIndexFrom sets the current index from the serialized version found in r.
// An index whose options are not compatible with c (see CompatibleWith) is
// rejected with an error.
1485 func (c *Corpus) ReadIndexFrom(r io.Reader) error {
1487 if _, err := x.ReadFrom(r); err != nil {
1490 if !x.CompatibleWith(c) {
1491 return fmt.Errorf("index file options are incompatible: %v", x.opts)
// install x as the corpus's search index
1493 c.searchIndex.Set(x)
// UpdateIndex builds a new index with c.NewIndex and installs it as the
// corpus's search index. Its log statements report the elapsed time, the
// index statistics, and heap statistics labelled "before GC" and "after GC".
1497 func (c *Corpus) UpdateIndex() {
1499 log.Printf("updating index...")
1502 index := c.NewIndex()
1504 c.searchIndex.Set(index)
// elapsed time between the start and stop timestamps, in seconds
1506 secs := stop.Sub(start).Seconds()
1507 stats := index.Stats()
1508 log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)",
1509 secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots)
// The same MemStats value is refilled for both memory reports.
1511 memstats := new(runtime.MemStats)
1512 runtime.ReadMemStats(memstats)
1514 log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
1517 runtime.ReadMemStats(memstats)
1519 log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
1523 // RunIndexer runs forever, indexing.
// NOTE(review): a negative c.IndexInterval is special-cased below, so
// "forever" may not hold in that case - confirm.
1524 func (c *Corpus) RunIndexer() {
1525 // initialize the index from disk if possible
1526 if c.IndexFiles != "" {
// a failure to read the index files is logged
1528 if err := c.readIndex(c.IndexFiles); err != nil {
1529 log.Printf("error reading index from file %s: %v", c.IndexFiles, err)
1534 // Repeatedly update the package directory tree and index.
1538 if c.IndexInterval < 0 {
1541 delay := 5 * time.Minute // by default, reindex every 5 minutes
// a positive IndexInterval overrides the default; zero keeps it
1542 if c.IndexInterval > 0 {
1543 delay = c.IndexInterval
// countingWriter wraps the writer held in its w field.
// NOTE(review): judging by the name it also keeps a count of the bytes
// written - confirm.
1549 type countingWriter struct {
// Write forwards p to the wrapped writer c.w and returns that writer's
// results.
1554 func (c countingWriter) Write(p []byte) (n int, err error) {
1555 n, err = c.w.Write(p)
// NOTE(review): byteReader is presumably the type of countingReader's r
// field, which is used below through Read and ReadByte - confirm.
1560 type byteReader interface {
// countingReader wraps the reader held in its r field.
// NOTE(review): judging by the name it also keeps a count of the bytes
// read - confirm.
1565 type countingReader struct {
// Read forwards to the wrapped reader c.r and returns that reader's results.
1570 func (c countingReader) Read(p []byte) (n int, err error) {
1571 n, err = c.r.Read(p)
1576 func (c countingReader) ReadByte() (b byte, err error) {
1577 b, err = c.r.ReadByte()