// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file implements FormatSelections and FormatText.
// FormatText is used to HTML-format Go and non-Go source
// text with line numbers and highlighted sections. It is
// built on top of FormatSelections, a generic formatter
// for "selected" text.

package godoc

import (
	"fmt"
	"go/scanner"
	"go/token"
	"io"
	"regexp"
	"strconv"
	"text/template"
)

// ----------------------------------------------------------------------------
// Implementation of FormatSelections

// A Segment describes a text segment [start, end).
// The zero value of a Segment is a ready-to-use empty segment.
//
type Segment struct {
	start, end int // offsets delimiting the half-open range [start, end)
}

// isEmpty reports whether seg covers no text, that is, whether
// its start offset is not smaller than its end offset.
func (seg *Segment) isEmpty() bool { return seg.start >= seg.end }

// A Selection is an "iterator" function returning a text segment.
// Repeated calls to a selection return consecutive, non-overlapping,
// non-empty segments, followed by an infinite sequence of empty
// segments. The first empty segment marks the end of the selection.
//
type Selection func() Segment

// A LinkWriter writes some start or end "tag" to w for the text offset offs.
// It is called by FormatSelections at the start or end of each link segment;
// start is true for a segment start and false for a segment end.
//
type LinkWriter func(w io.Writer, offs int, start bool)

// A SegmentWriter formats a text according to selections and writes it to w.
// The selections parameter is a bit set indicating which selections provided
// to FormatSelections overlap with the text segment: If the n'th bit is set
// in selections, the n'th selection provided to FormatSelections is overlapping
// with the text.
// type SegmentWriter func(w io.Writer, text []byte, selections int) // FormatSelections takes a text and writes it to w using link and segment // writers lw and sw as follows: lw is invoked for consecutive segment starts // and ends as specified through the links selection, and sw is invoked for // consecutive segments of text overlapped by the same selections as specified // by selections. The link writer lw may be nil, in which case the links // Selection is ignored. // func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, sw SegmentWriter, selections ...Selection) { // If we have a link writer, make the links // selection the last entry in selections if lw != nil { selections = append(selections, links) } // compute the sequence of consecutive segment changes changes := newMerger(selections) // The i'th bit in bitset indicates that the text // at the current offset is covered by selections[i]. bitset := 0 lastOffs := 0 // Text segments are written in a delayed fashion // such that consecutive segments belonging to the // same selection can be combined (peephole optimization). // last describes the last segment which has not yet been written. var last struct { begin, end int // valid if begin < end bitset int } // flush writes the last delayed text segment flush := func() { if last.begin < last.end { sw(w, text[last.begin:last.end], last.bitset) } last.begin = last.end // invalidate last } // segment runs the segment [lastOffs, end) with the selection // indicated by bitset through the segment peephole optimizer. 
segment := func(end int) { if lastOffs < end { // ignore empty segments if last.end != lastOffs || last.bitset != bitset { // the last segment is not adjacent to or // differs from the new one flush() // start a new segment last.begin = lastOffs } last.end = end last.bitset = bitset } } for { // get the next segment change index, offs, start := changes.next() if index < 0 || offs > len(text) { // no more segment changes or the next change // is past the end of the text - we're done break } // determine the kind of segment change if lw != nil && index == len(selections)-1 { // we have a link segment change (see start of this function): // format the previous selection segment, write the // link tag and start a new selection segment segment(offs) flush() lastOffs = offs lw(w, offs, start) } else { // we have a selection change: // format the previous selection segment, determine // the new selection bitset and start a new segment segment(offs) lastOffs = offs mask := 1 << uint(index) if start { bitset |= mask } else { bitset &^= mask } } } segment(len(text)) flush() } // A merger merges a slice of Selections and produces a sequence of // consecutive segment change events through repeated next() calls. // type merger struct { selections []Selection segments []Segment // segments[i] is the next segment of selections[i] } const infinity int = 2e9 func newMerger(selections []Selection) *merger { segments := make([]Segment, len(selections)) for i, sel := range selections { segments[i] = Segment{infinity, infinity} if sel != nil { if seg := sel(); !seg.isEmpty() { segments[i] = seg } } } return &merger{selections, segments} } // next returns the next segment change: index specifies the Selection // to which the segment belongs, offs is the segment start or end offset // as determined by the start value. If there are no more segment changes, // next returns an index value < 0. 
// func (m *merger) next() (index, offs int, start bool) { // find the next smallest offset where a segment starts or ends offs = infinity index = -1 for i, seg := range m.segments { switch { case seg.start < offs: offs = seg.start index = i start = true case seg.end < offs: offs = seg.end index = i start = false } } if index < 0 { // no offset found => all selections merged return } // offset found - it's either the start or end offset but // either way it is ok to consume the start offset: set it // to infinity so it won't be considered in the following // next call m.segments[index].start = infinity if start { return } // end offset found - consume it m.segments[index].end = infinity // advance to the next segment for that selection seg := m.selections[index]() if !seg.isEmpty() { m.segments[index] = seg } return } // ---------------------------------------------------------------------------- // Implementation of FormatText // lineSelection returns the line segments for text as a Selection. func lineSelection(text []byte) Selection { i, j := 0, 0 return func() (seg Segment) { // find next newline, if any for j < len(text) { j++ if text[j-1] == '\n' { break } } if i < j { // text[i:j] constitutes a line seg = Segment{i, j} i = j } return } } // tokenSelection returns, as a selection, the sequence of // consecutive occurrences of token sel in the Go src text. // func tokenSelection(src []byte, sel token.Token) Selection { var s scanner.Scanner fset := token.NewFileSet() file := fset.AddFile("", fset.Base(), len(src)) s.Init(file, src, nil, scanner.ScanComments) return func() (seg Segment) { for { pos, tok, lit := s.Scan() if tok == token.EOF { break } offs := file.Offset(pos) if tok == sel { seg = Segment{offs, offs + len(lit)} break } } return } } // makeSelection is a helper function to make a Selection from a slice of pairs. // Pairs describing empty segments are ignored. 
// func makeSelection(matches [][]int) Selection { i := 0 return func() Segment { for i < len(matches) { m := matches[i] i++ if m[0] < m[1] { // non-empty segment return Segment{m[0], m[1]} } } return Segment{} } } // regexpSelection computes the Selection for the regular expression expr in text. func regexpSelection(text []byte, expr string) Selection { var matches [][]int if rx, err := regexp.Compile(expr); err == nil { matches = rx.FindAllIndex(text, -1) } return makeSelection(matches) } var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`) // RangeSelection computes the Selection for a text range described // by the argument str; the range description must match the selRx // regular expression. func RangeSelection(str string) Selection { m := selRx.FindStringSubmatch(str) if len(m) >= 2 { from, _ := strconv.Atoi(m[1]) to, _ := strconv.Atoi(m[2]) if from < to { return makeSelection([][]int{{from, to}}) } } return nil } // Span tags for all the possible selection combinations that may // be generated by FormatText. Selections are indicated by a bitset, // and the value of the bitset specifies the tag to be used. // // bit 0: comments // bit 1: highlights // bit 2: selections // var startTags = [][]byte{ /* 000 */ []byte(``), /* 001 */ []byte(``), /* 010 */ []byte(``), /* 011 */ []byte(``), /* 100 */ []byte(``), /* 101 */ []byte(``), /* 110 */ []byte(``), /* 111 */ []byte(``), } var endTag = []byte(``) func selectionTag(w io.Writer, text []byte, selections int) { if selections < len(startTags) { if tag := startTags[selections]; len(tag) > 0 { w.Write(tag) template.HTMLEscape(w, text) w.Write(endTag) return } } template.HTMLEscape(w, text) } // FormatText HTML-escapes text and writes it to w. 
// Consecutive text segments are wrapped in HTML spans (with tags as // defined by startTags and endTag) as follows: // // - if line >= 0, line number (ln) spans are inserted before each line, // starting with the value of line // - if the text is Go source, comments get the "comment" span class // - each occurrence of the regular expression pattern gets the "highlight" // span class // - text segments covered by selection get the "selection" span class // // Comments, highlights, and selections may overlap arbitrarily; the respective // HTML span classes are specified in the startTags variable. // func FormatText(w io.Writer, text []byte, line int, goSource bool, pattern string, selection Selection) { var comments, highlights Selection if goSource { comments = tokenSelection(text, token.COMMENT) } if pattern != "" { highlights = regexpSelection(text, pattern) } if line >= 0 || comments != nil || highlights != nil || selection != nil { var lineTag LinkWriter if line >= 0 { lineTag = func(w io.Writer, _ int, start bool) { if start { fmt.Fprintf(w, "%6d", line, line) line++ } } } FormatSelections(w, text, lineTag, lineSelection(text), selectionTag, comments, highlights, selection) } else { template.HTMLEscape(w, text) } }