1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // This file implements FormatSelections and FormatText.
6 // FormatText is used to HTML-format Go and non-Go source
7 // text with line numbers and highlighted sections. It is
8 // built on top of FormatSelections, a generic formatter
9 // for "selected" text.
23 // ----------------------------------------------------------------------------
24 // Implementation of FormatSelections
26 // A Segment describes a text segment [start, end).
27 // The zero value of a Segment is a ready-to-use empty segment.
33 func (seg *Segment) isEmpty() bool { return seg.start >= seg.end }
35 // A Selection is an "iterator" function returning a text segment.
36 // Repeated calls to a selection return consecutive, non-overlapping,
37 // non-empty segments, followed by an infinite sequence of empty
38 // segments. The first empty segment marks the end of the selection.
40 type Selection func() Segment
// A LinkWriter writes some start or end "tag" to w for the text offset offs.
// It is called by FormatSelections at the start or end of each link segment.
// NOTE(review): start presumably is true for a start tag and false for an
// end tag — confirm against the call in FormatSelections.
type LinkWriter func(w io.Writer, offs int, start bool)
// A SegmentWriter formats a text according to selections and writes it to w.
// The selections parameter is a bit set indicating which selections provided
// to FormatSelections overlap with the text segment: If the n'th bit is set
// in selections (that is, selections&(1<<n) != 0), the n'th selection provided
// to FormatSelections is overlapping with the text.
type SegmentWriter func(w io.Writer, text []byte, selections int)
55 // FormatSelections takes a text and writes it to w using link and segment
56 // writers lw and sw as follows: lw is invoked for consecutive segment starts
57 // and ends as specified through the links selection, and sw is invoked for
58 // consecutive segments of text overlapped by the same selections as specified
59 // by selections. The link writer lw may be nil, in which case the links
60 // Selection is ignored.
// The i'th entry of selections corresponds to bit i (value 1<<i) of the bit
// set that is handed to sw.
62 func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, sw SegmentWriter, selections ...Selection) {
63 // If we have a link writer, make the links
64 // selection the last entry in selections
66 selections = append(selections, links)
69 // compute the sequence of consecutive segment changes
70 changes := newMerger(selections)
72 // The i'th bit in bitset indicates that the text
73 // at the current offset is covered by selections[i].
77 // Text segments are written in a delayed fashion
78 // such that consecutive segments belonging to the
79 // same selection can be combined (peephole optimization).
80 // last describes the last segment which has not yet been written.
82 begin, end int // valid if begin < end
86 // flush writes the last delayed text segment
// (nothing is handed to sw while last is invalid, i.e. begin >= end)
88 if last.begin < last.end {
89 sw(w, text[last.begin:last.end], last.bitset)
91 last.begin = last.end // invalidate last
94 // segment runs the segment [lastOffs, end) with the selection
95 // indicated by bitset through the segment peephole optimizer.
96 segment := func(end int) {
97 if lastOffs < end { // ignore empty segments
98 if last.end != lastOffs || last.bitset != bitset {
99 // the last segment is not adjacent to or
100 // differs from the new one
102 // start a new segment
103 last.begin = lastOffs
111 // get the next segment change
112 index, offs, start := changes.next()
113 if index < 0 || offs > len(text) {
114 // no more segment changes or the next change
115 // is past the end of the text - we're done
118 // determine the kind of segment change
// (the links selection, when used, is the last entry of selections)
119 if lw != nil && index == len(selections)-1 {
120 // we have a link segment change (see start of this function):
121 // format the previous selection segment, write the
122 // link tag and start a new selection segment
128 // we have a selection change:
129 // format the previous selection segment, determine
130 // the new selection bitset and start a new segment
// mask selects the bit that stands for selections[index] in bitset
133 mask := 1 << uint(index)
145 // A merger merges a slice of Selections and produces a sequence of
146 // consecutive segment change events through repeated next() calls.
// Once the end of the current segment of a selection has been consumed,
// next calls that selection again to obtain its following segment.
149 selections []Selection
150 segments []Segment // segments[i] is the next segment of selections[i]
// infinity is the offset stored in a segment's start or end once that
// offset has been consumed (or before a segment is available), so that
// merger.next no longer considers it. It is assumed to exceed every
// real text offset.
const infinity int = 2e9
// newMerger returns a merger for selections. Every entry of segments is
// first set to {infinity, infinity}; each selection is then called once
// to fetch its first segment, which is considered only if it is non-empty.
// NOTE(review): a non-empty first segment presumably replaces the
// {infinity, infinity} placeholder in segments[i] — confirm.
155 func newMerger(selections []Selection) *merger {
156 segments := make([]Segment, len(selections))
157 for i, sel := range selections {
158 segments[i] = Segment{infinity, infinity}
160 if seg := sel(); !seg.isEmpty() {
165 return &merger{selections, segments}
168 // next returns the next segment change: index specifies the Selection
169 // to which the segment belongs, offs is the segment start or end offset
170 // as determined by the start value. If there are no more segment changes,
171 // next returns an index value < 0.
// Consumed offsets are overwritten with infinity in m.segments, so each
// call modifies the merger's state.
173 func (m *merger) next() (index, offs int, start bool) {
174 // find the next smallest offset where a segment starts or ends
177 for i, seg := range m.segments {
179 case seg.start < offs:
190 // no offset found => all selections merged
193 // offset found - it's either the start or end offset but
194 // either way it is ok to consume the start offset: set it
195 // to infinity so it won't be considered in the following
// call to next
197 m.segments[index].start = infinity
201 // end offset found - consume it
202 m.segments[index].end = infinity
203 // advance to the next segment for that selection
204 seg := m.selections[index]()
206 m.segments[index] = seg
211 // ----------------------------------------------------------------------------
212 // Implementation of FormatText
214 // lineSelection returns the line segments for text as a Selection.
// Each call of the returned Selection yields the segment of the next line.
215 func lineSelection(text []byte) Selection {
217 return func() (seg Segment) {
218 // find next newline, if any
// text[j-1] is the byte just before offset j; a '\n' there means the
// line text[i:j] includes its terminating newline
221 if text[j-1] == '\n' {
226 // text[i:j] constitutes a line
234 // tokenSelection returns, as a selection, the sequence of
235 // consecutive occurrences of token sel in the Go src text.
// src is scanned lazily: every call of the returned Selection advances
// the scanner set up here.
237 func tokenSelection(src []byte, sel token.Token) Selection {
238 var s scanner.Scanner
239 fset := token.NewFileSet()
240 file := fset.AddFile("", fset.Base(), len(src))
// no error handler is installed (nil); scanner.ScanComments makes the
// scanner return comments as COMMENT tokens instead of skipping them
241 s.Init(file, src, nil, scanner.ScanComments)
242 return func() (seg Segment) {
244 pos, tok, lit := s.Scan()
245 if tok == token.EOF {
// translate the token position into a byte offset within src
248 offs := file.Offset(pos)
// the segment covers the token's literal text: [offs, offs+len(lit))
250 seg = Segment{offs, offs + len(lit)}
258 // makeSelection is a helper function to make a Selection from a slice of pairs.
259 // Pairs describing empty segments are ignored.
// A pair m is turned into the segment [m[0], m[1]).
261 func makeSelection(matches [][]int) Selection {
263 return func() Segment {
264 for i < len(matches) {
269 return Segment{m[0], m[1]}
276 // regexpSelection computes the Selection for the regular expression expr in text.
// The selection consists of all matches of expr in text as found by
// FindAllIndex. A compile error for expr is not reported; in that case
// no matches are assigned before makeSelection is called.
277 func regexpSelection(text []byte, expr string) Selection {
279 if rx, err := regexp.Compile(expr); err == nil {
280 matches = rx.FindAllIndex(text, -1)
282 return makeSelection(matches)
// selRx matches a range description of the form "from:to" at the start
// of a string, where from and to are non-empty runs of decimal digits;
// submatch 1 is from and submatch 2 is to.
var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`)
287 // RangeSelection computes the Selection for a text range described
288 // by the argument str; the range description must match the selRx
289 // regular expression.
// The two numbers of the description are used as the start and end
// offsets of a single segment.
290 func RangeSelection(str string) Selection {
291 m := selRx.FindStringSubmatch(str)
// Atoi errors are deliberately dropped.
// NOTE(review): selRx admits only decimal digits, so presumably the only
// possible failure is a number too large for int — confirm that case is
// acceptable.
293 from, _ := strconv.Atoi(m[1])
294 to, _ := strconv.Atoi(m[2])
296 return makeSelection([][]int{{from, to}})
302 // Span tags for all the possible selection combinations that may
303 // be generated by FormatText. Selections are indicated by a bitset,
304 // and the value of the bitset specifies the tag to be used.
// As the entries below show, bit 0 stands for "comment", bit 1 for
// "highlight" and bit 2 for "selection"; the entry for the empty bitset
// is the empty tag.
310 var startTags = [][]byte{
311 /* 000 */ []byte(``),
312 /* 001 */ []byte(`<span class="comment">`),
313 /* 010 */ []byte(`<span class="highlight">`),
314 /* 011 */ []byte(`<span class="highlight-comment">`),
315 /* 100 */ []byte(`<span class="selection">`),
316 /* 101 */ []byte(`<span class="selection-comment">`),
317 /* 110 */ []byte(`<span class="selection-highlight">`),
318 /* 111 */ []byte(`<span class="selection-highlight-comment">`),
// endTag is the closing tag matching the span tags in startTags.
var endTag = []byte(`</span>`)
// selectionTag writes text, HTML-escaped, to w. It has the SegmentWriter
// signature and is passed as such to FormatSelections by FormatText.
// selections is used as an index into startTags.
// NOTE(review): when that entry is non-empty the escaped text is
// presumably enclosed in the start tag and endTag — confirm.
323 func selectionTag(w io.Writer, text []byte, selections int) {
// bitsets without an entry in startTags get no tag lookup
324 if selections < len(startTags) {
325 if tag := startTags[selections]; len(tag) > 0 {
327 template.HTMLEscape(w, text)
332 template.HTMLEscape(w, text)
335 // FormatText HTML-escapes text and writes it to w.
336 // Consecutive text segments are wrapped in HTML spans (with tags as
337 // defined by startTags and endTag) as follows:
339 // - if line >= 0, line number (ln) spans are inserted before each line,
340 // starting with the value of line
341 // - if the text is Go source, comments get the "comment" span class
342 // - each occurrence of the regular expression pattern gets the "highlight" span class
344 // - text segments covered by selection get the "selection" span class
346 // Comments, highlights, and selections may overlap arbitrarily; the respective
347 // HTML span classes are specified in the startTags variable.
349 func FormatText(w io.Writer, text []byte, line int, goSource bool, pattern string, selection Selection) {
350 var comments, highlights Selection
352 comments = tokenSelection(text, token.COMMENT)
355 highlights = regexpSelection(text, pattern)
// formatting through FormatSelections is needed only if line numbers or
// at least one selection were requested
357 if line >= 0 || comments != nil || highlights != nil || selection != nil {
358 var lineTag LinkWriter
360 lineTag = func(w io.Writer, _ int, start bool) {
// the line number serves as the element id (L<n>) and as the visible
// text, padded to a width of 6
362 fmt.Fprintf(w, "<span id=\"L%d\" class=\"ln\">%6d</span>", line, line)
// lines are passed as the links selection; the order comments, highlights,
// selection fixes their bits (0, 1, 2) in the bitset given to selectionTag
367 FormatSelections(w, text, lineTag, lineSelection(text), selectionTag, comments, highlights, selection)
// NOTE(review): presumably the fallback when nothing needs decorating —
// the text is only HTML-escaped; confirm.
369 template.HTMLEscape(w, text)