1 // Copyright 2019 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
16 // CommentToMarkdown converts comment text to formatted markdown.
17 // The comment was prepared by DocReader,
18 // so it is known not to have leading or trailing blank lines
19 // nor to have trailing spaces at the end of lines.
20 // The comment markers have already been removed.
22 // Each line is converted into a markdown line and empty lines are just converted to
23 // newlines. Headings are prefixed with `### ` to make them markdown headings.
25 // A span of indented lines retains a 4 space prefix block, with the common indent
26 // prefix removed unless empty, in which case it will be converted to a newline.
28 // URLs in the comment text are converted into links.
29 func CommentToMarkdown(text string) string {
30 buf := &bytes.Buffer{}
31 commentToMarkdown(buf, text)
36 mdNewline = []byte("\n")
37 mdHeader = []byte("### ")
38 mdIndent = []byte(" ")
39 mdLinkStart = []byte("[")
40 mdLinkDiv = []byte("](")
41 mdLinkEnd = []byte(")")
44 func commentToMarkdown(w io.Writer, text string) {
46 for _, b := range blocks(text) {
53 for _, line := range b.lines {
54 emphasize(w, line, true)
62 for _, line := range b.lines {
64 commentEscape(w, line, true)
73 for _, line := range b.lines {
93 markdownEscape = regexp.MustCompile(`([\\\x60*{}[\]()#+\-.!_>~|"$%&'\/:;<=?@^])`)
95 unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo)
98 // commentEscape escapes comment text for markdown. If nice is set,
99 // also turn `` into “ and '' into ”.
100 func commentEscape(w io.Writer, text string, nice bool) {
102 text = convertQuotes(text)
104 text = escapeRegex(text)
105 w.Write([]byte(text))
108 func convertQuotes(text string) string {
109 return unicodeQuoteReplacer.Replace(text)
112 func escapeRegex(text string) string {
113 return markdownEscape.ReplaceAllString(text, `\$1`)
116 func emphasize(w io.Writer, line string, nice bool) {
118 m := matchRx.FindStringSubmatchIndex(line)
122 // len(m) >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
124 // write text before match
125 commentEscape(w, line[0:m[0]], nice)
127 // adjust match for URLs
128 match := line[m[0]:m[1]]
129 if strings.Contains(match, "://") {
131 for _, s := range []string{"()", "{}", "[]"} {
132 open, close := s[:1], s[1:] // E.g., "(" and ")"
133 // require opening parentheses before closing parentheses (#22285)
134 if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) {
138 // require balanced pairs of parentheses (#5043)
139 for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ {
140 m1 = strings.LastIndexAny(line[:m1], s)
145 // redo matching with shortened line for correct indices
146 m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)])
150 // Following code has been modified from go/doc since words is always
151 // nil. All html formatting has also been transformed into markdown formatting
164 commentEscape(w, match, nice)
168 w.Write([]byte(urlReplacer.Replace(url)))
175 commentEscape(w, line, nice)
178 // Everything from here on is a copy of go/doc/comment.go
181 // Regexp for Go identifiers
182 identRx = `[\pL_][\pL_0-9]*`
185 // Match parens, and check later for balance - see #5043, #22285
186 // Match .,:;?! within path, but not at end - see #18139, #16565
187 // This excludes some rare yet valid urls ending in common punctuation
188 // in order to allow sentences ending in URLs.
190 // protocol (required) e.g. http
191 protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
192 // host (required) e.g. www.example.com or [::1]:8080
193 hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
194 // path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
195 pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`
197 urlRx = protoPart + `://` + hostPart + pathPart
201 matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
202 urlReplacer = strings.NewReplacer(`(`, `\(`, `)`, `\)`)
205 func indentLen(s string) int {
207 for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
213 func isBlank(s string) bool {
214 return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
217 func commonPrefix(a, b string) string {
219 for i < len(a) && i < len(b) && a[i] == b[i] {
225 func unindent(block []string) {
230 // compute maximum common white prefix
231 prefix := block[0][0:indentLen(block[0])]
232 for _, line := range block {
234 prefix = commonPrefix(prefix, line[0:indentLen(line)])
240 for i, line := range block {
247 // heading returns the trimmed line if it passes as a section heading;
248 // otherwise it returns the empty string.
249 func heading(line string) string {
250 line = strings.TrimSpace(line)
255 // a heading must start with an uppercase letter
256 r, _ := utf8.DecodeRuneInString(line)
257 if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
261 // it must end in a letter or digit:
262 r, _ = utf8.DecodeLastRuneInString(line)
263 if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
267 // exclude lines with illegal characters. We allow "(),"
268 if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
272 // allow "'" for possessive "'s" only
274 i := strings.IndexRune(b, '\'')
278 if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
279 return "" // not followed by "s "
284 // allow "." when followed by non-space
286 i := strings.IndexRune(b, '.')
290 if i+1 >= len(b) || b[i+1] == ' ' {
291 return "" // not followed by non-space
312 func blocks(text string) []block {
318 lastWasHeading = false
323 out = append(out, block{opPara, para})
328 lines := strings.SplitAfter(text, "\n")
330 for i := 0; i < len(lines); {
339 if indentLen(line) > 0 {
343 // count indented or blank lines
345 for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
348 // but not trailing blank lines
349 for j > i && isBlank(lines[j-1]) {
357 // put those lines in a pre block
358 out = append(out, block{opPre, pre})
359 lastWasHeading = false
363 if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
364 isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
365 // current line is non-blank, surrounded by blank lines
366 // and the next non-blank line is not indented: this
367 // might be a heading.
368 if head := heading(line); head != "" {
370 out = append(out, block{opHead, []string{head}})
372 lastWasHeading = true
379 lastWasHeading = false
380 para = append(para, lines[i])