12 // CommentToMarkdown converts comment text to formatted markdown.
13 // The comment was prepared by DocReader,
14 // so it is known not to have leading or trailing blank lines,
15 // nor to have trailing spaces at the end of lines.
16 // The comment markers have already been removed.
18 // Each line is converted into a markdown line and empty lines are just converted to
19 // newlines. Headings are prefixed with `### ` to make them markdown headings.
21 // A span of indented lines retains a 4 space prefix block, with the common indent
22 // prefix removed unless empty, in which case it will be converted to a newline.
24 // URLs in the comment text are converted into links.
25 func CommentToMarkdown(text string) string {
// Render into an in-memory buffer through the io.Writer-based helper.
26 buf := &bytes.Buffer{}
27 commentToMarkdown(buf, text)
// Fragments of markdown syntax, kept as byte slices.
32 mdNewline = []byte("\n")
33 mdHeader = []byte("### ") // prefix that turns a line into a markdown heading
// NOTE(review): the CommentToMarkdown doc describes a 4 space prefix for
// indented spans; confirm this literal matches that description.
34 mdIndent = []byte(" ")
// NOTE(review): the three mdLink* pieces look like the parts of a markdown
// link, [text](url) — confirm at the use site.
35 mdLinkStart = []byte("[")
36 mdLinkDiv = []byte("](")
37 mdLinkEnd = []byte(")")
// commentToMarkdown writes the markdown form of text to w, handling the
// blocks returned by blocks(text) one at a time.
40 func commentToMarkdown(w io.Writer, text string) {
42 for _, b := range blocks(text) {
// Lines in this loop are written through emphasize with nice=true.
49 for _, line := range b.lines {
50 emphasize(w, line, true)
// Lines in this loop are written through commentEscape with nice=true.
58 for _, line := range b.lines {
60 commentEscape(w, line, true)
69 for _, line := range b.lines {
// markdownEscape matches a single character out of a set of punctuation.
// The one capture group lets escapeRegex put a backslash in front of it.
89 markdownEscape = regexp.MustCompile(`([\\\x60*{}[\]()#+\-.!_>~|"$%&'\/:;<=?@^])`)
// unicodeQuoteReplacer maps `` to ulquo and '' to urquo.
91 unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo)
94 // commentEscape escapes comment text for markdown and writes it to w. If nice is set,
95 // also turn `` into “ and '' into ”.
96 func commentEscape(w io.Writer, text string, nice bool) {
98 text = convertQuotes(text)
100 text = escapeRegex(text)
// NOTE(review): the error returned by w.Write is not checked.
101 w.Write([]byte(text))
104 func convertQuotes(text string) string {
105 return unicodeQuoteReplacer.Replace(text)
108 func escapeRegex(text string) string {
109 return markdownEscape.ReplaceAllString(text, `\$1`)
// emphasize writes line to w. It uses matchRx to locate URLs and
// identifiers; text around a match is passed through commentEscape.
112 func emphasize(w io.Writer, line string, nice bool) {
114 m := matchRx.FindStringSubmatchIndex(line)
118 // len(m) >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
120 // write text before match
121 commentEscape(w, line[0:m[0]], nice)
123 // adjust match for URLs
124 match := line[m[0]:m[1]]
125 if strings.Contains(match, "://") {
// Each s is a two-character string holding an opening and a closing bracket.
127 for _, s := range []string{"()", "{}", "[]"} {
128 open, close := s[:1], s[1:] // E.g., "(" and ")"
129 // require opening parentheses before closing parentheses (#22285)
130 if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) {
134 // require balanced pairs of parentheses (#5043)
// The i < 10 bound caps the number of shortening attempts.
135 for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ {
136 m1 = strings.LastIndexAny(line[:m1], s)
141 // redo matching with shortened line for correct indices
142 m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)])
146 // Following code has been modified from go/doc since words is always
147 // nil. All html formatting has also been transformed into markdown formatting.
160 commentEscape(w, match, nice)
// urlReplacer backslash-escapes parentheses in the URL before it is written.
164 w.Write([]byte(urlReplacer.Replace(url)))
171 commentEscape(w, line, nice)
174 // Everything from here on is a copy of go/doc/comment.go
177 // Regexp for Go identifiers: a Unicode letter or underscore, followed by
// any number of Unicode letters, underscores, or ASCII digits.
178 identRx = `[\pL_][\pL_0-9]*`
181 // Match parens, and check later for balance - see #5043, #22285
182 // Match .,:;?! within path, but not at end - see #18139, #16565
183 // This excludes some rare yet valid urls ending in common punctuation
184 // in order to allow sentences ending in URLs.
186 // protocol (required) e.g. http
187 protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
188 // host (required) e.g. www.example.com or [::1]:8080
189 hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
190 // path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
191 pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`
// urlRx is the full URL pattern: protocol, "://", host, then optional path.
193 urlRx = protoPart + `://` + hostPart + pathPart
// matchRx matches either a URL (first alternative) or a Go identifier
// (second alternative).
197 matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
// urlReplacer puts a backslash in front of every "(" and ")".
198 urlReplacer = strings.NewReplacer(`(`, `\(`, `)`, `\)`)
// indentLen scans the leading run of spaces and tabs in s.
// NOTE(review): presumably returns the length of that run; the return
// statement is not visible here — confirm.
201 func indentLen(s string) int {
203 for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
// isBlank reports whether s is the empty string or consists of exactly
// one newline character.
func isBlank(s string) bool {
	switch s {
	case "", "\n":
		return true
	}
	return false
}
// commonPrefix walks a and b byte by byte from the start for as long as
// both have bytes left and those bytes are equal.
// NOTE(review): presumably returns the shared prefix; the return statement
// is not visible here — confirm.
213 func commonPrefix(a, b string) string {
215 for i < len(a) && i < len(b) && a[i] == b[i] {
// unindent computes the longest leading-whitespace prefix shared by the
// lines of block.
// NOTE(review): the final loop presumably strips that prefix from each
// line in place; its body is not visible here — confirm.
221 func unindent(block []string) {
226 // compute maximum common white prefix
// block[0] is read here; any empty-block guard lies outside the visible lines.
227 prefix := block[0][0:indentLen(block[0])]
228 for _, line := range block {
230 prefix = commonPrefix(prefix, line[0:indentLen(line)])
236 for i, line := range block {
243 // heading returns the trimmed line if it passes as a section heading;
244 // otherwise it returns the empty string.
245 func heading(line string) string {
246 line = strings.TrimSpace(line)
251 // a heading must start with an uppercase letter
252 r, _ := utf8.DecodeRuneInString(line)
253 if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
257 // it must end in a letter or digit:
258 r, _ = utf8.DecodeLastRuneInString(line)
259 if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
263 // exclude lines with illegal characters. We allow "(),".
264 if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
268 // allow "'" for possessive "'s" only
270 i := strings.IndexRune(b, '\'')
// Reject an apostrophe that ends the text, is not followed by 's',
// or whose "'s" is followed by anything other than a space.
274 if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
275 return "" // not followed by "s "
280 // allow "." when followed by non-space
282 i := strings.IndexRune(b, '.')
// Reject a period that ends the text or is followed by a space.
286 if i+1 >= len(b) || b[i+1] == ' ' {
287 return "" // not followed by non-space
// blocks splits text into a sequence of blocks: paragraphs (opPara),
// runs of indented lines (opPre) and headings (opHead).
308 func blocks(text string) []block {
314 lastWasHeading = false
319 out = append(out, block{opPara, para})
// SplitAfter keeps the trailing "\n" on every line it returns.
324 lines := strings.SplitAfter(text, "\n")
// No post statement: i is advanced inside the loop body.
326 for i := 0; i < len(lines); {
335 if indentLen(line) > 0 {
339 // count indented or blank lines
341 for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
344 // but not trailing blank lines
345 for j > i && isBlank(lines[j-1]) {
353 // put those lines in a pre block
354 out = append(out, block{opPre, pre})
355 lastWasHeading = false
359 if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
360 isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
361 // current line is non-blank, surrounded by blank lines
362 // and the next non-blank line is not indented: this
363 // might be a heading.
364 if head := heading(line); head != "" {
// A heading block carries the trimmed heading text as its only line.
366 out = append(out, block{opHead, []string{head}})
368 lastWasHeading = true
375 lastWasHeading = false
376 para = append(para, lines[i])