1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
18 // A Position describes an arbitrary source position in a file, including the
19 // file, line, column, and byte offset.
20 type Position struct {
21 Line int // line in input (starting at 1)
22 LineRune int // rune in line (starting at 1)
23 Byte int // byte in input (starting at 0)
26 // add returns the position at the end of s, assuming it starts at p.
27 func (p Position) add(s string) Position {
29 if n := strings.Count(s, "\n"); n > 0 {
31 s = s[strings.LastIndex(s, "\n")+1:]
34 p.LineRune += utf8.RuneCountInString(s)
38 // An Expr represents an input element.
40 // Span returns the start and end position of the expression,
41 // excluding leading or trailing comments.
42 Span() (start, end Position)
44 // Comment returns the comments attached to the expression.
45 // This method would normally be named 'Comments' but that
46 // would interfere with embedding a type of the same name.
50 // A Comment represents a single // comment.
53 Token string // without trailing newline
54 Suffix bool // an end of line (not whole line) comment
57 // Comments collects the comments associated with an expression.
58 type Comments struct {
59 Before []Comment // whole-line comments before this expression
60 Suffix []Comment // end-of-line comments after this expression
62 // For top-level expressions only, After lists whole-line
63 // comments following the expression.
67 // Comment returns the receiver. This isn't useful by itself, but
68 // a Comments struct is embedded into all the expression
69 // implementation types, and this gives each of those a Comment
70 // method to satisfy the Expr interface.
71 func (c *Comments) Comment() *Comments {
75 // A FileSyntax represents an entire go.mod file.
76 type FileSyntax struct {
77 Name string // file path
82 func (x *FileSyntax) Span() (start, end Position) {
86 start, _ = x.Stmt[0].Span()
87 _, end = x.Stmt[len(x.Stmt)-1].Span()
91 // addLine adds a line containing the given tokens to the file.
93 // If the first token of the hint matches the first token of the
94 // line, the new line is added at the end of the block containing hint,
95 // extracting hint into a new block if it is not yet in one.
97 // If the hint is non-nil but its first token does not match,
98 // the new line is added after the block containing hint
99 // (or hint itself, if not in a block).
101 // If no hint is provided, addLine appends the line to the end of
102 // the last block with a matching first token,
103 // or to the end of the file if no such block exists.
104 func (x *FileSyntax) addLine(hint Expr, tokens ...string) *Line {
106 // If no hint given, add to the last statement of the given type.
108 for i := len(x.Stmt) - 1; i >= 0; i-- {
110 switch stmt := stmt.(type) {
112 if stmt.Token != nil && stmt.Token[0] == tokens[0] {
117 if stmt.Token[0] == tokens[0] {
125 newLineAfter := func(i int) *Line {
126 new := &Line{Token: tokens}
127 if i == len(x.Stmt) {
128 x.Stmt = append(x.Stmt, new)
130 x.Stmt = append(x.Stmt, nil)
131 copy(x.Stmt[i+2:], x.Stmt[i+1:])
138 for i, stmt := range x.Stmt {
139 switch stmt := stmt.(type) {
142 if stmt.Token == nil || stmt.Token[0] != tokens[0] {
143 return newLineAfter(i)
146 // Convert line to line block.
148 block := &LineBlock{Token: stmt.Token[:1], Line: []*Line{stmt}}
149 stmt.Token = stmt.Token[1:]
151 new := &Line{Token: tokens[1:], InBlock: true}
152 block.Line = append(block.Line, new)
158 if stmt.Token[0] != tokens[0] {
159 return newLineAfter(i)
162 new := &Line{Token: tokens[1:], InBlock: true}
163 stmt.Line = append(stmt.Line, new)
167 for j, line := range stmt.Line {
169 if stmt.Token[0] != tokens[0] {
170 return newLineAfter(i)
173 // Add new line after hint within the block.
174 stmt.Line = append(stmt.Line, nil)
175 copy(stmt.Line[j+2:], stmt.Line[j+1:])
176 new := &Line{Token: tokens[1:], InBlock: true}
185 new := &Line{Token: tokens}
186 x.Stmt = append(x.Stmt, new)
190 func (x *FileSyntax) updateLine(line *Line, tokens ...string) {
197 func (x *FileSyntax) removeLine(line *Line) {
201 // Cleanup cleans up the file syntax x after any edit operations.
202 // To avoid quadratic behavior, removeLine marks the line as dead
203 // by setting line.Token = nil but does not remove it from the slice
204 // in which it appears. After edits have all been indicated,
205 // calling Cleanup cleans out the dead lines.
206 func (x *FileSyntax) Cleanup() {
208 for _, stmt := range x.Stmt {
209 switch stmt := stmt.(type) {
211 if stmt.Token == nil {
216 for _, line := range stmt.Line {
217 if line.Token != nil {
226 // Collapse block into single line.
229 Before: commentsAdd(stmt.Before, stmt.Line[0].Before),
230 Suffix: commentsAdd(stmt.Line[0].Suffix, stmt.Suffix),
231 After: commentsAdd(stmt.Line[0].After, stmt.After),
233 Token: stringsAdd(stmt.Token, stmt.Line[0].Token),
239 stmt.Line = stmt.Line[:ww]
247 func commentsAdd(x, y []Comment) []Comment {
248 return append(x[:len(x):len(x)], y...)
251 func stringsAdd(x, y []string) []string {
252 return append(x[:len(x):len(x)], y...)
255 // A CommentBlock represents a top-level block of comments separate
257 type CommentBlock struct {
262 func (x *CommentBlock) Span() (start, end Position) {
263 return x.Start, x.Start
266 // A Line is a single line of tokens.
275 func (x *Line) Span() (start, end Position) {
276 return x.Start, x.End
279 // A LineBlock is a factored block of lines, like
286 type LineBlock struct {
295 func (x *LineBlock) Span() (start, end Position) {
296 return x.Start, x.RParen.Pos.add(")")
299 // An LParen represents the beginning of a parenthesized line block.
300 // It is a place to store suffix comments.
306 func (x *LParen) Span() (start, end Position) {
307 return x.Pos, x.Pos.add(")")
310 // An RParen represents the end of a parenthesized line block.
311 // It is a place to store whole-line (before) comments.
317 func (x *RParen) Span() (start, end Position) {
318 return x.Pos, x.Pos.add(")")
321 // An input represents a single input file being parsed.
324 filename string // name of input file, for errors
325 complete []byte // entire input
326 remaining []byte // remaining input
327 tokenStart []byte // token being scanned to end of input
328 token token // next token to be returned by lex, peek
329 pos Position // current input position
330 comments []Comment // accumulated comments
333 file *FileSyntax // returned top-level syntax tree
334 parseErrors ErrorList // errors encountered during parsing
336 // Comment assignment state.
337 pre []Expr // all expressions, in preorder traversal
338 post []Expr // all expressions, in postorder traversal
341 func newInput(filename string, data []byte) *input {
346 pos: Position{Line: 1, LineRune: 1, Byte: 0},
350 // parse parses the input file.
351 func parse(file string, data []byte) (f *FileSyntax, err error) {
352 // The parser panics for both routine errors like syntax errors
353 // and for programmer bugs like array index errors.
354 // Turn both into error returns. Catching bug panics is
355 // especially important when processing many files.
356 in := newInput(file, data)
358 if e := recover(); e != nil && e != &in.parseErrors {
359 in.parseErrors = append(in.parseErrors, Error{
360 Filename: in.filename,
362 Err: fmt.Errorf("internal error: %v", e),
365 if err == nil && len(in.parseErrors) > 0 {
370 // Prime the lexer by reading in the first token. It will be available
371 // in the next peek() or lex() call.
374 // Invoke the parser.
376 if len(in.parseErrors) > 0 {
377 return nil, in.parseErrors
379 in.file.Name = in.filename
381 // Assign comments to nearby syntax.
387 // Error is called to report an error.
388 // Error does not return: it panics.
389 func (in *input) Error(s string) {
390 in.parseErrors = append(in.parseErrors, Error{
391 Filename: in.filename,
395 panic(&in.parseErrors)
398 // eof reports whether the input has reached end of file.
399 func (in *input) eof() bool {
400 return len(in.remaining) == 0
403 // peekRune returns the next rune in the input without consuming it.
404 func (in *input) peekRune() int {
405 if len(in.remaining) == 0 {
408 r, _ := utf8.DecodeRune(in.remaining)
412 // peekPrefix reports whether the remaining input begins with the given prefix.
413 func (in *input) peekPrefix(prefix string) bool {
414 // This is like bytes.HasPrefix(in.remaining, []byte(prefix))
415 // but without the allocation of the []byte copy of prefix.
416 for i := 0; i < len(prefix); i++ {
417 if i >= len(in.remaining) || in.remaining[i] != prefix[i] {
424 // readRune consumes and returns the next rune in the input.
425 func (in *input) readRune() int {
426 if len(in.remaining) == 0 {
427 in.Error("internal lexer error: readRune at EOF")
429 r, size := utf8.DecodeRune(in.remaining)
430 in.remaining = in.remaining[size:]
451 _EOF tokenKind = -(iota + 1)
457 // newlines and punctuation tokens are allowed as ASCII codes.
460 func (k tokenKind) isComment() bool {
461 return k == _COMMENT || k == _EOLCOMMENT
464 // isEOL reports whether a token terminates a line.
465 func (k tokenKind) isEOL() bool {
466 return k == _EOF || k == _EOLCOMMENT || k == '\n'
469 // startToken marks the beginning of the next input token.
470 // It must be followed by a call to endToken, once the token's text has
471 // been consumed using readRune.
472 func (in *input) startToken() {
473 in.tokenStart = in.remaining
475 in.token.pos = in.pos
478 // endToken marks the end of an input token.
479 // It records the actual token string in tok.text.
480 // A single trailing newline (LF or CRLF) will be removed from comment tokens.
481 func (in *input) endToken(kind tokenKind) {
483 text := string(in.tokenStart[:len(in.tokenStart)-len(in.remaining)])
484 if kind.isComment() {
485 if strings.HasSuffix(text, "\r\n") {
486 text = text[:len(text)-2]
488 text = strings.TrimSuffix(text, "\n")
492 in.token.endPos = in.pos
495 // peek returns the kind of the next token returned by lex.
496 func (in *input) peek() tokenKind {
500 // lex is called from the parser to obtain the next input token.
501 func (in *input) lex() token {
507 // readToken lexes the next token from the text and stores it in in.token.
508 func (in *input) readToken() {
509 // Skip past spaces, stopping at non-space or EOF.
512 if c == ' ' || c == '\t' || c == '\r' {
517 // Comment runs to end of line.
518 if in.peekPrefix("//") {
521 // Is this comment the only thing on its line?
522 // Find the last \n before this // and see if it's all
523 // spaces from there to here.
524 i := bytes.LastIndex(in.complete[:in.pos.Byte], []byte("\n"))
525 suffix := len(bytes.TrimSpace(in.complete[i+1:in.pos.Byte])) > 0
530 for len(in.remaining) > 0 && in.readRune() != '\n' {
533 // If we are at top level (not in a statement), hand the comment to
534 // the parser as a _COMMENT token. The grammar is written
535 // to handle top-level comments itself.
537 in.endToken(_COMMENT)
541 // Otherwise, save comment for later attachment to syntax tree.
542 in.endToken(_EOLCOMMENT)
543 in.comments = append(in.comments, Comment{in.token.pos, in.token.text, suffix})
547 if in.peekPrefix("/*") {
548 in.Error("mod files must use // comments (not /* */ comments)")
551 // Found non-space non-comment.
555 // Found the beginning of the next token.
564 // Punctuation tokens.
565 switch c := in.peekRune(); c {
566 case '\n', '(', ')', '[', ']', '{', '}', ',':
568 in.endToken(tokenKind(c))
571 case '"', '`': // quoted string
576 in.pos = in.token.pos
577 in.Error("unexpected EOF in string")
579 if in.peekRune() == '\n' {
580 in.Error("unexpected newline in string")
586 if c == '\\' && quote != '`' {
588 in.pos = in.token.pos
589 in.Error("unexpected EOF in string")
598 // Checked all punctuation. Must be identifier token.
599 if c := in.peekRune(); !isIdent(c) {
600 in.Error(fmt.Sprintf("unexpected input character %#q", c))
603 // Scan over identifier.
604 for isIdent(in.peekRune()) {
605 if in.peekPrefix("//") {
608 if in.peekPrefix("/*") {
609 in.Error("mod files must use // comments (not /* */ comments)")
616 // isIdent reports whether c is an identifier rune.
617 // We treat most printable runes as identifier runes, except for a handful of
618 // ASCII punctuation characters.
619 func isIdent(c int) bool {
620 switch r := rune(c); r {
621 case ' ', '(', ')', '[', ']', '{', '}', ',':
624 return !unicode.IsSpace(r) && unicode.IsPrint(r)
628 // Comment assignment.
629 // We build two lists of all subexpressions, preorder and postorder.
630 // The preorder list is ordered by start location, with outer expressions first.
631 // The postorder list is ordered by end location, with outer expressions last.
632 // We use the preorder list to assign each whole-line comment to the syntax
633 // immediately following it, and we use the postorder list to assign each
634 // end-of-line comment to the syntax immediately preceding it.
636 // order walks the expression adding it and its subexpressions to the
637 // preorder and postorder lists.
638 func (in *input) order(x Expr) {
640 in.pre = append(in.pre, x)
642 switch x := x.(type) {
644 panic(fmt.Errorf("order: unexpected type %T", x))
647 case *LParen, *RParen:
654 for _, stmt := range x.Stmt {
659 for _, l := range x.Line {
665 in.post = append(in.post, x)
669 // assignComments attaches comments to nearby syntax.
670 func (in *input) assignComments() {
673 // Generate preorder and postorder lists.
676 // Split into whole-line comments and suffix comments.
677 var line, suffix []Comment
678 for _, com := range in.comments {
680 suffix = append(suffix, com)
682 line = append(line, com)
687 for _, c := range line {
688 fmt.Fprintf(os.Stderr, "LINE %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
692 // Assign line comments to syntax immediately following.
693 for _, x := range in.pre {
696 fmt.Fprintf(os.Stderr, "pre %T :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte)
699 for len(line) > 0 && start.Byte >= line[0].Start.Byte {
701 fmt.Fprintf(os.Stderr, "ASSIGN LINE %q #%d\n", line[0].Token, line[0].Start.Byte)
703 xcom.Before = append(xcom.Before, line[0])
708 // Remaining line comments go at end of file.
709 in.file.After = append(in.file.After, line...)
712 for _, c := range suffix {
713 fmt.Fprintf(os.Stderr, "SUFFIX %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
717 // Assign suffix comments to syntax immediately before.
718 for i := len(in.post) - 1; i >= 0; i-- {
721 start, end := x.Span()
723 fmt.Fprintf(os.Stderr, "post %T :%d:%d #%d :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte, end.Line, end.LineRune, end.Byte)
726 // Do not assign suffix comments to end of line block or whole file.
727 // Instead assign them to the last element inside.
733 // Do not assign suffix comments to something that starts
734 // on an earlier line, so that in
739 // we assign the comment to z and not to x ( ... ).
740 if start.Line != end.Line {
744 for len(suffix) > 0 && end.Byte <= suffix[len(suffix)-1].Start.Byte {
746 fmt.Fprintf(os.Stderr, "ASSIGN SUFFIX %q #%d\n", suffix[len(suffix)-1].Token, suffix[len(suffix)-1].Start.Byte)
748 xcom.Suffix = append(xcom.Suffix, suffix[len(suffix)-1])
749 suffix = suffix[:len(suffix)-1]
753 // We assigned suffix comments in reverse.
754 // If multiple suffix comments were appended to the same
755 // expression node, they are now in reverse. Fix that.
756 for _, x := range in.post {
757 reverseComments(x.Comment().Suffix)
760 // Remaining suffix comments go at beginning of file.
761 in.file.Before = append(in.file.Before, suffix...)
764 // reverseComments reverses the []Comment list.
765 func reverseComments(list []Comment) {
766 for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 {
767 list[i], list[j] = list[j], list[i]
771 func (in *input) parseFile() {
772 in.file = new(FileSyntax)
779 in.file.Stmt = append(in.file.Stmt, cb)
785 cb = &CommentBlock{Start: tok.pos}
788 com.Before = append(com.Before, Comment{Start: tok.pos, Token: tok.text})
791 in.file.Stmt = append(in.file.Stmt, cb)
797 in.file.Stmt[len(in.file.Stmt)-1].Comment().Before = cb.Before
804 func (in *input) parseStmt() {
808 tokens := []string{tok.text}
812 case tok.kind.isEOL():
813 in.file.Stmt = append(in.file.Stmt, &Line{
820 case tok.kind == '(':
821 if next := in.peek(); next.isEOL() {
822 // Start of block: no more tokens on this line.
823 in.file.Stmt = append(in.file.Stmt, in.parseLineBlock(start, tokens, tok))
825 } else if next == ')' {
827 if in.peek().isEOL() {
830 in.file.Stmt = append(in.file.Stmt, &LineBlock{
833 LParen: LParen{Pos: tok.pos},
834 RParen: RParen{Pos: rparen.pos},
838 // '( )' in the middle of the line, not a block.
839 tokens = append(tokens, tok.text, rparen.text)
841 // '(' in the middle of the line, not a block.
842 tokens = append(tokens, tok.text)
846 tokens = append(tokens, tok.text)
852 func (in *input) parseLineBlock(start Position, token []string, lparen token) *LineBlock {
856 LParen: LParen{Pos: lparen.pos},
858 var comments []Comment
862 // Suffix comment, will be attached later by assignComments.
865 // Blank line. Add an empty comment to preserve it.
867 if len(comments) == 0 && len(x.Line) > 0 || len(comments) > 0 && comments[len(comments)-1].Token != "" {
868 comments = append(comments, Comment{})
872 comments = append(comments, Comment{Start: tok.pos, Token: tok.text})
874 in.Error(fmt.Sprintf("syntax error (unterminated block started at %s:%d:%d)", in.filename, x.Start.Line, x.Start.LineRune))
877 x.RParen.Before = comments
878 x.RParen.Pos = rparen.pos
879 if !in.peek().isEOL() {
880 in.Error("syntax error (expected newline after closing paren)")
886 x.Line = append(x.Line, l)
887 l.Comment().Before = comments
893 func (in *input) parseLine() *Line {
895 if tok.kind.isEOL() {
896 in.Error("internal parse error: parseLine at end of line")
900 tokens := []string{tok.text}
903 if tok.kind.isEOL() {
911 tokens = append(tokens, tok.text)
917 slashSlash = []byte("//")
918 moduleStr = []byte("module")
921 // ModulePath returns the module path from the go.mod file text.
922 // If it cannot find a module path, it returns an empty string.
923 // It is tolerant of unrelated problems in the go.mod file.
924 func ModulePath(mod []byte) string {
928 if i := bytes.IndexByte(line, '\n'); i >= 0 {
929 line, mod = line[:i], line[i+1:]
931 if i := bytes.Index(line, slashSlash); i >= 0 {
934 line = bytes.TrimSpace(line)
935 if !bytes.HasPrefix(line, moduleStr) {
938 line = line[len(moduleStr):]
940 line = bytes.TrimSpace(line)
941 if len(line) == n || len(line) == 0 {
945 if line[0] == '"' || line[0] == '`' {
946 p, err := strconv.Unquote(string(line))
948 return "" // malformed quoted string or multiline module path
955 return "" // missing module path