1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// A Position describes an arbitrary source position in a file: the line,
// the column within that line (counted in runes), and the byte offset.
// It does not record which file the position belongs to.
type Position struct {
	Line     int // line in input (starting at 1)
	LineRune int // rune in line (starting at 1)
	Byte     int // byte in input (starting at 0)
}

// add returns the position at the end of s, assuming it starts at p.
//
// Byte always advances by len(s). When s contains newlines, Line advances
// by their count and LineRune restarts at 1 before the runes following the
// last newline are counted; otherwise LineRune advances by the rune count
// of s. The receiver is a value, so p itself is never modified.
func (p Position) add(s string) Position {
	p.Byte += len(s)
	if n := strings.Count(s, "\n"); n > 0 {
		p.Line += n
		// Only the text after the final newline contributes to the column.
		s = s[strings.LastIndex(s, "\n")+1:]
		p.LineRune = 1
	}
	p.LineRune += utf8.RuneCountInString(s)
	return p
}
38 // An Expr represents an input element.
40 // Span returns the start and end position of the expression,
41 // excluding leading or trailing comments.
42 Span() (start, end Position)
44 // Comment returns the comments attached to the expression.
45 // This method would normally be named 'Comments' but that
46 // would interfere with embedding a type of the same name.
50 // A Comment represents a single // comment.
53 Token string // without trailing newline
54 Suffix bool // an end of line (not whole line) comment
57 // Comments collects the comments associated with an expression.
58 type Comments struct {
59 Before []Comment // whole-line comments before this expression
60 Suffix []Comment // end-of-line comments after this expression
62 // For top-level expressions only, After lists whole-line
63 // comments following the expression.
67 // Comment returns the receiver. This isn't useful by itself, but
68 // a Comments struct is embedded into all the expression
69 // implementation types, and this gives each of those a Comment
70 // method to satisfy the Expr interface.
71 func (c *Comments) Comment() *Comments {
75 // A FileSyntax represents an entire go.mod file.
76 type FileSyntax struct {
77 Name string // file path
82 func (x *FileSyntax) Span() (start, end Position) {
86 start, _ = x.Stmt[0].Span()
87 _, end = x.Stmt[len(x.Stmt)-1].Span()
91 // addLine adds a line containing the given tokens to the file.
93 // If the first token of the hint matches the first token of the
94 // line, the new line is added at the end of the block containing hint,
95 // extracting hint into a new block if it is not yet in one.
97 // If the hint is non-nil but its first token does not match,
98 // the new line is added after the block containing hint
99 // (or hint itself, if not in a block).
101 // If no hint is provided, addLine appends the line to the end of
102 // the last block with a matching first token,
103 // or to the end of the file if no such block exists.
104 func (x *FileSyntax) addLine(hint Expr, tokens ...string) *Line {
106 // If no hint given, add to the last statement of the given type.
108 for i := len(x.Stmt) - 1; i >= 0; i-- {
110 switch stmt := stmt.(type) {
112 if stmt.Token != nil && stmt.Token[0] == tokens[0] {
117 if stmt.Token[0] == tokens[0] {
125 newLineAfter := func(i int) *Line {
126 new := &Line{Token: tokens}
127 if i == len(x.Stmt) {
128 x.Stmt = append(x.Stmt, new)
130 x.Stmt = append(x.Stmt, nil)
131 copy(x.Stmt[i+2:], x.Stmt[i+1:])
138 for i, stmt := range x.Stmt {
139 switch stmt := stmt.(type) {
142 if stmt.Token == nil || stmt.Token[0] != tokens[0] {
143 return newLineAfter(i)
146 // Convert line to line block.
148 block := &LineBlock{Token: stmt.Token[:1], Line: []*Line{stmt}}
149 stmt.Token = stmt.Token[1:]
151 new := &Line{Token: tokens[1:], InBlock: true}
152 block.Line = append(block.Line, new)
158 if stmt.Token[0] != tokens[0] {
159 return newLineAfter(i)
162 new := &Line{Token: tokens[1:], InBlock: true}
163 stmt.Line = append(stmt.Line, new)
167 for j, line := range stmt.Line {
169 if stmt.Token[0] != tokens[0] {
170 return newLineAfter(i)
173 // Add new line after hint within the block.
174 stmt.Line = append(stmt.Line, nil)
175 copy(stmt.Line[j+2:], stmt.Line[j+1:])
176 new := &Line{Token: tokens[1:], InBlock: true}
185 new := &Line{Token: tokens}
186 x.Stmt = append(x.Stmt, new)
190 func (x *FileSyntax) updateLine(line *Line, tokens ...string) {
197 func (x *FileSyntax) removeLine(line *Line) {
201 // Cleanup cleans up the file syntax x after any edit operations.
202 // To avoid quadratic behavior, removeLine marks the line as dead
203 // by setting line.Token = nil but does not remove it from the slice
204 // in which it appears. After edits have all been indicated,
205 // calling Cleanup cleans out the dead lines.
206 func (x *FileSyntax) Cleanup() {
208 for _, stmt := range x.Stmt {
209 switch stmt := stmt.(type) {
211 if stmt.Token == nil {
216 for _, line := range stmt.Line {
217 if line.Token != nil {
226 // Collapse block into single line.
229 Before: commentsAdd(stmt.Before, stmt.Line[0].Before),
230 Suffix: commentsAdd(stmt.Line[0].Suffix, stmt.Suffix),
231 After: commentsAdd(stmt.Line[0].After, stmt.After),
233 Token: stringsAdd(stmt.Token, stmt.Line[0].Token),
239 stmt.Line = stmt.Line[:ww]
247 func commentsAdd(x, y []Comment) []Comment {
248 return append(x[:len(x):len(x)], y...)
// stringsAdd returns the strings of x followed by those of y.
//
// x is re-sliced with its capacity capped at its length, so a non-empty y
// forces append to allocate: elements of y are never written into spare
// capacity of x's backing array. When y is empty the result shares x's
// backing array.
func stringsAdd(x, y []string) []string {
	return append(x[:len(x):len(x)], y...)
}
255 // A CommentBlock represents a top-level block of comments separate
257 type CommentBlock struct {
262 func (x *CommentBlock) Span() (start, end Position) {
263 return x.Start, x.Start
266 // A Line is a single line of tokens.
275 func (x *Line) Span() (start, end Position) {
276 return x.Start, x.End
279 // A LineBlock is a factored block of lines, like
286 type LineBlock struct {
295 func (x *LineBlock) Span() (start, end Position) {
296 return x.Start, x.RParen.Pos.add(")")
299 // An LParen represents the beginning of a parenthesized line block.
300 // It is a place to store suffix comments.
306 func (x *LParen) Span() (start, end Position) {
307 return x.Pos, x.Pos.add(")")
310 // An RParen represents the end of a parenthesized line block.
311 // It is a place to store whole-line (before) comments.
317 func (x *RParen) Span() (start, end Position) {
318 return x.Pos, x.Pos.add(")")
321 // An input represents a single input file being parsed.
324 filename string // name of input file, for errors
325 complete []byte // entire input
326 remaining []byte // remaining input
327 tokenStart []byte // token being scanned to end of input
328 token token // next token to be returned by lex, peek
329 pos Position // current input position
330 comments []Comment // accumulated comments
333 file *FileSyntax // returned top-level syntax tree
334 parseErrors ErrorList // errors encountered during parsing
336 // Comment assignment state.
337 pre []Expr // all expressions, in preorder traversal
338 post []Expr // all expressions, in postorder traversal
341 func newInput(filename string, data []byte) *input {
346 pos: Position{Line: 1, LineRune: 1, Byte: 0},
350 // parse parses the input file.
351 func parse(file string, data []byte) (f *FileSyntax, err error) {
352 // The parser panics for both routine errors like syntax errors
353 // and for programmer bugs like array index errors.
354 // Turn both into error returns. Catching bug panics is
355 // especially important when processing many files.
356 in := newInput(file, data)
358 if e := recover(); e != nil && e != &in.parseErrors {
359 in.parseErrors = append(in.parseErrors, Error{
360 Filename: in.filename,
362 Err: fmt.Errorf("internal error: %v", e),
365 if err == nil && len(in.parseErrors) > 0 {
370 // Prime the lexer by reading in the first token. It will be available
371 // in the next peek() or lex() call.
374 // Invoke the parser.
376 if len(in.parseErrors) > 0 {
377 return nil, in.parseErrors
379 in.file.Name = in.filename
381 // Assign comments to nearby syntax.
387 // Error is called to report an error.
388 // Error does not return: it panics.
389 func (in *input) Error(s string) {
390 in.parseErrors = append(in.parseErrors, Error{
391 Filename: in.filename,
395 panic(&in.parseErrors)
398 // eof reports whether the input has reached end of file.
399 func (in *input) eof() bool {
400 return len(in.remaining) == 0
403 // peekRune returns the next rune in the input without consuming it.
404 func (in *input) peekRune() int {
405 if len(in.remaining) == 0 {
408 r, _ := utf8.DecodeRune(in.remaining)
412 // peekPrefix reports whether the remaining input begins with the given prefix.
413 func (in *input) peekPrefix(prefix string) bool {
414 // This is like bytes.HasPrefix(in.remaining, []byte(prefix))
415 // but without the allocation of the []byte copy of prefix.
416 for i := 0; i < len(prefix); i++ {
417 if i >= len(in.remaining) || in.remaining[i] != prefix[i] {
424 // readRune consumes and returns the next rune in the input.
425 func (in *input) readRune() int {
426 if len(in.remaining) == 0 {
427 in.Error("internal lexer error: readRune at EOF")
429 r, size := utf8.DecodeRune(in.remaining)
430 in.remaining = in.remaining[size:]
451 _EOF tokenKind = -(iota + 1)
457 // newlines and punctuation tokens are allowed as ASCII codes.
460 func (k tokenKind) isComment() bool {
461 return k == _COMMENT || k == _EOLCOMMENT
464 // isEOL returns whether a token terminates a line.
465 func (k tokenKind) isEOL() bool {
466 return k == _EOF || k == _EOLCOMMENT || k == '\n'
469 // startToken marks the beginning of the next input token.
470 // It must be followed by a call to endToken, once the token's text has
471 // been consumed using readRune.
472 func (in *input) startToken() {
473 in.tokenStart = in.remaining
475 in.token.pos = in.pos
478 // endToken marks the end of an input token.
479 // It records the actual token string in tok.text.
480 func (in *input) endToken(kind tokenKind) {
482 text := string(in.tokenStart[:len(in.tokenStart)-len(in.remaining)])
484 in.token.endPos = in.pos
487 // peek returns the kind of the next token returned by lex.
488 func (in *input) peek() tokenKind {
492 // lex is called from the parser to obtain the next input token.
493 func (in *input) lex() token {
499 // readToken lexes the next token from the text and stores it in in.token.
500 func (in *input) readToken() {
501 // Skip past spaces, stopping at non-space or EOF.
504 if c == ' ' || c == '\t' || c == '\r' {
509 // Comment runs to end of line.
510 if in.peekPrefix("//") {
513 // Is this comment the only thing on its line?
514 // Find the last \n before this // and see if it's all
515 // spaces from there to here.
516 i := bytes.LastIndex(in.complete[:in.pos.Byte], []byte("\n"))
517 suffix := len(bytes.TrimSpace(in.complete[i+1:in.pos.Byte])) > 0
522 for len(in.remaining) > 0 && in.readRune() != '\n' {
525 // If we are at top level (not in a statement), hand the comment to
526 // the parser as a _COMMENT token. The grammar is written
527 // to handle top-level comments itself.
529 in.endToken(_COMMENT)
533 // Otherwise, save comment for later attachment to syntax tree.
534 in.endToken(_EOLCOMMENT)
535 in.comments = append(in.comments, Comment{in.token.pos, in.token.text, suffix})
539 if in.peekPrefix("/*") {
540 in.Error("mod files must use // comments (not /* */ comments)")
543 // Found non-space non-comment.
547 // Found the beginning of the next token.
556 // Punctuation tokens.
557 switch c := in.peekRune(); c {
558 case '\n', '(', ')', '[', ']', '{', '}', ',':
560 in.endToken(tokenKind(c))
563 case '"', '`': // quoted string
568 in.pos = in.token.pos
569 in.Error("unexpected EOF in string")
571 if in.peekRune() == '\n' {
572 in.Error("unexpected newline in string")
578 if c == '\\' && quote != '`' {
580 in.pos = in.token.pos
581 in.Error("unexpected EOF in string")
590 // Checked all punctuation. Must be identifier token.
591 if c := in.peekRune(); !isIdent(c) {
592 in.Error(fmt.Sprintf("unexpected input character %#q", c))
595 // Scan over identifier.
596 for isIdent(in.peekRune()) {
597 if in.peekPrefix("//") {
600 if in.peekPrefix("/*") {
601 in.Error("mod files must use // comments (not /* */ comments)")
// isIdent reports whether c is an identifier rune.
// We treat most printable runes as identifier runes, except for a handful of
// ASCII punctuation characters.
//
// Space, parentheses, brackets, braces, and comma are rejected explicitly.
// Any other rune is accepted only if it is printable and not whitespace.
func isIdent(c int) bool {
	switch r := rune(c); r {
	case ' ', '(', ')', '[', ']', '{', '}', ',':
		return false
	default:
		return !unicode.IsSpace(r) && unicode.IsPrint(r)
	}
}
620 // Comment assignment.
621 // We build two lists of all subexpressions, preorder and postorder.
622 // The preorder list is ordered by start location, with outer expressions first.
623 // The postorder list is ordered by end location, with outer expressions last.
624 // We use the preorder list to assign each whole-line comment to the syntax
625 // immediately following it, and we use the postorder list to assign each
626 // end-of-line comment to the syntax immediately preceding it.
628 // order walks the expression adding it and its subexpressions to the
629 // preorder and postorder lists.
630 func (in *input) order(x Expr) {
632 in.pre = append(in.pre, x)
634 switch x := x.(type) {
636 panic(fmt.Errorf("order: unexpected type %T", x))
639 case *LParen, *RParen:
646 for _, stmt := range x.Stmt {
651 for _, l := range x.Line {
657 in.post = append(in.post, x)
661 // assignComments attaches comments to nearby syntax.
662 func (in *input) assignComments() {
665 // Generate preorder and postorder lists.
668 // Split into whole-line comments and suffix comments.
669 var line, suffix []Comment
670 for _, com := range in.comments {
672 suffix = append(suffix, com)
674 line = append(line, com)
679 for _, c := range line {
680 fmt.Fprintf(os.Stderr, "LINE %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
684 // Assign line comments to syntax immediately following.
685 for _, x := range in.pre {
688 fmt.Fprintf(os.Stderr, "pre %T :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte)
691 for len(line) > 0 && start.Byte >= line[0].Start.Byte {
693 fmt.Fprintf(os.Stderr, "ASSIGN LINE %q #%d\n", line[0].Token, line[0].Start.Byte)
695 xcom.Before = append(xcom.Before, line[0])
700 // Remaining line comments go at end of file.
701 in.file.After = append(in.file.After, line...)
704 for _, c := range suffix {
705 fmt.Fprintf(os.Stderr, "SUFFIX %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
709 // Assign suffix comments to syntax immediately before.
710 for i := len(in.post) - 1; i >= 0; i-- {
713 start, end := x.Span()
715 fmt.Fprintf(os.Stderr, "post %T :%d:%d #%d :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte, end.Line, end.LineRune, end.Byte)
718 // Do not assign suffix comments to end of line block or whole file.
719 // Instead assign them to the last element inside.
725 // Do not assign suffix comments to something that starts
726 // on an earlier line, so that in
731 // we assign the comment to z and not to x ( ... ).
732 if start.Line != end.Line {
736 for len(suffix) > 0 && end.Byte <= suffix[len(suffix)-1].Start.Byte {
738 fmt.Fprintf(os.Stderr, "ASSIGN SUFFIX %q #%d\n", suffix[len(suffix)-1].Token, suffix[len(suffix)-1].Start.Byte)
740 xcom.Suffix = append(xcom.Suffix, suffix[len(suffix)-1])
741 suffix = suffix[:len(suffix)-1]
745 // We assigned suffix comments in reverse.
746 // If multiple suffix comments were appended to the same
747 // expression node, they are now in reverse. Fix that.
748 for _, x := range in.post {
749 reverseComments(x.Comment().Suffix)
752 // Remaining suffix comments go at beginning of file.
753 in.file.Before = append(in.file.Before, suffix...)
756 // reverseComments reverses the []Comment list.
757 func reverseComments(list []Comment) {
758 for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 {
759 list[i], list[j] = list[j], list[i]
763 func (in *input) parseFile() {
764 in.file = new(FileSyntax)
771 in.file.Stmt = append(in.file.Stmt, cb)
777 cb = &CommentBlock{Start: tok.pos}
780 com.Before = append(com.Before, Comment{Start: tok.pos, Token: tok.text})
783 in.file.Stmt = append(in.file.Stmt, cb)
789 in.file.Stmt[len(in.file.Stmt)-1].Comment().Before = cb.Before
796 func (in *input) parseStmt() {
800 tokens := []string{tok.text}
804 case tok.kind.isEOL():
805 in.file.Stmt = append(in.file.Stmt, &Line{
812 case tok.kind == '(':
813 if next := in.peek(); next.isEOL() {
814 // Start of block: no more tokens on this line.
815 in.file.Stmt = append(in.file.Stmt, in.parseLineBlock(start, tokens, tok))
817 } else if next == ')' {
819 if in.peek().isEOL() {
822 in.file.Stmt = append(in.file.Stmt, &LineBlock{
825 LParen: LParen{Pos: tok.pos},
826 RParen: RParen{Pos: rparen.pos},
830 // '( )' in the middle of the line, not a block.
831 tokens = append(tokens, tok.text, rparen.text)
833 // '(' in the middle of the line, not a block.
834 tokens = append(tokens, tok.text)
838 tokens = append(tokens, tok.text)
844 func (in *input) parseLineBlock(start Position, token []string, lparen token) *LineBlock {
848 LParen: LParen{Pos: lparen.pos},
850 var comments []Comment
854 // Suffix comment, will be attached later by assignComments.
857 // Blank line. Add an empty comment to preserve it.
859 if len(comments) == 0 && len(x.Line) > 0 || len(comments) > 0 && comments[len(comments)-1].Token != "" {
860 comments = append(comments, Comment{})
864 comments = append(comments, Comment{Start: tok.pos, Token: tok.text})
866 in.Error(fmt.Sprintf("syntax error (unterminated block started at %s:%d:%d)", in.filename, x.Start.Line, x.Start.LineRune))
869 x.RParen.Before = comments
870 x.RParen.Pos = rparen.pos
871 if !in.peek().isEOL() {
872 in.Error("syntax error (expected newline after closing paren)")
878 x.Line = append(x.Line, l)
879 l.Comment().Before = comments
885 func (in *input) parseLine() *Line {
887 if tok.kind.isEOL() {
888 in.Error("internal parse error: parseLine at end of line")
892 tokens := []string{tok.text}
895 if tok.kind.isEOL() {
903 tokens = append(tokens, tok.text)
var (
	slashSlash = []byte("//")
	moduleStr  = []byte("module")
)

// ModulePath returns the module path from the gomod file text.
// If it cannot find a module path, it returns an empty string.
// It is tolerant of unrelated problems in the go.mod file.
//
// The text is scanned line by line. On each line, anything from "//"
// onward is dropped and surrounding whitespace is trimmed. The first line
// that starts with "module" followed by whitespace and a non-empty
// argument decides the result: a quoted or backquoted argument is
// unquoted (yielding "" if that fails), anything else is returned as is.
func ModulePath(mod []byte) string {
	for len(mod) > 0 {
		// Split off the next line; mod keeps whatever follows it.
		line := mod
		mod = nil
		if i := bytes.IndexByte(line, '\n'); i >= 0 {
			line, mod = line[:i], line[i+1:]
		}
		if i := bytes.Index(line, slashSlash); i >= 0 {
			line = line[:i]
		}
		line = bytes.TrimSpace(line)
		if !bytes.HasPrefix(line, moduleStr) {
			continue
		}
		line = line[len(moduleStr):]
		n := len(line)
		line = bytes.TrimSpace(line)
		// If trimming removed nothing, "module" was not followed by
		// whitespace (for example "modulex"). If nothing is left, the
		// directive has no argument. Either way keep looking.
		if len(line) == n || len(line) == 0 {
			continue
		}

		if line[0] == '"' || line[0] == '`' {
			p, err := strconv.Unquote(string(line))
			if err != nil {
				return "" // malformed quoted string or multiline module path
			}
			return p
		}

		return string(line)
	}
	return "" // missing module path
}