1 /*---------------------------------------------------------------------------------------------
2 * Copyright (c) Microsoft Corporation. All rights reserved.
3 * Licensed under the MIT License. See License.txt in the project root for license information.
4 *--------------------------------------------------------------------------------------------*/
/**
 * Creates a JSON scanner on the given text.
 * If ignoreTrivia is set, whitespaces or comments are ignored.
 *
 * Token kinds returned by scan()/getToken():
 *   1 OpenBrace, 2 CloseBrace, 3 OpenBracket, 4 CloseBracket, 5 Comma, 6 Colon,
 *   7 null, 8 true, 9 false, 10 StringLiteral, 11 NumericLiteral,
 *   12 LineCommentTrivia, 13 BlockCommentTrivia, 14 LineBreakTrivia, 15 Trivia,
 *   16 Unknown, 17 EOF.
 * Error codes returned by getTokenError():
 *   0 None, 1 UnexpectedEndOfComment, 2 UnexpectedEndOfString, 3 UnexpectedEndOfNumber,
 *   4 InvalidUnicode, 5 InvalidEscapeCharacter, 6 InvalidCharacter.
 *
 * @param {string} text - The text to tokenize.
 * @param {boolean} [ignoreTrivia=false] - When true, scan() skips token kinds 12..15.
 * @returns {object} Scanner exposing setPosition, getPosition, scan, getToken, getTokenValue,
 *   getTokenOffset, getTokenLength, getTokenStartLine, getTokenStartCharacter and getTokenError.
 */
export function createScanner(text, ignoreTrivia) {
    if (ignoreTrivia === void 0) { ignoreTrivia = false; }
    var len = text.length;
    var pos = 0, value = '', tokenOffset = 0, token = 16 /* Unknown */, lineNumber = 0, lineStartOffset = 0, tokenLineStartOffset = 0, prevTokenLineStartOffset = 0, scanError = 0 /* None */;
    /**
     * Reads hexadecimal digits starting at pos and advances pos past them.
     * With exact set, at most `count` digits are consumed; otherwise reading continues
     * until the first non-hex character. Returns -1 when fewer than `count` digits were found.
     */
    function scanHexDigits(count, exact) {
        var digits = 0;
        var value = 0;
        while (digits < count || !exact) {
            var ch = text.charCodeAt(pos);
            if (ch >= 48 /* _0 */ && ch <= 57 /* _9 */) {
                value = value * 16 + ch - 48 /* _0 */;
            }
            else if (ch >= 65 /* A */ && ch <= 70 /* F */) {
                value = value * 16 + ch - 65 /* A */ + 10;
            }
            else if (ch >= 97 /* a */ && ch <= 102 /* f */) {
                value = value * 16 + ch - 97 /* a */ + 10;
            }
            else {
                // Also taken at end of input, where charCodeAt yields NaN.
                break;
            }
            pos++;
            digits++;
        }
        if (digits < count) {
            value = -1;
        }
        return value;
    }
    /** Moves the scanner to newPosition and clears the current token state. */
    function setPosition(newPosition) {
        pos = newPosition;
        value = '';
        tokenOffset = 0;
        token = 16 /* Unknown */;
        scanError = 0 /* None */;
    }
    /**
     * Scans the unsigned part of a number whose first digit is at pos and returns its text.
     * Sets scanError to UnexpectedEndOfNumber when a '.' or an exponent has no digits.
     */
    function scanNumber() {
        var start = pos;
        if (text.charCodeAt(pos) === 48 /* _0 */) {
            // A leading zero stands alone; following digits start a new token.
            pos++;
        }
        else {
            pos++;
            while (pos < text.length && isDigit(text.charCodeAt(pos))) {
                pos++;
            }
        }
        if (pos < text.length && text.charCodeAt(pos) === 46 /* dot */) {
            pos++;
            if (pos < text.length && isDigit(text.charCodeAt(pos))) {
                pos++;
                while (pos < text.length && isDigit(text.charCodeAt(pos))) {
                    pos++;
                }
            }
            else {
                scanError = 3 /* UnexpectedEndOfNumber */;
                return text.substring(start, pos);
            }
        }
        // `end` only moves past the exponent once the exponent is known to be well-formed.
        var end = pos;
        if (pos < text.length && (text.charCodeAt(pos) === 69 /* E */ || text.charCodeAt(pos) === 101 /* e */)) {
            pos++;
            if (pos < text.length && (text.charCodeAt(pos) === 43 /* plus */ || text.charCodeAt(pos) === 45 /* minus */)) {
                pos++;
            }
            if (pos < text.length && isDigit(text.charCodeAt(pos))) {
                pos++;
                while (pos < text.length && isDigit(text.charCodeAt(pos))) {
                    pos++;
                }
                end = pos;
            }
            else {
                scanError = 3 /* UnexpectedEndOfNumber */;
            }
        }
        return text.substring(start, end);
    }
    /**
     * Scans a string body; pos must be just after the opening quote.
     * Returns the decoded content and leaves pos after the closing quote when one is found.
     */
    function scanString() {
        var result = '', start = pos;
        while (true) {
            if (pos >= len) {
                result += text.substring(start, pos);
                scanError = 2 /* UnexpectedEndOfString */;
                break;
            }
            var ch = text.charCodeAt(pos);
            if (ch === 34 /* doubleQuote */) {
                result += text.substring(start, pos);
                pos++;
                break;
            }
            if (ch === 92 /* backslash */) {
                // Flush the literal run before decoding the escape.
                result += text.substring(start, pos);
                pos++;
                if (pos >= len) {
                    scanError = 2 /* UnexpectedEndOfString */;
                    break;
                }
                var ch2 = text.charCodeAt(pos++);
                switch (ch2) {
                    case 34 /* doubleQuote */:
                        result += '\"';
                        break;
                    case 92 /* backslash */:
                        result += '\\';
                        break;
                    case 47 /* slash */:
                        result += '/';
                        break;
                    case 98 /* b */:
                        result += '\b';
                        break;
                    case 102 /* f */:
                        result += '\f';
                        break;
                    case 110 /* n */:
                        result += '\n';
                        break;
                    case 114 /* r */:
                        result += '\r';
                        break;
                    case 116 /* t */:
                        result += '\t';
                        break;
                    case 117 /* u */:
                        var ch3 = scanHexDigits(4, true);
                        if (ch3 >= 0) {
                            result += String.fromCharCode(ch3);
                        }
                        else {
                            scanError = 4 /* InvalidUnicode */;
                        }
                        break;
                    default:
                        scanError = 5 /* InvalidEscapeCharacter */;
                }
                start = pos;
                continue;
            }
            if (ch >= 0 && ch <= 0x1f) {
                if (isLineBreak(ch)) {
                    // A raw line break terminates the string; it is left for the next token.
                    result += text.substring(start, pos);
                    scanError = 2 /* UnexpectedEndOfString */;
                    break;
                }
                else {
                    scanError = 6 /* InvalidCharacter */;
                    // mark as error but continue with string
                }
            }
            pos++;
        }
        return result;
    }
    /** Scans the next token (trivia included), updates the token state and returns its kind. */
    function scanNext() {
        value = '';
        scanError = 0 /* None */;
        tokenOffset = pos;
        // Snapshot line info before scanning: a token may itself advance the line counters.
        lineStartOffset = lineNumber;
        prevTokenLineStartOffset = tokenLineStartOffset;
        if (pos >= len) {
            // at the end
            tokenOffset = len;
            return token = 17 /* EOF */;
        }
        var code = text.charCodeAt(pos);
        // trivia: whitespace
        if (isWhiteSpace(code)) {
            do {
                pos++;
                value += String.fromCharCode(code);
                code = text.charCodeAt(pos);
            } while (isWhiteSpace(code));
            return token = 15 /* Trivia */;
        }
        // trivia: newlines
        if (isLineBreak(code)) {
            pos++;
            value += String.fromCharCode(code);
            if (code === 13 /* carriageReturn */ && text.charCodeAt(pos) === 10 /* lineFeed */) {
                // CR LF counts as a single line break.
                pos++;
                value += '\n';
            }
            lineNumber++;
            tokenLineStartOffset = pos;
            return token = 14 /* LineBreakTrivia */;
        }
        switch (code) {
            // tokens: []{}:,
            case 123 /* openBrace */:
                pos++;
                return token = 1 /* OpenBraceToken */;
            case 125 /* closeBrace */:
                pos++;
                return token = 2 /* CloseBraceToken */;
            case 91 /* openBracket */:
                pos++;
                return token = 3 /* OpenBracketToken */;
            case 93 /* closeBracket */:
                pos++;
                return token = 4 /* CloseBracketToken */;
            case 58 /* colon */:
                pos++;
                return token = 6 /* ColonToken */;
            case 44 /* comma */:
                pos++;
                return token = 5 /* CommaToken */;
            // strings
            case 34 /* doubleQuote */:
                pos++;
                value = scanString();
                return token = 10 /* StringLiteral */;
            // comments
            case 47 /* slash */:
                var start = pos;
                // Single-line comment
                if (text.charCodeAt(pos + 1) === 47 /* slash */) {
                    pos += 2;
                    while (pos < len) {
                        if (isLineBreak(text.charCodeAt(pos))) {
                            break;
                        }
                        pos++;
                    }
                    value = text.substring(start, pos);
                    return token = 12 /* LineCommentTrivia */;
                }
                // Multi-line comment
                if (text.charCodeAt(pos + 1) === 42 /* asterisk */) {
                    pos += 2;
                    var safeLength = len - 1; // For lookahead.
                    var commentClosed = false;
                    while (pos < safeLength) {
                        var ch = text.charCodeAt(pos);
                        if (ch === 42 /* asterisk */ && text.charCodeAt(pos + 1) === 47 /* slash */) {
                            pos += 2;
                            commentClosed = true;
                            break;
                        }
                        pos++;
                        if (isLineBreak(ch)) {
                            if (ch === 13 /* carriageReturn */ && text.charCodeAt(pos) === 10 /* lineFeed */) {
                                pos++;
                            }
                            lineNumber++;
                            tokenLineStartOffset = pos;
                        }
                    }
                    if (!commentClosed) {
                        // An unterminated comment runs to the end of the text; clamp so that
                        // pos (and therefore getTokenLength) never exceeds the input length.
                        pos = len;
                        scanError = 1 /* UnexpectedEndOfComment */;
                    }
                    value = text.substring(start, pos);
                    return token = 13 /* BlockCommentTrivia */;
                }
                // just a single slash
                value += String.fromCharCode(code);
                pos++;
                return token = 16 /* Unknown */;
            // numbers
            case 45 /* minus */:
                value += String.fromCharCode(code);
                pos++;
                if (pos === len || !isDigit(text.charCodeAt(pos))) {
                    return token = 16 /* Unknown */;
                }
            // found a minus, followed by a number so
            // we fall through to proceed with scanning
            // numbers
            case 48 /* _0 */:
            case 49 /* _1 */:
            case 50 /* _2 */:
            case 51 /* _3 */:
            case 52 /* _4 */:
            case 53 /* _5 */:
            case 54 /* _6 */:
            case 55 /* _7 */:
            case 56 /* _8 */:
            case 57 /* _9 */:
                value += scanNumber();
                return token = 11 /* NumericLiteral */;
            // literals and unknown symbols
            default:
                // is a literal? Read the full word.
                while (pos < len && isUnknownContentCharacter(code)) {
                    pos++;
                    code = text.charCodeAt(pos);
                }
                if (tokenOffset !== pos) {
                    value = text.substring(tokenOffset, pos);
                    // keywords: true, false, null
                    switch (value) {
                        case 'true': return token = 8 /* TrueKeyword */;
                        case 'false': return token = 9 /* FalseKeyword */;
                        case 'null': return token = 7 /* NullKeyword */;
                    }
                    return token = 16 /* Unknown */;
                }
                // Nothing was consumed: emit the single character as Unknown so scanning progresses.
                value += String.fromCharCode(code);
                pos++;
                return token = 16 /* Unknown */;
        }
    }
    /** Reports whether `code` may continue a bare word (keyword or unknown run). */
    function isUnknownContentCharacter(code) {
        if (isWhiteSpace(code) || isLineBreak(code)) {
            return false;
        }
        switch (code) {
            case 125 /* closeBrace */:
            case 93 /* closeBracket */:
            case 123 /* openBrace */:
            case 91 /* openBracket */:
            case 34 /* doubleQuote */:
            case 58 /* colon */:
            case 44 /* comma */:
            case 47 /* slash */:
                return false;
        }
        return true;
    }
    /** Like scanNext, but skips comments, line breaks and whitespace (token kinds 12..15). */
    function scanNextNonTrivia() {
        var result;
        do {
            result = scanNext();
        } while (result >= 12 /* LineCommentTrivia */ && result <= 15 /* Trivia */);
        return result;
    }
    return {
        setPosition: setPosition,
        getPosition: function () { return pos; },
        scan: ignoreTrivia ? scanNextNonTrivia : scanNext,
        getToken: function () { return token; },
        getTokenValue: function () { return value; },
        getTokenOffset: function () { return tokenOffset; },
        getTokenLength: function () { return pos - tokenOffset; },
        // Zero-based line on which the current token starts.
        getTokenStartLine: function () { return lineStartOffset; },
        // Offset of the current token from the start of the line it begins on.
        getTokenStartCharacter: function () { return tokenOffset - prevTokenLineStartOffset; },
        getTokenError: function () { return scanError; },
    };
}
/**
 * Reports whether a UTF-16 code unit is treated as (non-line-breaking) whitespace.
 *
 * @param {number} ch - Character code, e.g. from String.prototype.charCodeAt.
 * @returns {boolean} True for space, tab, vertical tab, form feed, no-break space, ogham space
 *   mark, the range U+2000..U+200B, narrow no-break space, mathematical space, ideographic
 *   space and the byte order mark; false otherwise (including NaN).
 */
function isWhiteSpace(ch) {
    return ch === 32 /* space */ || ch === 9 /* tab */ || ch === 11 /* verticalTab */ || ch === 12 /* formFeed */ ||
        ch === 160 /* nonBreakingSpace */ || ch === 5760 /* ogham */ || (ch >= 8192 /* enQuad */ && ch <= 8203 /* zeroWidthSpace */) ||
        ch === 8239 /* narrowNoBreakSpace */ || ch === 8287 /* mathematicalSpace */ || ch === 12288 /* ideographicSpace */ || ch === 65279 /* byteOrderMark */;
}
/**
 * Reports whether a UTF-16 code unit is a line terminator.
 *
 * @param {number} ch - Character code, e.g. from String.prototype.charCodeAt.
 * @returns {boolean} True for line feed, carriage return, U+2028 line separator and
 *   U+2029 paragraph separator; false otherwise (including NaN).
 */
function isLineBreak(ch) {
    return ch === 10 /* lineFeed */ || ch === 13 /* carriageReturn */ || ch === 8232 /* lineSeparator */ || ch === 8233 /* paragraphSeparator */;
}
/**
 * Reports whether a UTF-16 code unit is an ASCII decimal digit ('0'..'9').
 *
 * @param {number} ch - Character code, e.g. from String.prototype.charCodeAt.
 * @returns {boolean} True when ch is in the range 48..57; false otherwise (including NaN).
 */
function isDigit(ch) {
    return ch >= 48 /* _0 */ && ch <= 57 /* _9 */;
}