.gitignore added
[dotfiles/.git] / .config / coc / extensions / node_modules / coc-prettier / node_modules / htmlparser2 / lib / Tokenizer.js
1 module.exports = Tokenizer;
2
3 var decodeCodePoint = require("entities/lib/decode_codepoint.js");
4 var entityMap = require("entities/maps/entities.json");
5 var legacyMap = require("entities/maps/legacy.json");
6 var xmlMap = require("entities/maps/xml.json");
7
// Tokenizer state ids. `i` is a running counter, so each state below gets the
// next consecutive integer in declaration order, starting with TEXT = 0.
// Trailing comments name the character that was just read in that state.
var i = 0;

var TEXT = i++,
    BEFORE_TAG_NAME = i++, // after <
    IN_TAG_NAME = i++,
    IN_SELF_CLOSING_TAG = i++,
    BEFORE_CLOSING_TAG_NAME = i++,
    IN_CLOSING_TAG_NAME = i++,
    AFTER_CLOSING_TAG_NAME = i++;

// attributes
var BEFORE_ATTRIBUTE_NAME = i++,
    IN_ATTRIBUTE_NAME = i++,
    AFTER_ATTRIBUTE_NAME = i++,
    BEFORE_ATTRIBUTE_VALUE = i++,
    IN_ATTRIBUTE_VALUE_DQ = i++, // "
    IN_ATTRIBUTE_VALUE_SQ = i++, // '
    IN_ATTRIBUTE_VALUE_NQ = i++;

// declarations
var BEFORE_DECLARATION = i++, // !
    IN_DECLARATION = i++;

// processing instructions
var IN_PROCESSING_INSTRUCTION = i++; // ?

// comments
var BEFORE_COMMENT = i++,
    IN_COMMENT = i++,
    AFTER_COMMENT_1 = i++,
    AFTER_COMMENT_2 = i++;

// cdata
var BEFORE_CDATA_1 = i++, // [
    BEFORE_CDATA_2 = i++, // C
    BEFORE_CDATA_3 = i++, // D
    BEFORE_CDATA_4 = i++, // A
    BEFORE_CDATA_5 = i++, // T
    BEFORE_CDATA_6 = i++, // A
    IN_CDATA = i++, // [
    AFTER_CDATA_1 = i++, // ]
    AFTER_CDATA_2 = i++; // ]

// special tags (script / style)
var BEFORE_SPECIAL = i++, // S
    BEFORE_SPECIAL_END = i++; // S

var BEFORE_SCRIPT_1 = i++, // C
    BEFORE_SCRIPT_2 = i++, // R
    BEFORE_SCRIPT_3 = i++, // I
    BEFORE_SCRIPT_4 = i++, // P
    BEFORE_SCRIPT_5 = i++, // T
    AFTER_SCRIPT_1 = i++, // C
    AFTER_SCRIPT_2 = i++, // R
    AFTER_SCRIPT_3 = i++, // I
    AFTER_SCRIPT_4 = i++, // P
    AFTER_SCRIPT_5 = i++; // T

var BEFORE_STYLE_1 = i++, // T
    BEFORE_STYLE_2 = i++, // Y
    BEFORE_STYLE_3 = i++, // L
    BEFORE_STYLE_4 = i++, // E
    AFTER_STYLE_1 = i++, // T
    AFTER_STYLE_2 = i++, // Y
    AFTER_STYLE_3 = i++, // L
    AFTER_STYLE_4 = i++; // E

// entities
var BEFORE_ENTITY = i++, // &
    BEFORE_NUMERIC_ENTITY = i++, // #
    IN_NAMED_ENTITY = i++,
    IN_NUMERIC_ENTITY = i++,
    IN_HEX_ENTITY = i++; // X
80
// Ids for the "special" element whose raw content is currently being read;
// numbered consecutively from 0 by the running counter `j`.
var j = 0;

var SPECIAL_NONE = j++,
    SPECIAL_SCRIPT = j++,
    SPECIAL_STYLE = j++;
86
// Returns true when `c` is a space, line feed, tab, form feed or carriage
// return; false for anything else.
function whitespace(c) {
    switch (c) {
        case " ":
        case "\n":
        case "\t":
        case "\f":
        case "\r":
            return true;
        default:
            return false;
    }
}
90
// Builds a state handler that accepts exactly one expected character.
//   upper   - the expected character (its lower-case form is accepted too)
//   SUCCESS - state to enter when the character matches
//   FAILURE - state to enter otherwise; in that case _index is decremented so
//             the same character is looked at again in the FAILURE state
// The returned function must be invoked with the tokenizer as `this`.
function ifElseState(upper, SUCCESS, FAILURE) {
    var lower = upper.toLowerCase();

    return function(c) {
        var matched = c === lower || c === upper;

        if (matched) {
            this._state = SUCCESS;
            return;
        }

        this._state = FAILURE;
        this._index--;
    };
}
114
// Builds a state handler for one letter of a special tag name. When the
// character equals `upper` or its lower-case form, the tokenizer moves to
// NEXT_STATE. Otherwise it falls back to IN_TAG_NAME and _index is decremented
// so the character is handled again as part of an ordinary tag name.
function consumeSpecialNameChar(upper, NEXT_STATE) {
    var lower = upper.toLowerCase();

    return function(c) {
        var matched = c === lower || c === upper;

        if (matched) {
            this._state = NEXT_STATE;
            return;
        }

        this._state = IN_TAG_NAME;
        this._index--; // look at this character again
    };
}
127
// Creates a tokenizer in its initial TEXT state with an empty buffer.
//   options - optional object; `xmlMode` and `decodeEntities` are read from it
//             and coerced to booleans (both false when options is missing)
//   cbs     - object holding the callbacks the tokenizer reports tokens to
function Tokenizer(options, cbs) {
    var opts = options || {};

    this._state = TEXT;
    this._buffer = "";
    this._sectionStart = 0; // start of the token being collected
    this._index = 0; // read position inside _buffer
    this._bufferOffset = 0; // chars removed from _buffer
    this._baseState = TEXT; // state to return to once an entity is done
    this._special = SPECIAL_NONE;
    this._cbs = cbs;
    this._running = true;
    this._ended = false;
    this._xmlMode = Boolean(opts.xmlMode);
    this._decodeEntities = Boolean(opts.decodeEntities);
}
142
// TEXT state: plain text runs until "<" (tag start) or, when entity decoding
// is enabled and no special tag is open, "&" (entity start). Any text
// collected so far is flushed through ontext before the state changes.
Tokenizer.prototype._stateText = function(c) {
    var startsTag = c === "<";
    var startsEntity =
        !startsTag &&
        this._decodeEntities &&
        this._special === SPECIAL_NONE &&
        c === "&";

    if (!startsTag && !startsEntity) {
        return;
    }

    if (this._index > this._sectionStart) {
        this._cbs.ontext(this._getSection());
    }

    if (startsTag) {
        this._state = BEFORE_TAG_NAME;
    } else {
        this._baseState = TEXT;
        this._state = BEFORE_ENTITY;
    }
    this._sectionStart = this._index;
};
163
// Handles the character that follows "<" and decides what kind of markup
// starts here. The order of the checks matters: while a special tag is open,
// anything but "/" and "<" sends the tokenizer back to TEXT.
Tokenizer.prototype._stateBeforeTagName = function(c) {
    if (c === "/") {
        this._state = BEFORE_CLOSING_TAG_NAME;
        return;
    }

    if (c === "<") {
        // the previous "<" was plain text; restart at this one
        this._cbs.ontext(this._getSection());
        this._sectionStart = this._index;
        return;
    }

    if (c === ">" || this._special !== SPECIAL_NONE || whitespace(c)) {
        this._state = TEXT;
        return;
    }

    if (c === "!" || c === "?") {
        this._state =
            c === "!" ? BEFORE_DECLARATION : IN_PROCESSING_INSTRUCTION;
        this._sectionStart = this._index + 1;
        return;
    }

    // outside xml mode a leading "s" may start <script> or <style>
    var maybeSpecial = !this._xmlMode && (c === "s" || c === "S");

    this._state = maybeSpecial ? BEFORE_SPECIAL : IN_TAG_NAME;
    this._sectionStart = this._index;
};
186
// Collects an opening tag name; "/", ">" or whitespace ends it. The name is
// reported through onopentagname and the terminating character is handed to
// BEFORE_ATTRIBUTE_NAME by stepping _index back.
Tokenizer.prototype._stateInTagName = function(c) {
    var endsName = c === "/" || c === ">" || whitespace(c);

    if (!endsName) {
        return;
    }

    this._emitToken("onopentagname");
    this._state = BEFORE_ATTRIBUTE_NAME;
    this._index--;
};
194
// Handles the characters between "</" and the closing tag name. Whitespace is
// skipped and ">" returns to TEXT. Inside a special tag only a name starting
// with "s" is followed further; everything else is text again.
Tokenizer.prototype._stateBeforeCloseingTagName = function(c) {
    if (whitespace(c)) {
        return;
    }

    if (c === ">") {
        this._state = TEXT;
        return;
    }

    if (this._special === SPECIAL_NONE) {
        this._state = IN_CLOSING_TAG_NAME;
        this._sectionStart = this._index;
        return;
    }

    if (c === "s" || c === "S") {
        this._state = BEFORE_SPECIAL_END;
    } else {
        this._state = TEXT;
        this._index--;
    }
};
211
// Collects a closing tag name; ">" or whitespace ends it. The name is reported
// through onclosetag and the terminator is re-read in AFTER_CLOSING_TAG_NAME.
Tokenizer.prototype._stateInCloseingTagName = function(c) {
    var endsName = c === ">" || whitespace(c);

    if (!endsName) {
        return;
    }

    this._emitToken("onclosetag");
    this._state = AFTER_CLOSING_TAG_NAME;
    this._index--;
};
219
// After a closing tag name everything is skipped until ">", where text
// resumes right behind it.
Tokenizer.prototype._stateAfterCloseingTagName = function(c) {
    if (c !== ">") {
        return;
    }

    this._state = TEXT;
    this._sectionStart = this._index + 1;
};
227
// Inside an opening tag, between attributes: ">" ends the tag (onopentagend),
// "/" may start a self-closing tag, whitespace is skipped and any other
// character starts an attribute name.
Tokenizer.prototype._stateBeforeAttributeName = function(c) {
    if (c === ">") {
        this._cbs.onopentagend();
        this._state = TEXT;
        this._sectionStart = this._index + 1;
        return;
    }

    if (c === "/") {
        this._state = IN_SELF_CLOSING_TAG;
        return;
    }

    if (whitespace(c)) {
        return;
    }

    this._state = IN_ATTRIBUTE_NAME;
    this._sectionStart = this._index;
};
240
// After "/" inside an opening tag: ">" completes a self-closing tag
// (onselfclosingtag); whitespace is skipped; any other character is re-read
// as the possible start of an attribute.
Tokenizer.prototype._stateInSelfClosingTag = function(c) {
    if (c === ">") {
        this._cbs.onselfclosingtag();
        this._state = TEXT;
        this._sectionStart = this._index + 1;
        return;
    }

    if (whitespace(c)) {
        return;
    }

    this._state = BEFORE_ATTRIBUTE_NAME;
    this._index--;
};
251
// Collects an attribute name; "=", "/", ">" or whitespace ends it. The name is
// reported through onattribname, the section is marked as consumed (-1) and
// the terminator is re-read in AFTER_ATTRIBUTE_NAME.
Tokenizer.prototype._stateInAttributeName = function(c) {
    var endsName = c === "=" || c === "/" || c === ">" || whitespace(c);

    if (!endsName) {
        return;
    }

    var attribName = this._getSection();

    this._cbs.onattribname(attribName);
    this._sectionStart = -1;
    this._state = AFTER_ATTRIBUTE_NAME;
    this._index--;
};
260
// After an attribute name: "=" introduces a value. "/" or ">" ends a
// value-less attribute and is re-read in BEFORE_ATTRIBUTE_NAME. Any other
// non-whitespace character ends it too and starts the next attribute name.
Tokenizer.prototype._stateAfterAttributeName = function(c) {
    if (c === "=") {
        this._state = BEFORE_ATTRIBUTE_VALUE;
        return;
    }

    var endsTag = c === "/" || c === ">";

    if (!endsTag && whitespace(c)) {
        return;
    }

    this._cbs.onattribend();

    if (endsTag) {
        this._state = BEFORE_ATTRIBUTE_NAME;
        this._index--;
    } else {
        this._state = IN_ATTRIBUTE_NAME;
        this._sectionStart = this._index;
    }
};
274
// After "=": a quote character selects the matching quoted-value state (the
// value starts behind the quote), whitespace is skipped, and anything else
// starts an unquoted value at the current character.
Tokenizer.prototype._stateBeforeAttributeValue = function(c) {
    if (c === '"' || c === "'") {
        this._state =
            c === '"' ? IN_ATTRIBUTE_VALUE_DQ : IN_ATTRIBUTE_VALUE_SQ;
        this._sectionStart = this._index + 1;
        return;
    }

    if (whitespace(c)) {
        return;
    }

    this._state = IN_ATTRIBUTE_VALUE_NQ;
    this._sectionStart = this._index;
    this._index--; // re-read this character as part of the value
};
288
// Double-quoted attribute value: the closing quote emits the collected data
// and ends the attribute. With entity decoding enabled, "&" emits the data
// collected so far and switches to entity parsing, remembering this state
// as the one to come back to.
Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function(c) {
    var closesValue = c === '"';

    if (!closesValue && !(this._decodeEntities && c === "&")) {
        return;
    }

    this._emitToken("onattribdata");

    if (closesValue) {
        this._cbs.onattribend();
        this._state = BEFORE_ATTRIBUTE_NAME;
    } else {
        this._baseState = this._state;
        this._state = BEFORE_ENTITY;
        this._sectionStart = this._index;
    }
};
301
// Single-quoted attribute value: the closing quote emits the collected data
// and ends the attribute. With entity decoding enabled, "&" emits the data
// collected so far and switches to entity parsing, remembering this state
// as the one to come back to.
Tokenizer.prototype._stateInAttributeValueSingleQuotes = function(c) {
    var closesValue = c === "'";

    if (!closesValue && !(this._decodeEntities && c === "&")) {
        return;
    }

    this._emitToken("onattribdata");

    if (closesValue) {
        this._cbs.onattribend();
        this._state = BEFORE_ATTRIBUTE_NAME;
    } else {
        this._baseState = this._state;
        this._state = BEFORE_ENTITY;
        this._sectionStart = this._index;
    }
};
314
// Unquoted attribute value: whitespace or ">" emits the collected data, ends
// the attribute and is re-read in BEFORE_ATTRIBUTE_NAME. With entity decoding
// enabled, "&" emits the data so far and switches to entity parsing,
// remembering this state as the one to come back to.
Tokenizer.prototype._stateInAttributeValueNoQuotes = function(c) {
    var closesValue = whitespace(c) || c === ">";

    if (!closesValue && !(this._decodeEntities && c === "&")) {
        return;
    }

    this._emitToken("onattribdata");

    if (closesValue) {
        this._cbs.onattribend();
        this._state = BEFORE_ATTRIBUTE_NAME;
        this._index--;
    } else {
        this._baseState = this._state;
        this._state = BEFORE_ENTITY;
        this._sectionStart = this._index;
    }
};
328
// Character after "<!": "[" may start a CDATA section, "-" may start a
// comment, anything else is treated as a declaration.
Tokenizer.prototype._stateBeforeDeclaration = function(c) {
    if (c === "[") {
        this._state = BEFORE_CDATA_1;
    } else if (c === "-") {
        this._state = BEFORE_COMMENT;
    } else {
        this._state = IN_DECLARATION;
    }
};
337
// A declaration runs until ">"; its content is reported through
// ondeclaration and text resumes behind the ">".
Tokenizer.prototype._stateInDeclaration = function(c) {
    if (c !== ">") {
        return;
    }

    this._cbs.ondeclaration(this._getSection());
    this._state = TEXT;
    this._sectionStart = this._index + 1;
};
345
// A processing instruction runs until ">"; its content is reported through
// onprocessinginstruction and text resumes behind the ">".
Tokenizer.prototype._stateInProcessingInstruction = function(c) {
    if (c !== ">") {
        return;
    }

    this._cbs.onprocessinginstruction(this._getSection());
    this._state = TEXT;
    this._sectionStart = this._index + 1;
};
353
// After "<!-": a second "-" opens a comment whose content starts behind it;
// anything else turns the construct into a declaration.
Tokenizer.prototype._stateBeforeComment = function(c) {
    if (c !== "-") {
        this._state = IN_DECLARATION;
        return;
    }

    this._state = IN_COMMENT;
    this._sectionStart = this._index + 1;
};
362
// Inside a comment: a "-" may be the first character of the closing "-->".
Tokenizer.prototype._stateInComment = function(c) {
    if (c === "-") {
        this._state = AFTER_COMMENT_1;
    }
};
366
// One "-" seen inside a comment: a second one moves on towards the end of the
// comment, anything else goes back to the comment body.
Tokenizer.prototype._stateAfterComment1 = function(c) {
    this._state = c === "-" ? AFTER_COMMENT_2 : IN_COMMENT;
};
374
// "--" seen inside a comment: ">" ends the comment, another "-" keeps waiting
// (so "--->" closes as well), anything else goes back to the comment body.
Tokenizer.prototype._stateAfterComment2 = function(c) {
    if (c === "-") {
        return; // stay in AFTER_COMMENT_2
    }

    if (c !== ">") {
        this._state = IN_COMMENT;
        return;
    }

    // the reported comment excludes the two trailing "-" characters
    var comment = this._buffer.substring(this._sectionStart, this._index - 2);

    this._cbs.oncomment(comment);
    this._state = TEXT;
    this._sectionStart = this._index + 1;
};
388
// The letters of "CDATA" after "<![" are matched one state at a time, upper
// or lower case. A mismatch turns the construct into a declaration and the
// offending character is re-read there.
Tokenizer.prototype._stateBeforeCdata1 = ifElseState("C", BEFORE_CDATA_2, IN_DECLARATION);
Tokenizer.prototype._stateBeforeCdata2 = ifElseState("D", BEFORE_CDATA_3, IN_DECLARATION);
Tokenizer.prototype._stateBeforeCdata3 = ifElseState("A", BEFORE_CDATA_4, IN_DECLARATION);
Tokenizer.prototype._stateBeforeCdata4 = ifElseState("T", BEFORE_CDATA_5, IN_DECLARATION);
Tokenizer.prototype._stateBeforeCdata5 = ifElseState("A", BEFORE_CDATA_6, IN_DECLARATION);
414
// After "<![CDATA": a "[" opens the CDATA section, whose content starts
// behind it; anything else is re-read as part of a declaration.
Tokenizer.prototype._stateBeforeCdata6 = function(c) {
    if (c !== "[") {
        this._state = IN_DECLARATION;
        this._index--;
        return;
    }

    this._state = IN_CDATA;
    this._sectionStart = this._index + 1;
};
424
// Inside CDATA: a "]" may be the first character of the closing "]]>".
Tokenizer.prototype._stateInCdata = function(c) {
    if (c === "]") {
        this._state = AFTER_CDATA_1;
    }
};
428
// One "]" seen inside CDATA: a second one moves on towards the end of the
// section, anything else goes back to the CDATA content.
Tokenizer.prototype._stateAfterCdata1 = function(c) {
    this._state = c === "]" ? AFTER_CDATA_2 : IN_CDATA;
};
433
// "]]" seen inside CDATA: ">" ends the section, another "]" keeps waiting
// (so "]]]>" closes as well), anything else goes back to the CDATA content.
Tokenizer.prototype._stateAfterCdata2 = function(c) {
    if (c === "]") {
        return; // stay in AFTER_CDATA_2
    }

    if (c !== ">") {
        this._state = IN_CDATA;
        return;
    }

    // the reported data excludes the two trailing "]" characters
    var cdata = this._buffer.substring(this._sectionStart, this._index - 2);

    this._cbs.oncdata(cdata);
    this._state = TEXT;
    this._sectionStart = this._index + 1;
};
447
// Second character of a tag name that started with "s": "c" continues towards
// "script", "t" towards "style"; anything else is re-read as part of an
// ordinary tag name.
Tokenizer.prototype._stateBeforeSpecial = function(c) {
    switch (c) {
        case "c":
        case "C":
            this._state = BEFORE_SCRIPT_1;
            break;
        case "t":
        case "T":
            this._state = BEFORE_STYLE_1;
            break;
        default:
            this._state = IN_TAG_NAME;
            this._index--; // look at this character again
    }
};
458
// Second character of a closing tag name that started with "s" while a
// special tag is open: only the letter matching the open tag ("c" for script,
// "t" for style) is followed further; otherwise this is text.
Tokenizer.prototype._stateBeforeSpecialEnd = function(c) {
    if (this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")) {
        this._state = AFTER_SCRIPT_1;
        return;
    }

    if (this._special === SPECIAL_STYLE && (c === "t" || c === "T")) {
        this._state = AFTER_STYLE_1;
        return;
    }

    this._state = TEXT;
};
466
// Remaining letters of an opening "script" tag name, matched one state at a
// time; a mismatch falls back to an ordinary tag name.
Tokenizer.prototype._stateBeforeScript1 = consumeSpecialNameChar("R", BEFORE_SCRIPT_2);
Tokenizer.prototype._stateBeforeScript2 = consumeSpecialNameChar("I", BEFORE_SCRIPT_3);
Tokenizer.prototype._stateBeforeScript3 = consumeSpecialNameChar("P", BEFORE_SCRIPT_4);
Tokenizer.prototype._stateBeforeScript4 = consumeSpecialNameChar("T", BEFORE_SCRIPT_5);
483
// After the letters "script": if the name ends here ("/", ">" or whitespace)
// the tokenizer is flagged as being inside a script tag. In every case the
// character is re-read in IN_TAG_NAME.
Tokenizer.prototype._stateBeforeScript5 = function(c) {
    var nameEnds = c === "/" || c === ">" || whitespace(c);

    if (nameEnds) {
        this._special = SPECIAL_SCRIPT;
    }

    this._state = IN_TAG_NAME;
    this._index--; // look at this character again
};
491
// Remaining letters of a closing "script" tag name, matched one state at a
// time; a mismatch means this was text and the character is re-read there.
Tokenizer.prototype._stateAfterScript1 = ifElseState(
    "R",
    AFTER_SCRIPT_2,
    TEXT
);
Tokenizer.prototype._stateAfterScript2 = ifElseState(
    "I",
    AFTER_SCRIPT_3,
    TEXT
);
Tokenizer.prototype._stateAfterScript3 = ifElseState(
    "P",
    AFTER_SCRIPT_4,
    TEXT
);
Tokenizer.prototype._stateAfterScript4 = ifElseState(
    "T",
    AFTER_SCRIPT_5,
    TEXT
);
496
// After the letters of a closing "script": when the name ends here (">" or
// whitespace) the special mode is left and the tag is handled as a regular
// closing tag whose name started 6 characters back. Otherwise this is text.
Tokenizer.prototype._stateAfterScript5 = function(c) {
    var nameEnds = c === ">" || whitespace(c);

    if (!nameEnds) {
        this._state = TEXT;
        return;
    }

    this._special = SPECIAL_NONE;
    this._state = IN_CLOSING_TAG_NAME;
    this._sectionStart = this._index - 6;
    this._index--; // look at this character again
};
505
// Remaining letters of an opening "style" tag name, matched one state at a
// time; a mismatch falls back to an ordinary tag name.
Tokenizer.prototype._stateBeforeStyle1 = consumeSpecialNameChar("Y", BEFORE_STYLE_2);
Tokenizer.prototype._stateBeforeStyle2 = consumeSpecialNameChar("L", BEFORE_STYLE_3);
Tokenizer.prototype._stateBeforeStyle3 = consumeSpecialNameChar("E", BEFORE_STYLE_4);
518
// After the letters "style": if the name ends here ("/", ">" or whitespace)
// the tokenizer is flagged as being inside a style tag. In every case the
// character is re-read in IN_TAG_NAME.
Tokenizer.prototype._stateBeforeStyle4 = function(c) {
    var nameEnds = c === "/" || c === ">" || whitespace(c);

    if (nameEnds) {
        this._special = SPECIAL_STYLE;
    }

    this._state = IN_TAG_NAME;
    this._index--; // look at this character again
};
526
// Remaining letters of a closing "style" tag name, matched one state at a
// time; a mismatch means this was text and the character is re-read there.
Tokenizer.prototype._stateAfterStyle1 = ifElseState(
    "Y",
    AFTER_STYLE_2,
    TEXT
);
Tokenizer.prototype._stateAfterStyle2 = ifElseState(
    "L",
    AFTER_STYLE_3,
    TEXT
);
Tokenizer.prototype._stateAfterStyle3 = ifElseState(
    "E",
    AFTER_STYLE_4,
    TEXT
);
530
// After the letters of a closing "style": when the name ends here (">" or
// whitespace) the special mode is left and the tag is handled as a regular
// closing tag whose name started 5 characters back. Otherwise this is text.
Tokenizer.prototype._stateAfterStyle4 = function(c) {
    var nameEnds = c === ">" || whitespace(c);

    if (!nameEnds) {
        this._state = TEXT;
        return;
    }

    this._special = SPECIAL_NONE;
    this._state = IN_CLOSING_TAG_NAME;
    this._sectionStart = this._index - 5;
    this._index--; // look at this character again
};
539
// After "&": "#" starts a numeric entity, anything else is re-read as the
// first character of a named entity.
Tokenizer.prototype._stateBeforeEntity = ifElseState("#", BEFORE_NUMERIC_ENTITY, IN_NAMED_ENTITY);
// After "&#": "x" or "X" selects hexadecimal digits, anything else is re-read
// as a decimal digit.
Tokenizer.prototype._stateBeforeNumericEntity = ifElseState("X", IN_HEX_ENTITY, IN_NUMERIC_ENTITY);
550
// For entities terminated with a semicolon: looks up the name between the "&"
// (at _sectionStart) and _index in the xml map (xml mode) or the full entity
// map. On a hit the decoded value is emitted and the section restarts behind
// the character at _index; on a miss nothing changes.
Tokenizer.prototype._parseNamedEntityStrict = function() {
    var nameStart = this._sectionStart + 1; // skip the "&"

    if (!(nameStart < this._index)) {
        return; // empty name
    }

    var name = this._buffer.substring(nameStart, this._index);
    var table = this._xmlMode ? xmlMap : entityMap;

    if (table.hasOwnProperty(name)) {
        this._emitPartial(table[name]);
        this._sectionStart = this._index + 1;
    }
};
567
// Parses legacy entities (written without the trailing semicolon). Tries the
// longest candidate first, from at most 6 characters down to 2, which are the
// maximum and minimum lengths of legacy entity names. On the first hit the
// decoded value is emitted and the section start moves behind the matched
// name.
Tokenizer.prototype._parseLegacyEntity = function() {
    var start = this._sectionStart + 1; // skip the "&"
    var longest = Math.min(this._index - start, 6);

    for (var length = longest; length >= 2; length--) {
        var candidate = this._buffer.substr(start, length);

        if (legacyMap.hasOwnProperty(candidate)) {
            this._emitPartial(legacyMap[candidate]);
            this._sectionStart += length + 1;
            return;
        }
    }
};
588
// Collects the name of a named entity (ASCII letters and digits).
// - ";" ends it: a strict lookup is tried first and, outside xml mode, a
//   legacy lookup if the strict one consumed nothing.
// - Any other non-alphanumeric character ends it without a semicolon. Nothing
//   is decoded in xml mode or for an empty name. In text the legacy lookup is
//   used; in attribute values a strict lookup is done unless the character is
//   "=". The terminating character is re-read in the base state.
Tokenizer.prototype._stateInNamedEntity = function(c) {
    if (c === ";") {
        this._parseNamedEntityStrict();
        if (this._sectionStart + 1 < this._index && !this._xmlMode) {
            this._parseLegacyEntity();
        }
        this._state = this._baseState;
        return;
    }

    var isAlphanumeric =
        (c >= "a" && c <= "z") ||
        (c >= "A" && c <= "Z") ||
        (c >= "0" && c <= "9");

    if (isAlphanumeric) {
        return; // still inside the name
    }

    var hasName = this._sectionStart + 1 !== this._index;

    if (!this._xmlMode && hasName) {
        if (this._baseState === TEXT) {
            this._parseLegacyEntity();
        } else if (c !== "=") {
            this._parseNamedEntityStrict();
        }
    }

    this._state = this._baseState;
    this._index--;
};
615
// Decodes the numeric entity that starts at _sectionStart and returns to the
// base state.
//   offset - number of prefix characters to skip (2 for "&#", 3 for "&#x")
//   base   - radix of the digits (10 or 16)
// When digits are present, the decoded character is emitted and the section
// restarts at _index. When there are none, the text is left to be emitted
// verbatim.
Tokenizer.prototype._decodeNumericEntity = function(offset, base) {
    var digitsStart = this._sectionStart + offset;

    if (digitsStart !== this._index) {
        var digits = this._buffer.substring(digitsStart, this._index);
        var codePoint = parseInt(digits, base);

        this._emitPartial(decodeCodePoint(codePoint));
        this._sectionStart = this._index;
    } else if (this._buffer.charAt(this._index) === ";") {
        // The ";" handlers increment _sectionStart right after this call, so
        // compensate here to keep "&#;" / "&#x;" as literal text. Callers that
        // stopped on another character, or at the end of the input, do not
        // increment. Decrementing for them would rewind onto the previous
        // character (emitting it twice) or produce a negative section start
        // (dropping the text).
        this._sectionStart--;
    }

    this._state = this._baseState;
};
632
// Collects the decimal digits of a numeric entity ("&#...").
// - ";" decodes the entity and skips the semicolon.
// - Any other non-digit ends it: outside xml mode the digits read so far are
//   decoded, in xml mode the entity is abandoned. The terminating character
//   is re-read in the base state.
Tokenizer.prototype._stateInNumericEntity = function(c) {
    if (c === ";") {
        this._decodeNumericEntity(2, 10);
        this._sectionStart++;
        return;
    }

    if (c >= "0" && c <= "9") {
        return; // still inside the number
    }

    if (this._xmlMode) {
        this._state = this._baseState;
    } else {
        this._decodeNumericEntity(2, 10);
    }
    this._index--;
};
646
// Collects the hexadecimal digits of a numeric entity ("&#x...").
// - ";" decodes the entity and skips the semicolon.
// - Any other non-hex character ends it: outside xml mode the digits read so
//   far are decoded, in xml mode the entity is abandoned. The terminating
//   character is re-read in the base state.
Tokenizer.prototype._stateInHexEntity = function(c) {
    if (c === ";") {
        this._decodeNumericEntity(3, 16);
        this._sectionStart++;
        return;
    }

    var isHexDigit =
        (c >= "a" && c <= "f") ||
        (c >= "A" && c <= "F") ||
        (c >= "0" && c <= "9");

    if (isHexDigit) {
        return; // still inside the number
    }

    if (this._xmlMode) {
        this._state = this._baseState;
    } else {
        this._decodeNumericEntity(3, 16);
    }
    this._index--;
};
664
// Trims the part of the buffer that is no longer needed and keeps
// _bufferOffset in step, so absolute positions stay correct.
// - Negative _sectionStart (token already emitted): the whole buffer is
//   dropped and _sectionStart is left untouched.
// - Otherwise nothing happens while paused. When running, pending text is
//   flushed through ontext in the TEXT state; in other states the unfinished
//   section is kept at the front of the buffer. _sectionStart becomes 0.
Tokenizer.prototype._cleanup = function() {
    var hasSection = !(this._sectionStart < 0);

    if (hasSection && !this._running) {
        return;
    }

    var dropEverything = true;

    if (hasSection) {
        if (this._state === TEXT) {
            if (this._sectionStart !== this._index) {
                this._cbs.ontext(this._buffer.substr(this._sectionStart));
            }
        } else if (this._sectionStart !== this._index) {
            // an unfinished token must survive until the next chunk
            dropEverything = false;
        }
    }

    if (dropEverything) {
        this._buffer = "";
        this._bufferOffset += this._index;
        this._index = 0;
    } else {
        this._buffer = this._buffer.substr(this._sectionStart);
        this._index -= this._sectionStart;
        this._bufferOffset += this._sectionStart;
    }

    if (hasSection) {
        this._sectionStart = 0;
    }
};
693
//TODO make events conditional
// Appends `chunk` to the buffer and parses it. Writing after end() reports an
// error through onerror, but the chunk is still appended and parsed.
Tokenizer.prototype.write = function(chunk) {
    if (this._ended) {
        this._cbs.onerror(Error(".write() after done!"));
    }

    this._buffer = this._buffer + chunk;
    this._parse();
};
701
// Main loop: while the tokenizer is running, hands the character at _index to
// the handler of the current state and then advances _index. Handlers may
// decrement _index so the same character is looked at again in their new
// state. An unrecognised state is reported through onerror. Afterwards
// _cleanup() trims the buffer.
Tokenizer.prototype._parse = function() {
    while (this._index < this._buffer.length && this._running) {
        var c = this._buffer.charAt(this._index);

        switch (this._state) {
            case TEXT:
                this._stateText(c);
                break;

            // tags
            case BEFORE_TAG_NAME:
                this._stateBeforeTagName(c);
                break;
            case IN_TAG_NAME:
                this._stateInTagName(c);
                break;
            case BEFORE_CLOSING_TAG_NAME:
                this._stateBeforeCloseingTagName(c);
                break;
            case IN_CLOSING_TAG_NAME:
                this._stateInCloseingTagName(c);
                break;
            case AFTER_CLOSING_TAG_NAME:
                this._stateAfterCloseingTagName(c);
                break;
            case IN_SELF_CLOSING_TAG:
                this._stateInSelfClosingTag(c);
                break;

            // attributes
            case BEFORE_ATTRIBUTE_NAME:
                this._stateBeforeAttributeName(c);
                break;
            case IN_ATTRIBUTE_NAME:
                this._stateInAttributeName(c);
                break;
            case AFTER_ATTRIBUTE_NAME:
                this._stateAfterAttributeName(c);
                break;
            case BEFORE_ATTRIBUTE_VALUE:
                this._stateBeforeAttributeValue(c);
                break;
            case IN_ATTRIBUTE_VALUE_DQ:
                this._stateInAttributeValueDoubleQuotes(c);
                break;
            case IN_ATTRIBUTE_VALUE_SQ:
                this._stateInAttributeValueSingleQuotes(c);
                break;
            case IN_ATTRIBUTE_VALUE_NQ:
                this._stateInAttributeValueNoQuotes(c);
                break;

            // declarations
            case BEFORE_DECLARATION:
                this._stateBeforeDeclaration(c);
                break;
            case IN_DECLARATION:
                this._stateInDeclaration(c);
                break;

            // processing instructions
            case IN_PROCESSING_INSTRUCTION:
                this._stateInProcessingInstruction(c);
                break;

            // comments
            case BEFORE_COMMENT:
                this._stateBeforeComment(c);
                break;
            case IN_COMMENT:
                this._stateInComment(c);
                break;
            case AFTER_COMMENT_1:
                this._stateAfterComment1(c);
                break;
            case AFTER_COMMENT_2:
                this._stateAfterComment2(c);
                break;

            // cdata
            case BEFORE_CDATA_1:
                this._stateBeforeCdata1(c);
                break;
            case BEFORE_CDATA_2:
                this._stateBeforeCdata2(c);
                break;
            case BEFORE_CDATA_3:
                this._stateBeforeCdata3(c);
                break;
            case BEFORE_CDATA_4:
                this._stateBeforeCdata4(c);
                break;
            case BEFORE_CDATA_5:
                this._stateBeforeCdata5(c);
                break;
            case BEFORE_CDATA_6:
                this._stateBeforeCdata6(c);
                break;
            case IN_CDATA:
                this._stateInCdata(c);
                break;
            case AFTER_CDATA_1:
                this._stateAfterCdata1(c);
                break;
            case AFTER_CDATA_2:
                this._stateAfterCdata2(c);
                break;

            // special tags
            case BEFORE_SPECIAL:
                this._stateBeforeSpecial(c);
                break;
            case BEFORE_SPECIAL_END:
                this._stateBeforeSpecialEnd(c);
                break;

            // script
            case BEFORE_SCRIPT_1:
                this._stateBeforeScript1(c);
                break;
            case BEFORE_SCRIPT_2:
                this._stateBeforeScript2(c);
                break;
            case BEFORE_SCRIPT_3:
                this._stateBeforeScript3(c);
                break;
            case BEFORE_SCRIPT_4:
                this._stateBeforeScript4(c);
                break;
            case BEFORE_SCRIPT_5:
                this._stateBeforeScript5(c);
                break;
            case AFTER_SCRIPT_1:
                this._stateAfterScript1(c);
                break;
            case AFTER_SCRIPT_2:
                this._stateAfterScript2(c);
                break;
            case AFTER_SCRIPT_3:
                this._stateAfterScript3(c);
                break;
            case AFTER_SCRIPT_4:
                this._stateAfterScript4(c);
                break;
            case AFTER_SCRIPT_5:
                this._stateAfterScript5(c);
                break;

            // style
            case BEFORE_STYLE_1:
                this._stateBeforeStyle1(c);
                break;
            case BEFORE_STYLE_2:
                this._stateBeforeStyle2(c);
                break;
            case BEFORE_STYLE_3:
                this._stateBeforeStyle3(c);
                break;
            case BEFORE_STYLE_4:
                this._stateBeforeStyle4(c);
                break;
            case AFTER_STYLE_1:
                this._stateAfterStyle1(c);
                break;
            case AFTER_STYLE_2:
                this._stateAfterStyle2(c);
                break;
            case AFTER_STYLE_3:
                this._stateAfterStyle3(c);
                break;
            case AFTER_STYLE_4:
                this._stateAfterStyle4(c);
                break;

            // entities
            case BEFORE_ENTITY:
                this._stateBeforeEntity(c);
                break;
            case BEFORE_NUMERIC_ENTITY:
                this._stateBeforeNumericEntity(c);
                break;
            case IN_NAMED_ENTITY:
                this._stateInNamedEntity(c);
                break;
            case IN_NUMERIC_ENTITY:
                this._stateInNumericEntity(c);
                break;
            case IN_HEX_ENTITY:
                this._stateInHexEntity(c);
                break;

            default:
                this._cbs.onerror(Error("unknown _state"), this._state);
        }

        this._index++;
    }

    this._cleanup();
};
860
// Clears the running flag; the parse loop checks it before each character.
Tokenizer.prototype.pause = function() {
    this._running = false;
};
// Sets the running flag again, parses whatever is still unread in the buffer
// and, if end() was already called, finishes the stream.
Tokenizer.prototype.resume = function() {
    this._running = true;

    var hasUnreadInput = this._index < this._buffer.length;

    if (hasUnreadInput) {
        this._parse();
    }

    if (this._ended) {
        this._finish();
    }
};
874
// Marks the input as complete, optionally writing a final `chunk` first.
// Calling it again reports an error through onerror. If the tokenizer is
// running the stream is finished right away; resume() finishes it otherwise.
Tokenizer.prototype.end = function(chunk) {
    if (this._ended) {
        this._cbs.onerror(Error(".end() after done!"));
    }

    if (chunk) {
        this.write(chunk);
    }

    this._ended = true;

    if (this._running) {
        this._finish();
    }
};
883
// Emits whatever is left in the current section in a reasonable way, then
// signals the end of the stream through onend.
Tokenizer.prototype._finish = function() {
    var hasTrailingData = this._sectionStart < this._index;

    if (hasTrailingData) {
        this._handleTrailingData();
    }

    this._cbs.onend();
};
892
// Emits the data left in the buffer when the input ends mid-token.
// - Unterminated CDATA / comment: reported through oncdata / oncomment.
// - Unterminated entity outside xml mode: decoded if possible; whatever
//   remains is handled again as if the tokenizer were in its base state.
// - Inside a tag (tag name, attribute name or value, closing tag name): the
//   data is ignored.
// - Everything else is reported as text.
//TODO add a way to remove current tag
Tokenizer.prototype._handleTrailingData = function() {
    var data = this._buffer.substr(this._sectionStart);
    var entityPending = false;

    switch (this._state) {
        case IN_CDATA:
        case AFTER_CDATA_1:
        case AFTER_CDATA_2:
            this._cbs.oncdata(data);
            return;

        case IN_COMMENT:
        case AFTER_COMMENT_1:
        case AFTER_COMMENT_2:
            this._cbs.oncomment(data);
            return;

        case IN_NAMED_ENTITY:
            if (!this._xmlMode) {
                this._parseLegacyEntity();
                entityPending = true;
            }
            break;

        case IN_NUMERIC_ENTITY:
            if (!this._xmlMode) {
                this._decodeNumericEntity(2, 10);
                entityPending = true;
            }
            break;

        case IN_HEX_ENTITY:
            if (!this._xmlMode) {
                this._decodeNumericEntity(3, 16);
                entityPending = true;
            }
            break;

        case IN_TAG_NAME:
        case BEFORE_ATTRIBUTE_NAME:
        case BEFORE_ATTRIBUTE_VALUE:
        case AFTER_ATTRIBUTE_NAME:
        case IN_ATTRIBUTE_NAME:
        case IN_ATTRIBUTE_VALUE_SQ:
        case IN_ATTRIBUTE_VALUE_DQ:
        case IN_ATTRIBUTE_VALUE_NQ:
        case IN_CLOSING_TAG_NAME:
            return; // ignore remaining data
    }

    if (!entityPending) {
        this._cbs.ontext(data);
        return;
    }

    // data is still left after the entity attempt: emit it the way the
    // base state would
    if (this._sectionStart < this._index) {
        this._state = this._baseState;
        this._handleTrailingData();
    }
};
942
// Re-runs the constructor on this instance, keeping the current xmlMode and
// decodeEntities settings and the same callbacks object.
Tokenizer.prototype.reset = function() {
    var options = {
        xmlMode: this._xmlMode,
        decodeEntities: this._decodeEntities
    };

    Tokenizer.call(this, options, this._cbs);
};
950
// Position of the read index counted from the start of the whole input: the
// number of characters already removed from the buffer plus the index inside
// the current buffer.
Tokenizer.prototype.getAbsoluteIndex = function() {
    var removed = this._bufferOffset;

    return removed + this._index;
};
954
// Returns the buffer content from _sectionStart up to, but not including,
// the character at _index.
Tokenizer.prototype._getSection = function() {
    var from = this._sectionStart;
    var to = this._index;

    return this._buffer.substring(from, to);
};
958
// Passes the current section to the callback called `name` and marks the
// section as consumed by setting _sectionStart to -1.
Tokenizer.prototype._emitToken = function(name) {
    var section = this._getSection();

    this._cbs[name](section);
    this._sectionStart = -1;
};
963
// Emits a decoded entity value: as text when the entity was found in text,
// as attribute data when it was found inside an attribute value.
Tokenizer.prototype._emitPartial = function(value) {
    if (this._baseState === TEXT) {
        this._cbs.ontext(value);
        return;
    }

    this._cbs.onattribdata(value); //TODO implement the new event
};