1 var Tokenizer = require("./Tokenizer.js");
6 xmlMode: Disables the special behavior for script/style tags (false by default)
7 lowerCaseAttributeNames: call .toLowerCase for each attribute name (true if xmlMode is `false`)
8 lowerCaseTags: call .toLowerCase for each tag name (true if xmlMode is `false`)
21 onprocessinginstruction,
//Lookup table consulted by onopentagname when xmlMode is off.
//Key: the tag name being opened. Value: a set (object with `true` members) of tag names
//that the element on top of the open-element stack is tested against.
//NOTE(review): going by the table's name, a match means the open element is closed implicitly - confirm in onopentagname.
36 var openImpliesClose = {
37 tr: { tr: true, th: true, td: true },
39 td: { thead: true, th: true, td: true },
40 body: { head: true, link: true, script: true },
55 option: { option: true },
56 optgroup: { optgroup: true }
//Tag names for which onopentagname pushes `true` onto _foreignContext; onclosetag pops the entry again.
82 var foreignContextElements = {
//Tag names for which onopentagname pushes `false` onto _foreignContext
//(only checked when the name is not in foreignContextElements); onclosetag pops the entry again.
87 var htmlIntegrationElements = {
94 "annotation-xml": true,
//Matches a whitespace character or "/"; _getInstructionName cuts the name off at the first match.
100 var re_nameEnd = /\s|\//;
//Parser constructor.
//cbs: object holding the optional handler callbacks (an empty object is used when omitted).
//options: parser options (an empty object is used when omitted).
102 function Parser(cbs, options) {
103 this._options = options || {};
104 this._cbs = cbs || {};
//state of the attribute currently being read
107 this._attribname = "";
108 this._attribvalue = "";
109 this._attribs = null;
111 this._foreignContext = [];
//endIndex stays null until _updatePosition has run for the first time
114 this.endIndex = null;
//an explicitly passed lowerCaseTags option wins (coerced to a boolean);
//without it, tag names are lowercased unless xmlMode is set
116 this._lowerCaseTagNames =
117 "lowerCaseTags" in this._options
118 ? !!this._options.lowerCaseTags
119 : !this._options.xmlMode;
//same rule for attribute names, driven by the lowerCaseAttributeNames option
120 this._lowerCaseAttributeNames =
121 "lowerCaseAttributeNames" in this._options
122 ? !!this._options.lowerCaseAttributeNames
123 : !this._options.xmlMode;
//NOTE(review): this assigns to the module-level `Tokenizer` binding, so a custom Tokenizer
//passed to one Parser appears to be used by every Parser created afterwards - confirm this is intended.
125 if (this._options.Tokenizer) {
126 Tokenizer = this._options.Tokenizer;
//the tokenizer receives the options and this parser instance
128 this._tokenizer = new Tokenizer(this._options, this);
//let the handler know which parser instance it is attached to
130 if (this._cbs.onparserinit) this._cbs.onparserinit(this);
//Sets up Parser to inherit from EventEmitter (module "events") via the "inherits" helper.
133 require("inherits")(Parser, require("events").EventEmitter);
//Updates this.startIndex and this.endIndex for the token that is about to be reported.
//initialOffset: amount subtracted from the tokenizer's _sectionStart while no endIndex has been recorded yet.
135 Parser.prototype._updatePosition = function(initialOffset) {
//endIndex === null: no position has been recorded so far
136 if (this.endIndex === null) {
137 if (this._tokenizer._sectionStart <= initialOffset) {
140 this.startIndex = this._tokenizer._sectionStart - initialOffset;
//else branch: the new token begins one past the previously recorded endIndex
142 } else this.startIndex = this.endIndex + 1;
//the end position is always taken from the tokenizer's absolute index
143 this.endIndex = this._tokenizer.getAbsoluteIndex();
146 //Tokenizer event handlers
//Tokenizer callback for text: updates the position (offset 1) and forwards `data`
//to the ontext handler when one is registered.
147 Parser.prototype.ontext = function(data) {
148 this._updatePosition(1);
151 if (this._cbs.ontext) this._cbs.ontext(data);
//Tokenizer callback for the name of an opening tag.
//name: tag name as read by the tokenizer.
154 Parser.prototype.onopentagname = function(name) {
155 if (this._lowerCaseTagNames) {
156 name = name.toLowerCase();
//remembered so the later callbacks for this tag (attributes, tag end, self-closing) know the tag name
159 this._tagname = name;
//outside xmlMode, tags listed in openImpliesClose test the element on top of the stack
//against their entry in that table
161 if (!this._options.xmlMode && name in openImpliesClose) {
164 (el = this._stack[this._stack.length - 1]) in
165 openImpliesClose[name];
//void elements are only put on the stack in xmlMode; every other tag always is
170 if (this._options.xmlMode || !(name in voidElements)) {
171 this._stack.push(name);
//track whether this element starts (true) or leaves (false) a foreign context
172 if (name in foreignContextElements) this._foreignContext.push(true);
173 else if (name in htmlIntegrationElements)
174 this._foreignContext.push(false);
177 if (this._cbs.onopentagname) this._cbs.onopentagname(name);
//attributes are only collected when an onopentag handler will receive them
178 if (this._cbs.onopentag) this._attribs = {};
//Tokenizer callback for the end of an opening tag: reports the tag with its collected
//attributes and, outside xmlMode, reports void elements as closed right away.
181 Parser.prototype.onopentagend = function() {
182 this._updatePosition(1);
185 if (this._cbs.onopentag)
186 this._cbs.onopentag(this._tagname, this._attribs);
//the attribute object has been handed over; drop the reference
187 this._attribs = null;
//void elements have no closing tag, so onclosetag is emitted for them here (non-XML mode only)
191 !this._options.xmlMode &&
192 this._cbs.onclosetag &&
193 this._tagname in voidElements
195 this._cbs.onclosetag(this._tagname);
//Tokenizer callback for a closing tag.
//name: tag name as read by the tokenizer.
201 Parser.prototype.onclosetag = function(name) {
202 this._updatePosition(1);
204 if (this._lowerCaseTagNames) {
205 name = name.toLowerCase();
//undo the _foreignContext entry pushed when such an element was opened
208 if (name in foreignContextElements || name in htmlIntegrationElements) {
209 this._foreignContext.pop();
//only look at the stack when it is non-empty and the tag is not a void element (or xmlMode is on)
213 this._stack.length &&
214 (!(name in voidElements) || this._options.xmlMode)
//innermost open element with this name
216 var pos = this._stack.lastIndexOf(name);
218 if (this._cbs.onclosetag) {
//number of elements from the top of the stack down to, and including, the match;
//each one is popped and reported as closed
219 pos = this._stack.length - pos;
220 while (pos--) this._cbs.onclosetag(this._stack.pop());
//without a handler the stack is simply cut back to the matched position
221 } else this._stack.length = pos;
//NOTE(review): this branch apparently handles a closing "p" without a matching open tag
//by emitting an open tag followed by its close - confirm against the full condition
222 } else if (name === "p" && !this._options.xmlMode) {
223 this.onopentagname(name);
224 this._closeCurrentTag();
//outside xmlMode a closing "br" or "p" that did not take the stack branch above
//is reported as an opened and immediately closed tag
226 } else if (!this._options.xmlMode && (name === "br" || name === "p")) {
227 this.onopentagname(name);
228 this._closeCurrentTag();
//Tokenizer callback for a self-closing tag.
//The tag is closed right away when xmlMode or recognizeSelfClosing is set, or when the
//top entry of _foreignContext is truthy.
232 Parser.prototype.onselfclosingtag = function() {
234 this._options.xmlMode ||
235 this._options.recognizeSelfClosing ||
236 this._foreignContext[this._foreignContext.length - 1]
238 this._closeCurrentTag();
//Closes the tag whose name is stored in _tagname: when that name is on top of the stack,
//the onclosetag handler (if any) is called with it.
244 Parser.prototype._closeCurrentTag = function() {
245 var name = this._tagname;
249 //self-closing tags will be on the top of the stack
250 //(cheaper check than in onclosetag)
251 if (this._stack[this._stack.length - 1] === name) {
252 if (this._cbs.onclosetag) {
253 this._cbs.onclosetag(name);
//Tokenizer callback for an attribute name: stores it in _attribname,
//lowercased first when _lowerCaseAttributeNames is set.
260 Parser.prototype.onattribname = function(name) {
261 if (this._lowerCaseAttributeNames) {
262 name = name.toLowerCase();
264 this._attribname = name;
//Tokenizer callback for attribute value data: appends `value` to the value collected so far.
267 Parser.prototype.onattribdata = function(value) {
268 this._attribvalue += value;
//Tokenizer callback for the end of an attribute: reports the collected name/value pair,
//records it in _attribs and clears the per-attribute state.
271 Parser.prototype.onattribend = function() {
272 if (this._cbs.onattribute)
273 this._cbs.onattribute(this._attribname, this._attribvalue);
//an attribute name that is already an own property of _attribs is not overwritten,
//so the first value seen for a name is the one that is kept
276 !Object.prototype.hasOwnProperty.call(this._attribs, this._attribname)
278 this._attribs[this._attribname] = this._attribvalue;
//ready for the next attribute
280 this._attribname = "";
281 this._attribvalue = "";
//Extracts the name of a declaration / processing instruction from its raw text.
//value: the raw instruction text.
//The name is everything before the first whitespace character or "/" (re_nameEnd),
//or the whole text when neither occurs; it is lowercased when _lowerCaseTagNames is set.
284 Parser.prototype._getInstructionName = function(value) {
285 var idx = value.search(re_nameEnd),
286 name = idx < 0 ? value : value.substr(0, idx);
288 if (this._lowerCaseTagNames) {
289 name = name.toLowerCase();
//Tokenizer callback for a declaration. It is reported through the onprocessinginstruction
//handler (if any), with "!" prefixed to both the extracted name and the raw value.
295 Parser.prototype.ondeclaration = function(value) {
296 if (this._cbs.onprocessinginstruction) {
297 var name = this._getInstructionName(value);
298 this._cbs.onprocessinginstruction("!" + name, "!" + value);
//Tokenizer callback for a processing instruction. It is reported through the
//onprocessinginstruction handler (if any), with "?" prefixed to both the extracted name and the raw value.
302 Parser.prototype.onprocessinginstruction = function(value) {
303 if (this._cbs.onprocessinginstruction) {
304 var name = this._getInstructionName(value);
305 this._cbs.onprocessinginstruction("?" + name, "?" + value);
//Tokenizer callback for a comment: updates the position (offset 4), then calls the
//oncomment handler with the comment text followed by the oncommentend handler, where registered.
309 Parser.prototype.oncomment = function(value) {
310 this._updatePosition(4);
312 if (this._cbs.oncomment) this._cbs.oncomment(value);
313 if (this._cbs.oncommentend) this._cbs.oncommentend();
//Tokenizer callback for a CDATA section.
//value: the content of the section.
316 Parser.prototype.oncdata = function(value) {
317 this._updatePosition(1);
//with xmlMode or recognizeCDATA the content is reported as text, wrapped in
//oncdatastart / oncdataend notifications
319 if (this._options.xmlMode || this._options.recognizeCDATA) {
320 if (this._cbs.oncdatastart) this._cbs.oncdatastart();
321 if (this._cbs.ontext) this._cbs.ontext(value);
322 if (this._cbs.oncdataend) this._cbs.oncdataend();
//NOTE(review): presumably the fallback when CDATA is not recognized - the section is
//reported as a comment with its "[CDATA[" / "]]" markers restored; confirm this is the else branch
324 this.oncomment("[CDATA[" + value + "]]");
//Tokenizer callback for errors: passes `err` on to the onerror handler when one is registered.
328 Parser.prototype.onerror = function(err) {
329 if (this._cbs.onerror) this._cbs.onerror(err);
//Tokenizer callback for the end of the input: elements still on the stack are reported
//to the onclosetag handler (if any), then the onend handler is called.
332 Parser.prototype.onend = function() {
333 if (this._cbs.onclosetag) {
//i starts at the stack length and is decremented before each read, so reporting
//begins with the element on top of the stack
335 var i = this._stack.length;
337 this._cbs.onclosetag(this._stack[--i])
340 if (this._cbs.onend) this._cbs.onend();
343 //Resets the parser to a blank state, ready to parse a new HTML document
//Order: notify the onreset handler, reset the tokenizer, clear the parser's own state,
//then announce the parser again through onparserinit.
//NOTE(review): _attribvalue, _foreignContext and endIndex are not visibly reset here - confirm whether they should be.
344 Parser.prototype.reset = function() {
345 if (this._cbs.onreset) this._cbs.onreset();
346 this._tokenizer.reset();
349 this._attribname = "";
350 this._attribs = null;
353 if (this._cbs.onparserinit) this._cbs.onparserinit(this);
356 //Parses a complete HTML document and pushes it to the handler
357 Parser.prototype.parseComplete = function(data) {
//Passes `chunk` to the tokenizer's write method.
362 Parser.prototype.write = function(chunk) {
363 this._tokenizer.write(chunk);
//Passes `chunk` to the tokenizer's end method.
366 Parser.prototype.end = function(chunk) {
367 this._tokenizer.end(chunk);
//Pauses the underlying tokenizer.
370 Parser.prototype.pause = function() {
371 this._tokenizer.pause();
//Resumes the underlying tokenizer.
374 Parser.prototype.resume = function() {
375 this._tokenizer.resume();
378 //aliases kept for backwards compatibility: parseChunk is write, done is end
379 Parser.prototype.parseChunk = Parser.prototype.write;
380 Parser.prototype.done = Parser.prototype.end;
//The Parser constructor is the module's export.
382 module.exports = Parser;