| // wrapper for non-node envs |
| ;(function (sax) { |
| |
// Public entry points exposed on the `sax` namespace object.
// sax.parser(strict, opt) is a factory equivalent to
// `new sax.SAXParser(strict, opt)`.
sax.parser = function (strict, opt) {
  return new SAXParser(strict, opt)
}
sax.SAXParser = SAXParser
sax.SAXStream = SAXStream
sax.createStream = createStream
| |
// Buffer overrun protection.
//
// Once the parse position passes MAX_BUFFER_LENGTH, buffer lengths start
// being checked. Every check schedules the next one at
// MAX_BUFFER_LENGTH - max(buffer lengths), which is the earliest point at
// which an overrun could occur, so checks are as rare as possible while
// still guaranteeing the bound is never crossed.
//
// Buffers are tested at most once per write(). Passing a very large string
// into write() may therefore, in the extreme case, buffer one complete copy
// of that string; the caller controls this by writing smaller chunks.
//
// Set to Infinity to have unlimited buffers.
sax.MAX_BUFFER_LENGTH = 1024 * 64
| |
// Names of the parser properties that accumulate text and are therefore
// subject to the MAX_BUFFER_LENGTH check.
var buffers = [
  "comment",
  "sgmlDecl",
  "textNode",
  "tagName",
  "doctype",
  "procInstName",
  "procInstBody",
  "entity",
  "attribName",
  "attribValue",
  "cdata",
  "script"
]
| |
// Event names, exported for discoverability. A handler for event "x" is
// installed on a parser as the property "onx".
sax.EVENTS = [
  "text",
  "processinginstruction",
  "sgmldeclaration",
  "doctype",
  "comment",
  "attribute",
  "opentag",
  "closetag",
  "opencdata",
  "cdata",
  "closecdata",
  "error",
  "end",
  "ready",
  "script",
  "opennamespace",
  "closenamespace"
]
| |
// Parser constructor; also usable as a plain function (without `new`).
//
// strict - truthy for strict mode (also forces noscript).
// opt    - optional settings object. Options read here: lowercase /
//          lowercasetags, noscript, xmlns, position. The object is stored
//          as parser.opt and its `lowercase` field is normalised in place.
//
// Resets every piece of parser state and finally emits "onready". end()
// re-invokes this constructor on an existing parser to reset it.
function SAXParser (strict, opt) {
  if (!(this instanceof SAXParser)) {
    return new SAXParser(strict, opt)
  }

  var self = this

  clearBuffers(self)
  self.c = ""
  self.q = ""
  self.bufferCheckPosition = sax.MAX_BUFFER_LENGTH

  self.opt = opt || {}
  self.opt.lowercase = self.opt.lowercase || self.opt.lowercasetags
  // name of the String method applied to tag/attribute names in loose mode
  self.looseCase = self.opt.lowercase ? "toLowerCase" : "toUpperCase"

  self.tags = []
  self.sawRoot = false
  self.closedRoot = false
  self.closed = false
  self.error = null
  self.tag = null
  self.strict = !!strict
  self.noscript = !!(strict || self.opt.noscript)
  self.state = S.BEGIN
  // per-parser entity table that inherits the shared defaults
  self.ENTITIES = Object.create(sax.ENTITIES)
  self.attribList = []

  // Namespaces form a prototype chain: the current tag's ns object
  // inherits from its parent tag's, bottoming out at rootNS.
  if (self.opt.xmlns) {
    self.ns = Object.create(rootNS)
  }

  // position tracking is mostly just for error reporting
  self.trackPosition = self.opt.position !== false
  if (self.trackPosition) {
    self.column = 0
    self.line = 0
    self.position = 0
  }

  emit(self, "onready")
}
| |
// Minimal fallbacks for engines that lack these ES5 built-ins.
// They are only installed when the native function is missing.

// Object.create(o): new object whose prototype is `o`
// (the fallback ignores the optional property-descriptor argument).
if (!Object.create) {
  Object.create = function (o) {
    function f () { this.__proto__ = o }
    f.prototype = o
    return new f
  }
}

// Object.getPrototypeOf(o): relies on the non-standard __proto__ link.
if (!Object.getPrototypeOf) {
  Object.getPrototypeOf = function (o) {
    return o.__proto__
  }
}

// Object.keys(o): own enumerable property names.
if (!Object.keys) {
  Object.keys = function (o) {
    var names = []
    for (var key in o) {
      if (o.hasOwnProperty(key)) names.push(key)
    }
    return names
  }
}
| |
// Enforce sax.MAX_BUFFER_LENGTH on every parser buffer and schedule the
// next check.
//
// Oversized text, cdata and script buffers are flushed to their handlers
// (they can legitimately grow large); any other oversized buffer is
// reported through error(). parser.bufferCheckPosition is then set to the
// earliest position at which a buffer could exceed the limit again.
function checkBufferLength (parser) {
  var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
  var maxActual = 0

  // A buffer slot may hold a non-string marker (the doctype slot is set to
  // `true` once a doctype was seen). Count those as empty; otherwise an
  // undefined length turns the schedule below into NaN and silently
  // disables every later check.
  function lengthOf (name) {
    var value = parser[name]
    return typeof value === "string" ? value.length : 0
  }

  for (var i = 0, l = buffers.length; i < l; i++) {
    var name = buffers[i]
    if (lengthOf(name) > maxAllowed) {
      // Text/cdata nodes can get big, and since they're buffered,
      // we can get here under normal conditions.
      // Avoid issues by emitting the node now,
      // so at least it won't get any bigger.
      switch (name) {
        case "textNode":
          closeText(parser)
          break

        case "cdata":
          emitNode(parser, "oncdata", parser.cdata)
          parser.cdata = ""
          break

        case "script":
          emitNode(parser, "onscript", parser.script)
          parser.script = ""
          break

        default:
          error(parser, "Max buffer length exceeded: " + name)
      }
    }
    // Measure after any flush: a buffer that was just emptied must not
    // pull the next check into the past.
    maxActual = Math.max(maxActual, lengthOf(name))
  }

  // schedule the next check for the earliest possible buffer overrun.
  parser.bufferCheckPosition = (sax.MAX_BUFFER_LENGTH - maxActual) +
    parser.position
}
| |
// Reset every buffer property listed in `buffers` to the empty string.
function clearBuffers (parser) {
  buffers.forEach(function (name) {
    parser[name] = ""
  })
}
| |
// Emit whatever is pending in the text, cdata and script buffers and
// leave those buffers empty. Text goes through closeText(); cdata and
// script are emitted only when non-empty.
function flushBuffers (parser) {
  closeText(parser)

  var pending = [ [ "cdata", "oncdata" ], [ "script", "onscript" ] ]
  for (var i = 0; i < pending.length; i++) {
    var field = pending[i][0]
    if (parser[field] !== "") {
      emitNode(parser, pending[i][1], parser[field])
      parser[field] = ""
    }
  }
}
| |
// Parser instance methods.
//   end()    - finish parsing (see end()); returns nothing.
//   write()  - feed a chunk (see write()).
//   resume() - clear a recorded error so writing can continue; returns this.
//   close()  - same as write(null).
//   flush()  - emit buffered text/cdata/script immediately.
SAXParser.prototype = {
  end: function () {
    end(this)
  },
  write: write,
  resume: function () {
    this.error = null
    return this
  },
  close: function () {
    return this.write(null)
  },
  flush: function () {
    flushBuffers(this)
  }
}
| |
// Base class for SAXStream: node's Stream when `require` can provide it,
// otherwise an empty constructor (non-node environments).
var Stream
try {
  Stream = require("stream").Stream
} catch (ex) {
  Stream = function () {}
}
| |
| |
// Parser events that SAXStream forwards generically; "error" and "end"
// are excluded because SAXStream wires them up itself.
var streamWraps = sax.EVENTS.filter(function (ev) {
  return !(ev === "error" || ev === "end")
})
| |
// Factory for SAXStream; takes the same arguments as the SAXStream
// constructor and returns the new stream.
function createStream (strict, opt) {
  var stream = new SAXStream(strict, opt)
  return stream
}
| |
// Stream wrapper around a SAXParser; usable with or without `new`.
//
// strict, opt - passed straight to the SAXParser constructor.
//
// The parser's "end" and "error" events are re-emitted on the stream. For
// every event in streamWraps an "on<event>" accessor is defined on the
// stream: reading it returns the parser's handler; assigning a handler
// registers it with stream.on(); assigning a falsy value removes all
// listeners for that event and stores the value on the parser.
function SAXStream (strict, opt) {
  if (!(this instanceof SAXStream)) {
    return new SAXStream(strict, opt)
  }

  Stream.apply(this)

  var stream = this

  stream._parser = new SAXParser(strict, opt)
  stream.writable = true
  stream.readable = true

  stream._parser.onend = function () {
    stream.emit("end")
  }

  stream._parser.onerror = function (er) {
    stream.emit("error", er)

    // if emit didn't throw, the error was handled;
    // clear it so that we can write again.
    stream._parser.error = null
  }

  // created lazily by write() when the first Buffer arrives
  stream._decoder = null

  streamWraps.forEach(function (ev) {
    Object.defineProperty(stream, "on" + ev, {
      enumerable: true,
      configurable: false,
      get: function () {
        return stream._parser["on" + ev]
      },
      set: function (h) {
        if (h) {
          stream.on(ev, h)
          return
        }
        stream.removeAllListeners(ev)
        return stream._parser["on" + ev] = h
      }
    })
  })
}
| |
// SAXStream inherits from Stream; `constructor` is restored as a
// non-enumerable property pointing back at SAXStream.
SAXStream.prototype = Object.create(
  Stream.prototype,
  { constructor: { value: SAXStream } }
)
| |
// Feed a chunk to the underlying parser.
//
// Buffers are decoded as UTF-8 through a StringDecoder that is created on
// first use and kept on the stream. Anything else is passed to the parser
// via its toString(). A "data" event is emitted afterwards with the
// (decoded) chunk. Always returns true.
SAXStream.prototype.write = function (data) {
  var isBuffer = typeof Buffer === 'function' &&
    typeof Buffer.isBuffer === 'function' &&
    Buffer.isBuffer(data)

  if (isBuffer) {
    if (!this._decoder) {
      var StringDecoder = require('string_decoder').StringDecoder
      this._decoder = new StringDecoder('utf8')
    }
    data = this._decoder.write(data)
  }

  this._parser.write(data.toString())
  this.emit("data", data)
  return true
}
| |
// Finish the stream: write the optional final chunk (when it has a
// non-zero length), then end the underlying parser. Always returns true.
SAXStream.prototype.end = function (chunk) {
  var hasFinalChunk = chunk && chunk.length
  if (hasFinalChunk) {
    this.write(chunk)
  }
  this._parser.end()
  return true
}
| |
// Register a listener. The first time a listener is added for one of the
// streamWraps events (and the parser has no handler for it yet), a parser
// handler is installed that re-emits the event on the stream with the
// same arguments. Registration itself is delegated to Stream.prototype.on.
SAXStream.prototype.on = function (ev, handler) {
  var stream = this
  var slot = "on" + ev

  if (!stream._parser[slot] && streamWraps.indexOf(ev) !== -1) {
    stream._parser[slot] = function () {
      var args = Array.prototype.slice.call(arguments)
      args.unshift(ev)
      stream.emit.apply(stream, args)
    }
  }

  return Stream.prototype.on.call(stream, ev, handler)
}
| |
| |
| |
// Character classes and tokens.
//
// The sets are first written as plain strings (so that `entity` and
// `attribEnd` can be composed by concatenation) and are converted into
// lookup objects with charClass() at the end of this section.
// The number/letter sets really ought to be proper character classes:
// XML allows all manner of ridiculous numbers and digits.
var whitespace = "\r\n\t "
var number = "0124356789"
var letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
var quote = "'\""
var entity = number + letter + "#"
var attribEnd = whitespace + ">"

// literal tokens
var CDATA = "[CDATA["
var DOCTYPE = "DOCTYPE"

// namespace URIs that the reserved xml / xmlns prefixes are bound to
var XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
var XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"
var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }

// http://www.w3.org/TR/REC-xml/#NT-NameStartChar
// This implementation works on strings, one character at a time, so it
// cannot support astral-plane characters (10000-EFFFF) without a
// significant breaking change to either this parser or the JavaScript
// language. An emoji-capable xml parser is left as an exercise for the
// reader.
var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/

var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/

// turn all the string character sets into character class objects.
whitespace = charClass(whitespace)
number = charClass(number)
letter = charClass(letter)
quote = charClass(quote)
entity = charClass(entity)
attribEnd = charClass(attribEnd)
| |
// Turn a string into a lookup object that maps each of its characters
// to `true`.
function charClass (str) {
  var chars = str.split("")
  var set = {}
  for (var i = 0; i < chars.length; i++) {
    set[chars[i]] = true
  }
  return set
}
| |
// True when `c` is a RegExp, judged by its Object.prototype.toString tag.
function isRegExp (c) {
  var tag = Object.prototype.toString.call(c)
  return tag === '[object RegExp]'
}
| |
// Membership test for a character class.
// A RegExp class yields a boolean (does `c` match?); a lookup-object class
// yields whatever is stored under `c` (true, or undefined when absent).
function is (charclass, c) {
  if (isRegExp(charclass)) {
    return !!c.match(charclass)
  }
  return charclass[c]
}
| |
// Negation of is(): true when `c` is not in the character class.
function not (charclass, c) {
  var member = is(charclass, c)
  return !member
}
| |
// Parser states, numbered consecutively from 0 in declaration order.
// `S` is the running counter used while the table is built.
var S = 0
sax.STATE = {
  BEGIN: S++,
  TEXT: S++,                   // general stuff
  TEXT_ENTITY: S++,            // &amp and such.
  OPEN_WAKA: S++,              // <
  SGML_DECL: S++,              // <!BLARG
  SGML_DECL_QUOTED: S++,       // <!BLARG foo "bar
  DOCTYPE: S++,                // <!DOCTYPE
  DOCTYPE_QUOTED: S++,         // <!DOCTYPE "//blah
  DOCTYPE_DTD: S++,            // <!DOCTYPE "//blah" [ ...
  DOCTYPE_DTD_QUOTED: S++,     // <!DOCTYPE "//blah" [ "foo
  COMMENT_STARTING: S++,       // <!-
  COMMENT: S++,                // <!--
  COMMENT_ENDING: S++,         // <!-- blah -
  COMMENT_ENDED: S++,          // <!-- blah --
  CDATA: S++,                  // <![CDATA[ something
  CDATA_ENDING: S++,           // ]
  CDATA_ENDING_2: S++,         // ]]
  PROC_INST: S++,              // <?hi
  PROC_INST_BODY: S++,         // <?hi there
  PROC_INST_ENDING: S++,       // <?hi "there" ?
  OPEN_TAG: S++,               // <strong
  OPEN_TAG_SLASH: S++,         // <strong /
  ATTRIB: S++,                 // <a
  ATTRIB_NAME: S++,            // <a foo
  ATTRIB_NAME_SAW_WHITE: S++,  // <a foo _
  ATTRIB_VALUE: S++,           // <a foo=
  ATTRIB_VALUE_QUOTED: S++,    // <a foo="bar
  ATTRIB_VALUE_CLOSED: S++,    // <a foo="bar"
  ATTRIB_VALUE_UNQUOTED: S++,  // <a foo=bar
  ATTRIB_VALUE_ENTITY_Q: S++,  // <foo bar="&quot;"
  ATTRIB_VALUE_ENTITY_U: S++,  // <foo bar=&quot;
  CLOSE_TAG: S++,              // </a
  CLOSE_TAG_SAW_WHITE: S++,    // </a   >
  SCRIPT: S++,                 // <script> ...
  SCRIPT_ENDING: S++           // <script> ... <
}
| |
// Default named entities. The five XML entities are given as literal
// strings; every other entry is a character code that is turned into a
// one-character string right after this table is defined.
sax.ENTITIES = {
  "amp": "&",
  "gt": ">",
  "lt": "<",
  "quot": "\"",
  "apos": "'",
  "AElig": 198,
  "Aacute": 193,
  "Acirc": 194,
  "Agrave": 192,
  "Aring": 197,
  "Atilde": 195,
  "Auml": 196,
  "Ccedil": 199,
  "ETH": 208,
  "Eacute": 201,
  "Ecirc": 202,
  "Egrave": 200,
  "Euml": 203,
  "Iacute": 205,
  "Icirc": 206,
  "Igrave": 204,
  "Iuml": 207,
  "Ntilde": 209,
  "Oacute": 211,
  "Ocirc": 212,
  "Ograve": 210,
  "Oslash": 216,
  "Otilde": 213,
  "Ouml": 214,
  "THORN": 222,
  "Uacute": 218,
  "Ucirc": 219,
  "Ugrave": 217,
  "Uuml": 220,
  "Yacute": 221,
  "aacute": 225,
  "acirc": 226,
  "aelig": 230,
  "agrave": 224,
  "aring": 229,
  "atilde": 227,
  "auml": 228,
  "ccedil": 231,
  "eacute": 233,
  "ecirc": 234,
  "egrave": 232,
  "eth": 240,
  "euml": 235,
  "iacute": 237,
  "icirc": 238,
  "igrave": 236,
  "iuml": 239,
  "ntilde": 241,
  "oacute": 243,
  "ocirc": 244,
  "ograve": 242,
  "oslash": 248,
  "otilde": 245,
  "ouml": 246,
  "szlig": 223,
  "thorn": 254,
  "uacute": 250,
  "ucirc": 251,
  "ugrave": 249,
  "uuml": 252,
  "yacute": 253,
  "yuml": 255,
  "copy": 169,
  "reg": 174,
  "nbsp": 160,
  "iexcl": 161,
  "cent": 162,
  "pound": 163,
  "curren": 164,
  "yen": 165,
  "brvbar": 166,
  "sect": 167,
  "uml": 168,
  "ordf": 170,
  "laquo": 171,
  "not": 172,
  "shy": 173,
  "macr": 175,
  "deg": 176,
  "plusmn": 177,
  "sup1": 185,
  "sup2": 178,
  "sup3": 179,
  "acute": 180,
  "micro": 181,
  "para": 182,
  "middot": 183,
  "cedil": 184,
  "ordm": 186,
  "raquo": 187,
  "frac14": 188,
  "frac12": 189,
  "frac34": 190,
  "iquest": 191,
  "times": 215,
  "divide": 247,
  "OElig": 338,
  "oelig": 339,
  "Scaron": 352,
  "scaron": 353,
  "Yuml": 376,
  "fnof": 402,
  "circ": 710,
  "tilde": 732,
  "Alpha": 913,
  "Beta": 914,
  "Gamma": 915,
  "Delta": 916,
  "Epsilon": 917,
  "Zeta": 918,
  "Eta": 919,
  "Theta": 920,
  "Iota": 921,
  "Kappa": 922,
  "Lambda": 923,
  "Mu": 924,
  "Nu": 925,
  "Xi": 926,
  "Omicron": 927,
  "Pi": 928,
  "Rho": 929,
  "Sigma": 931,
  "Tau": 932,
  "Upsilon": 933,
  "Phi": 934,
  "Chi": 935,
  "Psi": 936,
  "Omega": 937,
  "alpha": 945,
  "beta": 946,
  "gamma": 947,
  "delta": 948,
  "epsilon": 949,
  "zeta": 950,
  "eta": 951,
  "theta": 952,
  "iota": 953,
  "kappa": 954,
  "lambda": 955,
  "mu": 956,
  "nu": 957,
  "xi": 958,
  "omicron": 959,
  "pi": 960,
  "rho": 961,
  "sigmaf": 962,
  "sigma": 963,
  "tau": 964,
  "upsilon": 965,
  "phi": 966,
  "chi": 967,
  "psi": 968,
  "omega": 969,
  "thetasym": 977,
  "upsih": 978,
  "piv": 982,
  "ensp": 8194,
  "emsp": 8195,
  "thinsp": 8201,
  "zwnj": 8204,
  "zwj": 8205,
  "lrm": 8206,
  "rlm": 8207,
  "ndash": 8211,
  "mdash": 8212,
  "lsquo": 8216,
  "rsquo": 8217,
  "sbquo": 8218,
  "ldquo": 8220,
  "rdquo": 8221,
  "bdquo": 8222,
  "dagger": 8224,
  "Dagger": 8225,
  "bull": 8226,
  "hellip": 8230,
  "permil": 8240,
  "prime": 8242,
  "Prime": 8243,
  "lsaquo": 8249,
  "rsaquo": 8250,
  "oline": 8254,
  "frasl": 8260,
  "euro": 8364,
  "image": 8465,
  "weierp": 8472,
  "real": 8476,
  "trade": 8482,
  "alefsym": 8501,
  "larr": 8592,
  "uarr": 8593,
  "rarr": 8594,
  "darr": 8595,
  "harr": 8596,
  "crarr": 8629,
  "lArr": 8656,
  "uArr": 8657,
  "rArr": 8658,
  "dArr": 8659,
  "hArr": 8660,
  "forall": 8704,
  "part": 8706,
  "exist": 8707,
  "empty": 8709,
  "nabla": 8711,
  "isin": 8712,
  "notin": 8713,
  "ni": 8715,
  "prod": 8719,
  "sum": 8721,
  "minus": 8722,
  "lowast": 8727,
  "radic": 8730,
  "prop": 8733,
  "infin": 8734,
  "ang": 8736,
  "and": 8743,
  "or": 8744,
  "cap": 8745,
  "cup": 8746,
  "int": 8747,
  "there4": 8756,
  "sim": 8764,
  "cong": 8773,
  "asymp": 8776,
  "ne": 8800,
  "equiv": 8801,
  "le": 8804,
  "ge": 8805,
  "sub": 8834,
  "sup": 8835,
  "nsub": 8836,
  "sube": 8838,
  "supe": 8839,
  "oplus": 8853,
  "otimes": 8855,
  "perp": 8869,
  "sdot": 8901,
  "lceil": 8968,
  "rceil": 8969,
  "lfloor": 8970,
  "rfloor": 8971,
  "lang": 9001,
  "rang": 9002,
  "loz": 9674,
  "spades": 9824,
  "clubs": 9827,
  "hearts": 9829,
  "diams": 9830
}
| |
// Replace every numeric entry of sax.ENTITIES by the one-character string
// for that code; string entries are already in their final form.
Object.keys(sax.ENTITIES).forEach(function (name) {
  var value = sax.ENTITIES[name]
  if (typeof value === 'number') {
    value = String.fromCharCode(value)
  }
  sax.ENTITIES[name] = value
})
| |
// Add the reverse lookup (state number -> state name) to sax.STATE.
// The names are snapshotted with Object.keys first: the loop adds numeric
// keys to the very object being walked, and for-in gives no guarantee about
// whether properties added during enumeration are visited. Object.keys also
// restricts the walk to own properties.
Object.keys(sax.STATE).forEach(function (name) {
  sax.STATE[sax.STATE[name]] = name
})

// shorthand
S = sax.STATE
| |
// Call the parser's handler stored under `event` (e.g. "ontext") with
// `data`, if such a handler is set. The handler runs with the parser as
// `this`.
function emit (parser, event, data) {
  if (parser[event]) {
    parser[event](data)
  }
}
| |
// Emit a node event, first flushing any pending text so that events stay
// in document order.
function emitNode (parser, nodeType, data) {
  var hasPendingText = !!parser.textNode
  if (hasPendingText) {
    closeText(parser)
  }
  emit(parser, nodeType, data)
}
| |
// Flush the text buffer: apply the trim/normalize options, emit "ontext"
// when the result is non-empty, then reset the buffer.
function closeText (parser) {
  var text = textopts(parser.opt, parser.textNode)
  // handlers observe the processed text on the parser while they run
  parser.textNode = text
  if (text) {
    emit(parser, "ontext", text)
  }
  parser.textNode = ""
}
| |
// Apply the text options to `text` and return the result:
//   opt.trim      - strip leading/trailing whitespace
//   opt.normalize - collapse each whitespace run into a single space
// Trimming happens before normalisation.
function textopts (opt, text) {
  var result = text
  if (opt.trim) {
    result = result.trim()
  }
  if (opt.normalize) {
    result = result.replace(/\s+/g, " ")
  }
  return result
}
| |
// Record and report a parse error.
// Pending text is flushed first. When position tracking is on, the line,
// column and current character are appended to the message. The resulting
// Error is stored on parser.error and passed to the "onerror" handler.
// Returns the parser.
function error (parser, er) {
  closeText(parser)

  var message = er
  if (parser.trackPosition) {
    message = message +
      "\nLine: " + parser.line +
      "\nColumn: " + parser.column +
      "\nChar: " + parser.c
  }

  var failure = new Error(message)
  parser.error = failure
  emit(parser, "onerror", failure)
  return parser
}
| |
// Finish parsing.
// Reports an unclosed root (strict mode only) and an unexpected end when
// the parser is in the middle of a construct, flushes pending text, marks
// the parser closed, emits "onend" and finally re-initialises the parser
// through the SAXParser constructor. Returns the parser.
function end (parser) {
  if (!parser.closedRoot) {
    strictFail(parser, "Unclosed root tag")
  }

  var atRest = parser.state === S.BEGIN || parser.state === S.TEXT
  if (!atRest) {
    error(parser, "Unexpected end")
  }

  closeText(parser)
  parser.c = ""
  parser.closed = true
  emit(parser, "onend")

  // reset all state, keeping the same mode and options
  SAXParser.call(parser, parser.strict, parser.opt)
  return parser
}
| |
// Report `message` as an error, but only when the parser is in strict mode.
// Throws when called with something that is not a SAXParser instance.
function strictFail (parser, message) {
  var isParser = typeof parser === 'object' && parser instanceof SAXParser
  if (!isParser) {
    throw new Error('bad call to strictFail')
  }
  if (parser.strict) {
    error(parser, message)
  }
}
| |
// Start a new tag object from parser.tagName.
// In loose mode the name is first case-folded with parser.looseCase. The
// tag is stored on parser.tag with an empty attribute map, and the list of
// deferred attributes is cleared.
function newTag (parser) {
  if (!parser.strict) {
    parser.tagName = parser.tagName[parser.looseCase]()
  }

  var enclosing = parser.tags[parser.tags.length - 1] || parser
  var tag = { name: parser.tagName, attributes: {} }
  parser.tag = tag

  // Start out sharing the enclosing scope's namespaces; this is overridden
  // if the tag contains an xmlns="foo" or xmlns:foo="bar" attribute.
  if (parser.opt.xmlns) {
    tag.ns = enclosing.ns
  }
  parser.attribList.length = 0
}
| |
// Split a qualified name into { prefix, local }.
//   "a:b"  -> prefix "a", local "b"
//   "b"    -> prefix "",  local "b"
// An attribute named exactly "xmlns" (attribute truthy) is the default
// namespace declaration and yields prefix "xmlns" with an empty local part.
function qname (name, attribute) {
  var hasColon = name.indexOf(":") >= 0

  // <x "xmlns"="http://foo">
  if (attribute && name === "xmlns") {
    return { prefix: "xmlns", local: "" }
  }

  if (!hasColon) {
    return { prefix: "", local: name }
  }

  var parts = name.split(":")
  return { prefix: parts[0], local: parts[1] }
}
| |
// Finish the attribute currently held in parser.attribName/attribValue.
//
// In loose mode the name is case-folded first. A name that was already
// seen on this tag is dropped (the first occurrence wins). In xmlns mode
// namespace declarations are bound on the tag and the attribute is queued
// in parser.attribList so that openTag() can emit it once all bindings are
// known; otherwise it is stored on the tag and "onattribute" is emitted
// right away. Both attribute buffers are cleared afterwards.
//
// Returns "" when a duplicate was dropped, undefined otherwise.
function attrib (parser) {
  if (!parser.strict) parser.attribName = parser.attribName[parser.looseCase]()

  // attribList holds [name, value] pairs, so the queued names have to be
  // compared one by one; a plain indexOf(name) on the list can never match
  // and would let duplicates through in xmlns mode.
  var name = parser.attribName
  var alreadyQueued = parser.attribList.some(function (pair) {
    return pair[0] === name
  })
  if (alreadyQueued || parser.tag.attributes.hasOwnProperty(name)) {
    return parser.attribName = parser.attribValue = ""
  }

  if (parser.opt.xmlns) {
    var qn = qname(parser.attribName, true)
      , prefix = qn.prefix
      , local = qn.local

    if (prefix === "xmlns") {
      // namespace binding attribute; push the binding into scope
      if (local === "xml" && parser.attribValue !== XML_NAMESPACE) {
        strictFail( parser
                  , "xml: prefix must be bound to " + XML_NAMESPACE + "\n"
                  + "Actual: " + parser.attribValue )
      } else if (local === "xmlns" && parser.attribValue !== XMLNS_NAMESPACE) {
        strictFail( parser
                  , "xmlns: prefix must be bound to " + XMLNS_NAMESPACE + "\n"
                  + "Actual: " + parser.attribValue )
      } else {
        var tag = parser.tag
          , parent = parser.tags[parser.tags.length - 1] || parser
        // give the tag its own scope the first time it declares a binding
        if (tag.ns === parent.ns) {
          tag.ns = Object.create(parent.ns)
        }
        tag.ns[local] = parser.attribValue
      }
    }

    // defer onattribute events until all attributes have been seen
    // so any new bindings can take effect; preserve attribute order
    // so deferred events can be emitted in document order
    parser.attribList.push([parser.attribName, parser.attribValue])
  } else {
    // in non-xmlns mode, we can emit the event right away
    parser.tag.attributes[parser.attribName] = parser.attribValue
    emitNode( parser
            , "onattribute"
            , { name: parser.attribName
              , value: parser.attribValue } )
  }

  parser.attribName = parser.attribValue = ""
}
| |
// Finish the opening tag held in parser.tag and emit it.
//
// parser      - the parser; parser.tag must have been set up by newTag().
// selfClosing - truthy for <tag/>; recorded as tag.isSelfClosing.
//
// In xmlns mode the tag is annotated with prefix/local/uri, the namespace
// bindings it introduced are emitted as "onopennamespace", and the
// attributes deferred in parser.attribList are resolved, stored on the tag
// and emitted as "onattribute". The tag is then pushed on parser.tags and
// emitted as "onopentag". Unless self-closing, the parser moves to the
// SCRIPT state (for <script> when scripts are enabled) or the TEXT state
// and the current tag/tagName are cleared.
function openTag (parser, selfClosing) {
  if (parser.opt.xmlns) {
    var tag = parser.tag

    // add namespace info to tag
    var tagQName = qname(parser.tagName)
    tag.prefix = tagQName.prefix
    tag.local = tagQName.local
    tag.uri = tag.ns[tagQName.prefix] || ""

    if (tag.prefix && !tag.uri) {
      strictFail(parser, "Unbound namespace prefix: " +
        JSON.stringify(parser.tagName))
      tag.uri = tagQName.prefix
    }

    // emit namespace binding events for bindings this tag introduced
    var enclosing = parser.tags[parser.tags.length - 1] || parser
    if (tag.ns && enclosing.ns !== tag.ns) {
      var bound = Object.keys(tag.ns)
      for (var k = 0; k < bound.length; k++) {
        emitNode(parser, "onopennamespace",
          { prefix: bound[k], uri: tag.ns[bound[k]] })
      }
    }

    // handle deferred onattribute events
    // Note: do not apply default ns to attributes:
    //   http://www.w3.org/TR/REC-xml-names/#defaulting
    var pendingCount = parser.attribList.length
    for (var i = 0; i < pendingCount; i++) {
      var pair = parser.attribList[i]
      var attrName = pair[0]
      var attrQName = qname(attrName, true)
      var attrPrefix = attrQName.prefix
      var attrUri = ""
      if (!(attrPrefix == "")) {
        attrUri = tag.ns[attrPrefix] || ""
      }
      var attribute = {
        name: attrName,
        value: pair[1],
        prefix: attrPrefix,
        local: attrQName.local,
        uri: attrUri
      }

      // if there's any attributes with an undefined namespace,
      // then fail on them now.
      if (attrPrefix && attrPrefix != "xmlns" && !attrUri) {
        strictFail(parser, "Unbound namespace prefix: " +
          JSON.stringify(attrPrefix))
        attribute.uri = attrPrefix
      }
      parser.tag.attributes[attrName] = attribute
      emitNode(parser, "onattribute", attribute)
    }
    parser.attribList.length = 0
  }

  parser.tag.isSelfClosing = !!selfClosing

  // process the tag
  parser.sawRoot = true
  parser.tags.push(parser.tag)
  emitNode(parser, "onopentag", parser.tag)

  if (!selfClosing) {
    // special case for <script> in non-strict mode.
    var opensScript = !parser.noscript &&
      parser.tagName.toLowerCase() === "script"
    parser.state = opensScript ? S.SCRIPT : S.TEXT
    parser.tag = null
    parser.tagName = ""
  }

  parser.attribValue = ""
  parser.attribName = ""
  parser.attribList.length = 0
}
| |
// Handle a closing tag whose name is in parser.tagName.
//
// - An empty name (</>) is reported in strict mode and kept as text.
// - While script content is buffered, only </script> (in any letter case,
//   matching how openTag() recognises <script>) ends the script; every
//   other close tag is appended to the script text.
// - Otherwise the stack of open tags is searched from the top for a tag
//   with that name. Every open tag above the match is closed as well
//   (each mismatch is reported in strict mode). Without a match the close
//   tag is reported in strict mode and kept as text.
// For every closed tag "onclosetag" is emitted, followed in xmlns mode by
// "onclosenamespace" for each binding the tag introduced.
function closeTag (parser) {
  if (!parser.tagName) {
    strictFail(parser, "Weird empty close tag.")
    parser.textNode += "</>"
    parser.state = S.TEXT
    return
  }

  if (parser.script) {
    // Compare case-insensitively: the close tag name is still in source
    // case here, and script mode is entered for <SCRIPT> just as for
    // <script>.
    if (parser.tagName.toLowerCase() !== "script") {
      parser.script += "</" + parser.tagName + ">"
      parser.tagName = ""
      parser.state = S.SCRIPT
      return
    }
    emitNode(parser, "onscript", parser.script)
    parser.script = ""
  }

  // first make sure that the closing tag actually exists.
  // <a><b></c></b></a> will close everything, otherwise.
  var t = parser.tags.length
  var tagName = parser.tagName
  if (!parser.strict) tagName = tagName[parser.looseCase]()
  var closeTo = tagName
  while (t--) {
    var close = parser.tags[t]
    if (close.name !== closeTo) {
      // fail the first time in strict mode
      strictFail(parser, "Unexpected close tag")
    } else break
  }

  // didn't find it.  we already failed for strict, so just abort.
  if (t < 0) {
    strictFail(parser, "Unmatched closing tag: " + parser.tagName)
    parser.textNode += "</" + parser.tagName + ">"
    parser.state = S.TEXT
    return
  }
  parser.tagName = tagName

  // pop and close every tag down to (and including) the matched one
  var s = parser.tags.length
  while (s-- > t) {
    var tag = parser.tag = parser.tags.pop()
    parser.tagName = parser.tag.name
    emitNode(parser, "onclosetag", parser.tagName)

    var parent = parser.tags[parser.tags.length - 1] || parser
    if (parser.opt.xmlns && tag.ns !== parent.ns) {
      // remove namespace bindings introduced by tag
      Object.keys(tag.ns).forEach(function (p) {
        var n = tag.ns[p]
        emitNode(parser, "onclosenamespace", { prefix: p, uri: n })
      })
    }
  }
  if (t === 0) parser.closedRoot = true
  parser.tagName = parser.attribValue = parser.attribName = ""
  parser.attribList.length = 0
  parser.state = S.TEXT
}
| |
// Resolve the entity name held in parser.entity to its replacement text.
//
// Named entities are looked up in parser.ENTITIES, first as written and
// then lower-cased. Numeric references (#123 decimal, #x7B hexadecimal)
// are converted with String.fromCodePoint. Anything else is reported in
// strict mode and returned unchanged as "&name;".
function parseEntity (parser) {
  var entity = parser.entity
    , entityLC = entity.toLowerCase()
    , num
    , numStr = ""

  // Only string/number table entries count as entities. The table inherits
  // from Object.prototype, so names like "constructor" would otherwise
  // resolve to built-in functions.
  function named (name) {
    var value = parser.ENTITIES[name]
    var usable = typeof value === "string" || typeof value === "number"
    return usable && value ? value : undefined
  }

  var replacement = named(entity) || named(entityLC)
  if (replacement) return replacement

  entity = entityLC
  if (entity.charAt(0) === "#") {
    if (entity.charAt(1) === "x") {
      entity = entity.slice(2)
      num = parseInt(entity, 16)
      numStr = num.toString(16)
    } else {
      entity = entity.slice(1)
      num = parseInt(entity, 10)
      numStr = num.toString(10)
    }
  }
  // leading zeros are allowed in the reference but absent from numStr
  entity = entity.replace(/^0+/, "")

  // Reject anything that did not round-trip as a number, and anything
  // outside the Unicode range: String.fromCodePoint throws a RangeError
  // for NaN ("&#nan;", "&;"), negative values and values above 0x10FFFF.
  var valid = typeof num === "number" && !isNaN(num) &&
    num >= 0 && num <= 0x10FFFF &&
    numStr.toLowerCase() === entity
  if (!valid) {
    strictFail(parser, "Invalid character entity")
    return "&" + parser.entity + ";"
  }

  return String.fromCodePoint(num)
}
| |
| function write (chunk) { |
| var parser = this |
| if (this.error) throw this.error |
| if (parser.closed) return error(parser, |
| "Cannot write after close. Assign an onready handler.") |
| if (chunk === null) return end(parser) |
| var i = 0, c = "" |
| while (parser.c = c = chunk.charAt(i++)) { |
| if (parser.trackPosition) { |
| parser.position ++ |
| if (c === "\n") { |
| parser.line ++ |
| parser.column = 0 |
| } else parser.column ++ |
| } |
| switch (parser.state) { |
| |
| case S.BEGIN: |
| if (c === "<") { |
| parser.state = S.OPEN_WAKA |
| parser.startTagPosition = parser.position |
| } else if (not(whitespace,c)) { |
| // have to process this as a text node. |
| // weird, but happens. |
| strictFail(parser, "Non-whitespace before first tag.") |
| parser.textNode = c |
| parser.state = S.TEXT |
| } |
| continue |
| |
| case S.TEXT: |
| if (parser.sawRoot && !parser.closedRoot) { |
| var starti = i-1 |
| while (c && c!=="<" && c!=="&") { |
| c = chunk.charAt(i++) |
| if (c && parser.trackPosition) { |
| parser.position ++ |
| if (c === "\n") { |
| parser.line ++ |
| parser.column = 0 |
| } else parser.column ++ |
| } |
| } |
| parser.textNode += chunk.substring(starti, i-1) |
| } |
| if (c === "<") { |
| parser.state = S.OPEN_WAKA |
| parser.startTagPosition = parser.position |
| } else { |
| if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot)) |
| strictFail(parser, "Text data outside of root node.") |
| if (c === "&") parser.state = S.TEXT_ENTITY |
| else parser.textNode += c |
| } |
| continue |
| |
| case S.SCRIPT: |
| // only non-strict |
| if (c === "<") { |
| parser.state = S.SCRIPT_ENDING |
| } else parser.script += c |
| continue |
| |
| case S.SCRIPT_ENDING: |
| if (c === "/") { |
| parser.state = S.CLOSE_TAG |
| } else { |
| parser.script += "<" + c |
| parser.state = S.SCRIPT |
| } |
| continue |
| |
| case S.OPEN_WAKA: |
| // either a /, ?, !, or text is coming next. |
| if (c === "!") { |
| parser.state = S.SGML_DECL |
| parser.sgmlDecl = "" |
| } else if (is(whitespace, c)) { |
| // wait for it... |
| } else if (is(nameStart,c)) { |
| parser.state = S.OPEN_TAG |
| parser.tagName = c |
| } else if (c === "/") { |
| parser.state = S.CLOSE_TAG |
| parser.tagName = "" |
| } else if (c === "?") { |
| parser.state = S.PROC_INST |
| parser.procInstName = parser.procInstBody = "" |
| } else { |
| strictFail(parser, "Unencoded <") |
| // if there was some whitespace, then add that in. |
| if (parser.startTagPosition + 1 < parser.position) { |
| var pad = parser.position - parser.startTagPosition |
| c = new Array(pad).join(" ") + c |
| } |
| parser.textNode += "<" + c |
| parser.state = S.TEXT |
| } |
| continue |
| |
| case S.SGML_DECL: |
| if ((parser.sgmlDecl+c).toUpperCase() === CDATA) { |
| emitNode(parser, "onopencdata") |
| parser.state = S.CDATA |
| parser.sgmlDecl = "" |
| parser.cdata = "" |
| } else if (parser.sgmlDecl+c === "--") { |
| parser.state = S.COMMENT |
| parser.comment = "" |
| parser.sgmlDecl = "" |
| } else if ((parser.sgmlDecl+c).toUpperCase() === DOCTYPE) { |
| parser.state = S.DOCTYPE |
| if (parser.doctype || parser.sawRoot) strictFail(parser, |
| "Inappropriately located doctype declaration") |
| parser.doctype = "" |
| parser.sgmlDecl = "" |
| } else if (c === ">") { |
| emitNode(parser, "onsgmldeclaration", parser.sgmlDecl) |
| parser.sgmlDecl = "" |
| parser.state = S.TEXT |
| } else if (is(quote, c)) { |
| parser.state = S.SGML_DECL_QUOTED |
| parser.sgmlDecl += c |
| } else parser.sgmlDecl += c |
| continue |
| |
| case S.SGML_DECL_QUOTED: |
| if (c === parser.q) { |
| parser.state = S.SGML_DECL |
| parser.q = "" |
| } |
| parser.sgmlDecl += c |
| continue |
| |
| case S.DOCTYPE: |
| if (c === ">") { |
| parser.state = S.TEXT |
| emitNode(parser, "ondoctype", parser.doctype) |
| parser.doctype = true // just remember that we saw it. |
| } else { |
| parser.doctype += c |
| if (c === "[") parser.state = S.DOCTYPE_DTD |
| else if (is(quote, c)) { |
| parser.state = S.DOCTYPE_QUOTED |
| parser.q = c |
| } |
| } |
| continue |
| |
| case S.DOCTYPE_QUOTED: |
| parser.doctype += c |
| if (c === parser.q) { |
| parser.q = "" |
| parser.state = S.DOCTYPE |
| } |
| continue |
| |
| case S.DOCTYPE_DTD: |
| parser.doctype += c |
| if (c === "]") parser.state = S.DOCTYPE |
| else if (is(quote,c)) { |
| parser.state = S.DOCTYPE_DTD_QUOTED |
| parser.q = c |
| } |
| continue |
| |
| case S.DOCTYPE_DTD_QUOTED: |
| parser.doctype += c |
| if (c === parser.q) { |
| parser.state = S.DOCTYPE_DTD |
| parser.q = "" |
| } |
| continue |
| |
| case S.COMMENT: |
| if (c === "-") parser.state = S.COMMENT_ENDING |
| else parser.comment += c |
| continue |
| |
| case S.COMMENT_ENDING: |
| if (c === "-") { |
| parser.state = S.COMMENT_ENDED |
| parser.comment = textopts(parser.opt, parser.comment) |
| if (parser.comment) emitNode(parser, "oncomment", parser.comment) |
| parser.comment = "" |
| } else { |
| parser.comment += "-" + c |
| parser.state = S.COMMENT |
| } |
| continue |
| |
| case S.COMMENT_ENDED: |
| if (c !== ">") { |
| strictFail(parser, "Malformed comment") |
| // allow <!-- blah -- bloo --> in non-strict mode, |
| // which is a comment of " blah -- bloo " |
| parser.comment += "--" + c |
| parser.state = S.COMMENT |
| } else parser.state = S.TEXT |
| continue |
| |
| case S.CDATA: |
| if (c === "]") parser.state = S.CDATA_ENDING |
| else parser.cdata += c |
| continue |
| |
| case S.CDATA_ENDING: |
| if (c === "]") parser.state = S.CDATA_ENDING_2 |
| else { |
| parser.cdata += "]" + c |
| parser.state = S.CDATA |
| } |
| continue |
| |
| case S.CDATA_ENDING_2: |
| if (c === ">") { |
| if (parser.cdata) emitNode(parser, "oncdata", parser.cdata) |
| emitNode(parser, "onclosecdata") |
| parser.cdata = "" |
| parser.state = S.TEXT |
| } else if (c === "]") { |
| parser.cdata += "]" |
| } else { |
| parser.cdata += "]]" + c |
| parser.state = S.CDATA |
| } |
| continue |
| |
| case S.PROC_INST: |
| if (c === "?") parser.state = S.PROC_INST_ENDING |
| else if (is(whitespace, c)) parser.state = S.PROC_INST_BODY |
| else parser.procInstName += c |
| continue |
| |
| case S.PROC_INST_BODY: |
| if (!parser.procInstBody && is(whitespace, c)) continue |
| else if (c === "?") parser.state = S.PROC_INST_ENDING |
| else parser.procInstBody += c |
| continue |
| |
| case S.PROC_INST_ENDING: |
| if (c === ">") { |
| emitNode(parser, "onprocessinginstruction", { |
| name : parser.procInstName, |
| body : parser.procInstBody |
| }) |
| parser.procInstName = parser.procInstBody = "" |
| parser.state = S.TEXT |
| } else { |
| parser.procInstBody += "?" + c |
| parser.state = S.PROC_INST_BODY |
| } |
| continue |
| |
| case S.OPEN_TAG: |
| if (is(nameBody, c)) parser.tagName += c |
| else { |
| newTag(parser) |
| if (c === ">") openTag(parser) |
| else if (c === "/") parser.state = S.OPEN_TAG_SLASH |
| else { |
| if (not(whitespace, c)) strictFail( |
| parser, "Invalid character in tag name") |
| parser.state = S.ATTRIB |
| } |
| } |
| continue |
| |
| case S.OPEN_TAG_SLASH: |
| if (c === ">") { |
| openTag(parser, true) |
| closeTag(parser) |
| } else { |
| strictFail(parser, "Forward-slash in opening tag not followed by >") |
| parser.state = S.ATTRIB |
| } |
| continue |
| |
| case S.ATTRIB: |
| // haven't read the attribute name yet. |
| if (is(whitespace, c)) continue |
| else if (c === ">") openTag(parser) |
| else if (c === "/") parser.state = S.OPEN_TAG_SLASH |
| else if (is(nameStart, c)) { |
| parser.attribName = c |
| parser.attribValue = "" |
| parser.state = S.ATTRIB_NAME |
| } else strictFail(parser, "Invalid attribute name") |
| continue |
| |
| case S.ATTRIB_NAME: |
| if (c === "=") parser.state = S.ATTRIB_VALUE |
| else if (c === ">") { |
| strictFail(parser, "Attribute without value") |
| parser.attribValue = parser.attribName |
| attrib(parser) |
| openTag(parser) |
| } |
| else if (is(whitespace, c)) parser.state = S.ATTRIB_NAME_SAW_WHITE |
| else if (is(nameBody, c)) parser.attribName += c |
| else strictFail(parser, "Invalid attribute name") |
| continue |
| |
| case S.ATTRIB_NAME_SAW_WHITE: |
| if (c === "=") parser.state = S.ATTRIB_VALUE |
| else if (is(whitespace, c)) continue |
| else { |
| strictFail(parser, "Attribute without value") |
| parser.tag.attributes[parser.attribName] = "" |
| parser.attribValue = "" |
| emitNode(parser, "onattribute", |
| { name : parser.attribName, value : "" }) |
| parser.attribName = "" |
| if (c === ">") openTag(parser) |
| else if (is(nameStart, c)) { |
| parser.attribName = c |
| parser.state = S.ATTRIB_NAME |
| } else { |
| strictFail(parser, "Invalid attribute name") |
| parser.state = S.ATTRIB |
| } |
| } |
| continue |
| |
| case S.ATTRIB_VALUE: |
| if (is(whitespace, c)) continue |
| else if (is(quote, c)) { |
| parser.q = c |
| parser.state = S.ATTRIB_VALUE_QUOTED |
| } else { |
| strictFail(parser, "Unquoted attribute value") |
| parser.state = S.ATTRIB_VALUE_UNQUOTED |
| parser.attribValue = c |
| } |
| continue |
| |
| case S.ATTRIB_VALUE_QUOTED: |
| if (c !== parser.q) { |
| if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_Q |
| else parser.attribValue += c |
| continue |
| } |
| attrib(parser) |
| parser.q = "" |
| parser.state = S.ATTRIB_VALUE_CLOSED |
| continue |
| |
| case S.ATTRIB_VALUE_CLOSED: |
| if (is(whitespace, c)) { |
| parser.state = S.ATTRIB |
| } else if (c === ">") openTag(parser) |
| else if (c === "/") parser.state = S.OPEN_TAG_SLASH |
| else if (is(nameStart, c)) { |
| strictFail(parser, "No whitespace between attributes") |
| parser.attribName = c |
| parser.attribValue = "" |
| parser.state = S.ATTRIB_NAME |
| } else strictFail(parser, "Invalid attribute name") |
| continue |
| |
| case S.ATTRIB_VALUE_UNQUOTED: |
| if (not(attribEnd,c)) { |
| if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_U |
| else parser.attribValue += c |
| continue |
| } |
| attrib(parser) |
| if (c === ">") openTag(parser) |
| else parser.state = S.ATTRIB |
| continue |
| |
| case S.CLOSE_TAG: |
| if (!parser.tagName) { |
| if (is(whitespace, c)) continue |
| else if (not(nameStart, c)) { |
| if (parser.script) { |
| parser.script += "</" + c |
| parser.state = S.SCRIPT |
| } else { |
| strictFail(parser, "Invalid tagname in closing tag.") |
| } |
| } else parser.tagName = c |
| } |
| else if (c === ">") closeTag(parser) |
| else if (is(nameBody, c)) parser.tagName += c |
| else if (parser.script) { |
| parser.script += "</" + parser.tagName |
| parser.tagName = "" |
| parser.state = S.SCRIPT |
| } else { |
| if (not(whitespace, c)) strictFail(parser, |
| "Invalid tagname in closing tag") |
| parser.state = S.CLOSE_TAG_SAW_WHITE |
| } |
| continue |
| |
| case S.CLOSE_TAG_SAW_WHITE: |
| if (is(whitespace, c)) continue |
| if (c === ">") closeTag(parser) |
| else strictFail(parser, "Invalid characters in closing tag") |
| continue |
| |
| case S.TEXT_ENTITY: |
| case S.ATTRIB_VALUE_ENTITY_Q: |
| case S.ATTRIB_VALUE_ENTITY_U: |
| switch(parser.state) { |
| case S.TEXT_ENTITY: |
| var returnState = S.TEXT, buffer = "textNode" |
| break |
| |
| case S.ATTRIB_VALUE_ENTITY_Q: |
| var returnState = S.ATTRIB_VALUE_QUOTED, buffer = "attribValue" |
| break |
| |
| case S.ATTRIB_VALUE_ENTITY_U: |
| var returnState = S.ATTRIB_VALUE_UNQUOTED, buffer = "attribValue" |
| break |
| } |
| if (c === ";") { |
| parser[buffer] += parseEntity(parser) |
| parser.entity = "" |
| parser.state = returnState |
| } |
| else if (is(entity, c)) parser.entity += c |
| else { |
| strictFail(parser, "Invalid character entity") |
| parser[buffer] += "&" + parser.entity + c |
| parser.entity = "" |
| parser.state = returnState |
| } |
| continue |
| |
| default: |
| throw new Error(parser, "Unknown state: " + parser.state) |
| } |
| } // while |
| // cdata blocks can get very big under normal conditions. emit and move on. |
| // if (parser.state === S.CDATA && parser.cdata) { |
| // emitNode(parser, "oncdata", parser.cdata) |
| // parser.cdata = "" |
| // } |
| if (parser.position >= parser.bufferCheckPosition) checkBufferLength(parser) |
| return parser |
| } |
| |
| /*! http://mths.be/fromcodepoint v0.1.0 by @mathias */ |
// Polyfill for `String.fromCodePoint`; installed only when the host
// environment does not already provide an implementation.
if (!String.fromCodePoint) {
  (function() {
    var stringFromCharCode = String.fromCharCode;
    var floor = Math.floor;
    // Code units are converted in batches of roughly this size so that
    // `Function.prototype.apply` is never handed an oversized argument list.
    var MAX_SIZE = 0x4000;

    /**
     * Build a string from a sequence of Unicode code points.
     *
     * All inputs are read through `arguments`; the single named parameter
     * exists only so that `fromCodePoint.length` is 1, matching the
     * standard function.
     *
     * @param {...*} _ Values coerced with `Number()`; each must be an
     *   integer in the range 0..0x10FFFF.
     * @returns {string} The concatenated characters ('' for no arguments).
     * @throws {RangeError} If any argument is not a valid code point.
     */
    var fromCodePoint = function(_) {
      var codeUnits = [];
      var highSurrogate;
      var lowSurrogate;
      var index = -1;
      var length = arguments.length;
      if (!length) {
        return '';
      }
      var result = '';
      while (++index < length) {
        var codePoint = Number(arguments[index]);
        if (
          !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
          codePoint < 0 || // not a valid Unicode code point
          codePoint > 0x10FFFF || // not a valid Unicode code point
          floor(codePoint) !== codePoint // not an integer
        ) {
          throw new RangeError('Invalid code point: ' + codePoint);
        }
        if (codePoint <= 0xFFFF) { // BMP code point
          codeUnits.push(codePoint);
        } else { // Astral code point; split in surrogate halves
          // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
          codePoint -= 0x10000;
          highSurrogate = (codePoint >> 10) + 0xD800;
          lowSurrogate = (codePoint % 0x400) + 0xDC00;
          codeUnits.push(highSurrogate, lowSurrogate);
        }
        // Flush on the last argument, or once the batch has grown large.
        if (index + 1 === length || codeUnits.length > MAX_SIZE) {
          result += stringFromCharCode.apply(null, codeUnits);
          codeUnits.length = 0;
        }
      }
      return result;
    };

    // Prefer a non-enumerable property, like the native function. Some
    // legacy engines expose `Object.defineProperty` but throw when it is
    // applied to a non-DOM object (reported for IE 8); in that case fall
    // back to plain assignment instead of aborting module load.
    var installed = false;
    if (Object.defineProperty) {
      try {
        Object.defineProperty(String, 'fromCodePoint', {
          'value': fromCodePoint,
          'configurable': true,
          'writable': true
        });
        installed = true;
      } catch (error) {
        installed = false;
      }
    }
    if (!installed) {
      String.fromCodePoint = fromCodePoint;
    }
  }());
}
| |
| })(typeof exports === "undefined" ? sax = {} : exports) |