diff options
Diffstat (limited to 'generator/node_modules/@xmldom/xmldom/lib/sax.js')
-rw-r--r-- | generator/node_modules/@xmldom/xmldom/lib/sax.js | 662 |
1 files changed, 662 insertions, 0 deletions
diff --git a/generator/node_modules/@xmldom/xmldom/lib/sax.js b/generator/node_modules/@xmldom/xmldom/lib/sax.js new file mode 100644 index 0000000..2ec3ea1 --- /dev/null +++ b/generator/node_modules/@xmldom/xmldom/lib/sax.js @@ -0,0 +1,662 @@ +var NAMESPACE = require("./conventions").NAMESPACE; + +//[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] +//[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] +//[5] Name ::= NameStartChar (NameChar)* +var nameStartChar = /[A-Z_a-z\xC0-\xD6\xD8-\xF6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]///\u10000-\uEFFFF +var nameChar = new RegExp("[\\-\\.0-9"+nameStartChar.source.slice(1,-1)+"\\u00B7\\u0300-\\u036F\\u203F-\\u2040]"); +var tagNamePattern = new RegExp('^'+nameStartChar.source+nameChar.source+'*(?:\:'+nameStartChar.source+nameChar.source+'*)?$'); +//var tagNamePattern = /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/ +//var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',') + +//S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE +//S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE +var S_TAG = 0;//tag name offerring +var S_ATTR = 1;//attr name offerring +var S_ATTR_SPACE=2;//attr name end and space offer +var S_EQ = 3;//=space? +var S_ATTR_NOQUOT_VALUE = 4;//attr value(no quot value only) +var S_ATTR_END = 5;//attr value end and no space(quot end) +var S_TAG_SPACE = 6;//(attr value end || tag end ) && (space offer) +var S_TAG_CLOSE = 7;//closed el<el /> + +/** + * Creates an error that will not be caught by XMLReader aka the SAX parser. + * + * @param {string} message + * @param {any?} locator Optional, can provide details about the location in the source + * @constructor + */ +function ParseError(message, locator) { + this.message = message + this.locator = locator + if(Error.captureStackTrace) Error.captureStackTrace(this, ParseError); +} +ParseError.prototype = new Error(); +ParseError.prototype.name = ParseError.name + +function XMLReader(){ + +} + +XMLReader.prototype = { + parse:function(source,defaultNSMap,entityMap){ + var domBuilder = this.domBuilder; + domBuilder.startDocument(); + _copy(defaultNSMap ,defaultNSMap = {}) + parse(source,defaultNSMap,entityMap, + domBuilder,this.errorHandler); + domBuilder.endDocument(); + } +} +function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){ + function fixedFromCharCode(code) { + // String.prototype.fromCharCode does not supports + // > 2 bytes unicode chars directly + if (code > 0xffff) { + code -= 0x10000; + var surrogate1 = 0xd800 + (code >> 10) + , surrogate2 = 0xdc00 + (code & 0x3ff); + + return String.fromCharCode(surrogate1, surrogate2); + } else { + return String.fromCharCode(code); + } + } + function entityReplacer(a){ + var k = a.slice(1,-1); + if (Object.hasOwnProperty.call(entityMap, k)) { + return entityMap[k]; + }else if(k.charAt(0) === '#'){ + return fixedFromCharCode(parseInt(k.substr(1).replace('x','0x'))) + }else{ + errorHandler.error('entity not found:'+a); + return a; + } + } + function appendText(end){//has some bugs + if(end>start){ + var xt = source.substring(start,end).replace(/&#?\w+;/g,entityReplacer); + locator&&position(start); + domBuilder.characters(xt,0,end-start); + start = end + } + } + function position(p,m){ + while(p>=lineEnd && (m = linePattern.exec(source))){ + lineStart = m.index; + lineEnd = lineStart + m[0].length; + locator.lineNumber++; + //console.log('line++:',locator,startPos,endPos) + } + locator.columnNumber = p-lineStart+1; + } + var lineStart = 0; + var lineEnd = 0; + var linePattern = /.*(?:\r\n?|\n)|.*$/g + var locator = domBuilder.locator; + + var parseStack = [{currentNSMap:defaultNSMapCopy}] + var closeMap = {}; + var start = 0; + while(true){ + try{ + var tagStart = source.indexOf('<',start); + if(tagStart<0){ + if(!source.substr(start).match(/^\s*$/)){ + var doc = domBuilder.doc; + var text = doc.createTextNode(source.substr(start)); + doc.appendChild(text); + domBuilder.currentElement = text; + } + return; + } + if(tagStart>start){ + appendText(tagStart); + } + switch(source.charAt(tagStart+1)){ + case '/': + var end = source.indexOf('>',tagStart+3); + var tagName = source.substring(tagStart + 2, end).replace(/[ \t\n\r]+$/g, ''); + var config = parseStack.pop(); + if(end<0){ + + tagName = source.substring(tagStart+2).replace(/[\s<].*/,''); + errorHandler.error("end tag name: "+tagName+' is not complete:'+config.tagName); + end = tagStart+1+tagName.length; + }else if(tagName.match(/\s</)){ + tagName = tagName.replace(/[\s<].*/,''); + errorHandler.error("end tag name: "+tagName+' maybe not complete'); + end = tagStart+1+tagName.length; + } + var localNSMap = config.localNSMap; + var endMatch = config.tagName == tagName; + var endIgnoreCaseMach = endMatch || config.tagName&&config.tagName.toLowerCase() == tagName.toLowerCase() + if(endIgnoreCaseMach){ + domBuilder.endElement(config.uri,config.localName,tagName); + if(localNSMap){ + for (var prefix in localNSMap) { + if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) { + domBuilder.endPrefixMapping(prefix); + } + } + } + if(!endMatch){ + errorHandler.fatalError("end tag name: "+tagName+' is not match the current start tagName:'+config.tagName ); // No known test case + } + }else{ + parseStack.push(config) + } + + end++; + break; + // end elment + case '?':// <?...?> + locator&&position(tagStart); + end = parseInstruction(source,tagStart,domBuilder); + break; + case '!':// <!doctype,<![CDATA,<!-- + locator&&position(tagStart); + end = parseDCC(source,tagStart,domBuilder,errorHandler); + break; + default: + locator&&position(tagStart); + var el = new ElementAttributes(); + var currentNSMap = parseStack[parseStack.length-1].currentNSMap; + //elStartEnd + var end = parseElementStartPart(source,tagStart,el,currentNSMap,entityReplacer,errorHandler); + var len = el.length; + + + if(!el.closed && fixSelfClosed(source,end,el.tagName,closeMap)){ + el.closed = true; + if(!entityMap.nbsp){ + errorHandler.warning('unclosed xml attribute'); + } + } + if(locator && len){ + var locator2 = copyLocator(locator,{}); + //try{//attribute position fixed + for(var i = 0;i<len;i++){ + var a = el[i]; + position(a.offset); + a.locator = copyLocator(locator,{}); + } + domBuilder.locator = locator2 + if(appendElement(el,domBuilder,currentNSMap)){ + parseStack.push(el) + } + domBuilder.locator = locator; + }else{ + if(appendElement(el,domBuilder,currentNSMap)){ + parseStack.push(el) + } + } + + if (NAMESPACE.isHTML(el.uri) && !el.closed) { + end = parseHtmlSpecialContent(source,end,el.tagName,entityReplacer,domBuilder) + } else { + end++; + } + } + }catch(e){ + if (e instanceof ParseError) { + throw e; + } + errorHandler.error('element parse error: '+e) + end = -1; + } + if(end>start){ + start = end; + }else{ + //TODO: 这里有可能sax回退,有位置错误风险 + appendText(Math.max(tagStart,start)+1); + } + } +} +function copyLocator(f,t){ + t.lineNumber = f.lineNumber; + t.columnNumber = f.columnNumber; + return t; +} + +/** + * @see #appendElement(source,elStartEnd,el,selfClosed,entityReplacer,domBuilder,parseStack); + * @return end of the elementStartPart(end of elementEndPart for selfClosed el) + */ +function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,errorHandler){ + + /** + * @param {string} qname + * @param {string} value + * @param {number} startIndex + */ + function addAttribute(qname, value, startIndex) { + if (el.attributeNames.hasOwnProperty(qname)) { + errorHandler.fatalError('Attribute ' + qname + ' redefined') + } + el.addValue( + qname, + // @see https://www.w3.org/TR/xml/#AVNormalize + // since the xmldom sax parser does not "interpret" DTD the following is not implemented: + // - recursive replacement of (DTD) entity references + // - trimming and collapsing multiple spaces into a single one for attributes that are not of type CDATA + value.replace(/[\t\n\r]/g, ' ').replace(/&#?\w+;/g, entityReplacer), + startIndex + ) + } + var attrName; + var value; + var p = ++start; + var s = S_TAG;//status + while(true){ + var c = source.charAt(p); + switch(c){ + case '=': + if(s === S_ATTR){//attrName + attrName = source.slice(start,p); + s = S_EQ; + }else if(s === S_ATTR_SPACE){ + s = S_EQ; + }else{ + //fatalError: equal must after attrName or space after attrName + throw new Error('attribute equal must after attrName'); // No known test case + } + break; + case '\'': + case '"': + if(s === S_EQ || s === S_ATTR //|| s == S_ATTR_SPACE + ){//equal + if(s === S_ATTR){ + errorHandler.warning('attribute value must after "="') + attrName = source.slice(start,p) + } + start = p+1; + p = source.indexOf(c,start) + if(p>0){ + value = source.slice(start, p); + addAttribute(attrName, value, start-1); + s = S_ATTR_END; + }else{ + //fatalError: no end quot match + throw new Error('attribute value no end \''+c+'\' match'); + } + }else if(s == S_ATTR_NOQUOT_VALUE){ + value = source.slice(start, p); + addAttribute(attrName, value, start); + errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!'); + start = p+1; + s = S_ATTR_END + }else{ + //fatalError: no equal before + throw new Error('attribute value must after "="'); // No known test case + } + break; + case '/': + switch(s){ + case S_TAG: + el.setTagName(source.slice(start,p)); + case S_ATTR_END: + case S_TAG_SPACE: + case S_TAG_CLOSE: + s =S_TAG_CLOSE; + el.closed = true; + case S_ATTR_NOQUOT_VALUE: + case S_ATTR: + break; + case S_ATTR_SPACE: + el.closed = true; + break; + //case S_EQ: + default: + throw new Error("attribute invalid close char('/')") // No known test case + } + break; + case ''://end document + errorHandler.error('unexpected end of input'); + if(s == S_TAG){ + el.setTagName(source.slice(start,p)); + } + return p; + case '>': + switch(s){ + case S_TAG: + el.setTagName(source.slice(start,p)); + case S_ATTR_END: + case S_TAG_SPACE: + case S_TAG_CLOSE: + break;//normal + case S_ATTR_NOQUOT_VALUE://Compatible state + case S_ATTR: + value = source.slice(start,p); + if(value.slice(-1) === '/'){ + el.closed = true; + value = value.slice(0,-1) + } + case S_ATTR_SPACE: + if(s === S_ATTR_SPACE){ + value = attrName; + } + if(s == S_ATTR_NOQUOT_VALUE){ + errorHandler.warning('attribute "'+value+'" missed quot(")!'); + addAttribute(attrName, value, start) + }else{ + if(!NAMESPACE.isHTML(currentNSMap['']) || !value.match(/^(?:disabled|checked|selected)$/i)){ + errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!') + } + addAttribute(value, value, start) + } + break; + case S_EQ: + throw new Error('attribute value missed!!'); + } +// console.log(tagName,tagNamePattern,tagNamePattern.test(tagName)) + return p; + /*xml space '\x20' | #x9 | #xD | #xA; */ + case '\u0080': + c = ' '; + default: + if(c<= ' '){//space + switch(s){ + case S_TAG: + el.setTagName(source.slice(start,p));//tagName + s = S_TAG_SPACE; + break; + case S_ATTR: + attrName = source.slice(start,p) + s = S_ATTR_SPACE; + break; + case S_ATTR_NOQUOT_VALUE: + var value = source.slice(start, p); + errorHandler.warning('attribute "'+value+'" missed quot(")!!'); + addAttribute(attrName, value, start) + case S_ATTR_END: + s = S_TAG_SPACE; + break; + //case S_TAG_SPACE: + //case S_EQ: + //case S_ATTR_SPACE: + // void();break; + //case S_TAG_CLOSE: + //ignore warning + } + }else{//not space +//S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE +//S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE + switch(s){ + //case S_TAG:void();break; + //case S_ATTR:void();break; + //case S_ATTR_NOQUOT_VALUE:void();break; + case S_ATTR_SPACE: + var tagName = el.tagName; + if (!NAMESPACE.isHTML(currentNSMap['']) || !attrName.match(/^(?:disabled|checked|selected)$/i)) { + errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead2!!') + } + addAttribute(attrName, attrName, start); + start = p; + s = S_ATTR; + break; + case S_ATTR_END: + errorHandler.warning('attribute space is required"'+attrName+'"!!') + case S_TAG_SPACE: + s = S_ATTR; + start = p; + break; + case S_EQ: + s = S_ATTR_NOQUOT_VALUE; + start = p; + break; + case S_TAG_CLOSE: + throw new Error("elements closed character '/' and '>' must be connected to"); + } + } + }//end outer switch + //console.log('p++',p) + p++; + } +} +/** + * @return true if has new namespace define + */ +function appendElement(el,domBuilder,currentNSMap){ + var tagName = el.tagName; + var localNSMap = null; + //var currentNSMap = parseStack[parseStack.length-1].currentNSMap; + var i = el.length; + while(i--){ + var a = el[i]; + var qName = a.qName; + var value = a.value; + var nsp = qName.indexOf(':'); + if(nsp>0){ + var prefix = a.prefix = qName.slice(0,nsp); + var localName = qName.slice(nsp+1); + var nsPrefix = prefix === 'xmlns' && localName + }else{ + localName = qName; + prefix = null + nsPrefix = qName === 'xmlns' && '' + } + //can not set prefix,because prefix !== '' + a.localName = localName ; + //prefix == null for no ns prefix attribute + if(nsPrefix !== false){//hack!! + if(localNSMap == null){ + localNSMap = {} + //console.log(currentNSMap,0) + _copy(currentNSMap,currentNSMap={}) + //console.log(currentNSMap,1) + } + currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value; + a.uri = NAMESPACE.XMLNS + domBuilder.startPrefixMapping(nsPrefix, value) + } + } + var i = el.length; + while(i--){ + a = el[i]; + var prefix = a.prefix; + if(prefix){//no prefix attribute has no namespace + if(prefix === 'xml'){ + a.uri = NAMESPACE.XML; + }if(prefix !== 'xmlns'){ + a.uri = currentNSMap[prefix || ''] + + //{console.log('###'+a.qName,domBuilder.locator.systemId+'',currentNSMap,a.uri)} + } + } + } + var nsp = tagName.indexOf(':'); + if(nsp>0){ + prefix = el.prefix = tagName.slice(0,nsp); + localName = el.localName = tagName.slice(nsp+1); + }else{ + prefix = null;//important!! + localName = el.localName = tagName; + } + //no prefix element has default namespace + var ns = el.uri = currentNSMap[prefix || '']; + domBuilder.startElement(ns,localName,tagName,el); + //endPrefixMapping and startPrefixMapping have not any help for dom builder + //localNSMap = null + if(el.closed){ + domBuilder.endElement(ns,localName,tagName); + if(localNSMap){ + for (prefix in localNSMap) { + if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) { + domBuilder.endPrefixMapping(prefix); + } + } + } + }else{ + el.currentNSMap = currentNSMap; + el.localNSMap = localNSMap; + //parseStack.push(el); + return true; + } +} +function parseHtmlSpecialContent(source,elStartEnd,tagName,entityReplacer,domBuilder){ + if(/^(?:script|textarea)$/i.test(tagName)){ + var elEndStart = source.indexOf('</'+tagName+'>',elStartEnd); + var text = source.substring(elStartEnd+1,elEndStart); + if(/[&<]/.test(text)){ + if(/^script$/i.test(tagName)){ + //if(!/\]\]>/.test(text)){ + //lexHandler.startCDATA(); + domBuilder.characters(text,0,text.length); + //lexHandler.endCDATA(); + return elEndStart; + //} + }//}else{//text area + text = text.replace(/&#?\w+;/g,entityReplacer); + domBuilder.characters(text,0,text.length); + return elEndStart; + //} + + } + } + return elStartEnd+1; +} +function fixSelfClosed(source,elStartEnd,tagName,closeMap){ + //if(tagName in closeMap){ + var pos = closeMap[tagName]; + if(pos == null){ + //console.log(tagName) + pos = source.lastIndexOf('</'+tagName+'>') + if(pos<elStartEnd){//忘记闭合 + pos = source.lastIndexOf('</'+tagName) + } + closeMap[tagName] =pos + } + return pos<elStartEnd; + //} +} + +function _copy (source, target) { + for (var n in source) { + if (Object.prototype.hasOwnProperty.call(source, n)) { + target[n] = source[n]; + } + } +} + +function parseDCC(source,start,domBuilder,errorHandler){//sure start with '<!' + var next= source.charAt(start+2) + switch(next){ + case '-': + if(source.charAt(start + 3) === '-'){ + var end = source.indexOf('-->',start+4); + //append comment source.substring(4,end)//<!-- + if(end>start){ + domBuilder.comment(source,start+4,end-start-4); + return end+3; + }else{ + errorHandler.error("Unclosed comment"); + return -1; + } + }else{ + //error + return -1; + } + default: + if(source.substr(start+3,6) == 'CDATA['){ + var end = source.indexOf(']]>',start+9); + domBuilder.startCDATA(); + domBuilder.characters(source,start+9,end-start-9); + domBuilder.endCDATA() + return end+3; + } + //<!DOCTYPE + //startDTD(java.lang.String name, java.lang.String publicId, java.lang.String systemId) + var matchs = split(source,start); + var len = matchs.length; + if(len>1 && /!doctype/i.test(matchs[0][0])){ + var name = matchs[1][0]; + var pubid = false; + var sysid = false; + if(len>3){ + if(/^public$/i.test(matchs[2][0])){ + pubid = matchs[3][0]; + sysid = len>4 && matchs[4][0]; + }else if(/^system$/i.test(matchs[2][0])){ + sysid = matchs[3][0]; + } + } + var lastMatch = matchs[len-1] + domBuilder.startDTD(name, pubid, sysid); + domBuilder.endDTD(); + + return lastMatch.index+lastMatch[0].length + } + } + return -1; +} + + + +function parseInstruction(source,start,domBuilder){ + var end = source.indexOf('?>',start); + if(end){ + var match = source.substring(start,end).match(/^<\?(\S*)\s*([\s\S]*?)\s*$/); + if(match){ + var len = match[0].length; + domBuilder.processingInstruction(match[1], match[2]) ; + return end+2; + }else{//error + return -1; + } + } + return -1; +} + +function ElementAttributes(){ + this.attributeNames = {} +} +ElementAttributes.prototype = { + setTagName:function(tagName){ + if(!tagNamePattern.test(tagName)){ + throw new Error('invalid tagName:'+tagName) + } + this.tagName = tagName + }, + addValue:function(qName, value, offset) { + if(!tagNamePattern.test(qName)){ + throw new Error('invalid attribute:'+qName) + } + this.attributeNames[qName] = this.length; + this[this.length++] = {qName:qName,value:value,offset:offset} + }, + length:0, + getLocalName:function(i){return this[i].localName}, + getLocator:function(i){return this[i].locator}, + getQName:function(i){return this[i].qName}, + getURI:function(i){return this[i].uri}, + getValue:function(i){return this[i].value} +// ,getIndex:function(uri, localName)){ +// if(localName){ +// +// }else{ +// var qName = uri +// } +// }, +// getValue:function(){return this.getValue(this.getIndex.apply(this,arguments))}, +// getType:function(uri,localName){} +// getType:function(i){}, +} + + + +function split(source,start){ + var match; + var buf = []; + var reg = /'[^']+'|"[^"]+"|[^\s<>\/=]+=?|(\/?\s*>|<)/g; + reg.lastIndex = start; + reg.exec(source);//skip < + while(match = reg.exec(source)){ + buf.push(match); + if(match[1])return buf; + } +} + +exports.XMLReader = XMLReader; +exports.ParseError = ParseError; |