- Author:
- David Nickerson <david.nickerson@gmail.com>
- Date:
- 2021-09-17 15:50:49+12:00
- Desc:
- tweak html formatting
- Permanent Source URI:
- https://models.fieldml.org/workspace/a1/rawfile/1b3862589abf79ae9119ee0b5e99a8b785d762e1/dojo-presentation/js/dojo/dojox/xml/DomParser.js
dojo.provide("dojox.xml.DomParser");
dojox.xml.DomParser=new (function(){
/**********************************************************
* The DomParser is a close-to (but not entirely)
* conforming XML parser based on regular
* expressions. It will take any XML fragment
* and return a lightweight JS structure that is
* similar to (but not exactly) the DOM specification.
*
* Getter and setter methods are NOT available; the goal
* was to keep the resulting object model entirely JS-like.
*
* All node types but document fragments are supported;
* all nodes support getElementsByTagName and
* getElementsByTagNameNS (with short names byName and
* byNameNS). The document node supports getElementById
* (byId), and all nodes support a supplemental
* childrenByName/childrenByNameNS method as well.
*
* The object model is intended to be a READONLY format;
* mutation events are NOT supported, and though you
* can change properties on a node-by-node basis, certain
* operations are not supported (such as changing the ID
* of an element).
**********************************************************/
// internal use only: numeric codes stored in the nodeType property of every node.
var nodeTypes={ ELEMENT:1, ATTRIBUTE:2, TEXT:3, CDATA_SECTION:4, PROCESSING_INSTRUCTION:7, COMMENT:8, DOCUMENT:9 };
// compile the regular expressions once.
// NOTE: the patterns are global ("g"), so exec()/test() keep state in
// lastIndex between calls; users must run them to exhaustion or rewind them.
// reTags: [1] tag name (stops at "/", whitespace, "+" or ">"),
//         [2] remainder of the tag up to ">", [3] text up to the next "<".
var reTags=/<([^>\/\s+]*)([^>]*)>([^<]*)/g;
// reAttr: [1] attribute name, [4] double-quoted value, [6] single-quoted value.
var reAttr=/([^=]*)=(("([^"]*)")|('([^']*)'))/g; // patch from tdedischew AT gmail, with additional grouping
// reEntity: [1] entity name, [2] replacement text.
var reEntity=/<!ENTITY\s+([^"]*)\s+"([^"]*)">/g;
// reCData / reComments: [1] the content between the delimiters (non-greedy).
var reCData=/<!\[CDATA\[([\u0001-\uFFFF]*?)\]\]>/g;
var reComments=/<!--([\u0001-\uFFFF]*?)-->/g;
// leading/trailing whitespace, and runs of whitespace.
var trim=/^\s+|\s+$/g;
var normalize=/\s+/g;
// The five predefined XML entities. These must match the escaped form
// (e.g. "&gt;"), not the bare character, otherwise decoding is a no-op.
var egt=/&gt;/g;
var elt=/&lt;/g;
var equot=/&quot;/g;
var eapos=/&apos;/g;
var eamp=/&amp;/g;
// namespace key used for elements/attributes that carry no prefix.
var dNs="_def_";
// create a root node.
function _doc(){
    // summary:
    //     Creates an empty document (root) node.
    // returns:
    //     An object with nodeType DOCUMENT, empty childNodes/namespaces/_nsPaths,
    //     a null documentElement, the shared lookup functions, and a private
    //     id table reachable only through _add, _remove and byId/getElementById.
    return new (function(){
        // id -> element table; private to this document.
        var all={};
        // Guard lookups so that ids such as "toString" or "constructor" are
        // not answered with members inherited from Object.prototype.
        var hasOwn=Object.prototype.hasOwnProperty;
        this.nodeType=nodeTypes.DOCUMENT;
        this.nodeName="#document";
        this.namespaces={};
        this._nsPaths={};
        this.childNodes=[];
        this.documentElement=null;
        // any element with an ID attribute will be added to the internal hashtable.
        this._add=function(obj){
            if(typeof(obj.id)!="undefined"){ all[obj.id]=obj; }
        };
        this._remove=function(id){
            if(hasOwn.call(all, id)){ delete all[id]; }
        };
        // returns the registered element, or undefined when the id is unknown.
        this.byId=this.getElementById=function(id){
            return hasOwn.call(all, id) ? all[id] : undefined;
        };
        this.byName=this.getElementsByTagName=byName;
        this.byNameNS=this.getElementsByTagNameNS=byNameNS;
        this.childrenByName=childrenByName;
    })();
}
// functions attached to element nodes
function byName(name){
    // summary:
    //     Collects every descendant element of this node whose fully
    //     qualified nodeName (i.e. svg:svg) equals `name`; "*" matches all
    //     elements. Results are in depth-first, parent-before-child order.
    var found=[];
    var matchAll=(name=="*");
    function walk(parent){
        var kids=parent.childNodes;
        if(!kids||!kids.length){ return; }
        for(var idx=0, total=kids.length; idx<total; idx++){
            var kid=kids[idx];
            // only elements are reported and descended into.
            if(kid.nodeType!=nodeTypes.ELEMENT){ continue; }
            if(matchAll||kid.nodeName==name){ found.push(kid); }
            walk(kid);
        }
    }
    walk(this);
    return found;
}
function byNameNS(name, ns){
    // summary:
    //     Returns all descendant elements with the given local name in the
    //     given namespace, in depth-first, parent-before-child order.
    // name: String
    //     Local name to match, or "*" for any element in the namespace.
    // ns: String?
    //     Namespace URI as declared through an xmlns attribute. If no
    //     namespace is passed, the default (un-prefixed) namespace is used.
    if(!ns){ ns=dNs; }
    var out=[];
    function collect(node){
        var kids=node.childNodes;
        if(!kids||!kids.length){ return; }
        for(var i=0, l=kids.length; i<l; i++){
            var c=kids[i];
            if(c.nodeType!=nodeTypes.ELEMENT){ continue; }
            // _nsPaths maps a namespace URI to the prefix stored on nodes.
            var prefix=c.ownerDocument._nsPaths[ns];
            // The default token is a prefix, not a URI, so it has no entry in
            // _nsPaths; without this fallback the default lookup never matches.
            if(typeof(prefix)=="undefined"&&ns==dNs){ prefix=dNs; }
            if(prefix==c.namespace&&(name=="*"||c.localName==name)){ out.push(c); }
            collect(c);
        }
    }
    collect(this);
    return out;
}
// Only child nodes with name.
function childrenByName(name){
    // summary:
    //     Returns the direct child elements of this node whose nodeName
    //     equals `name` ("*" matches every child element). Descendants
    //     deeper than one level are not visited.
    var matches=[];
    var kids=this.childNodes;
    if(kids&&kids.length){
        for(var idx=0, total=kids.length; idx<total; idx++){
            var kid=kids[idx];
            var isElement=(kid.nodeType==nodeTypes.ELEMENT);
            if(isElement&&(name=="*"||kid.nodeName==name)){
                matches.push(kid);
            }
        }
    }
    return matches;
}
function _createTextNode(v){
    // summary:
    //     Builds a text node object from the raw string `v`.
    // description:
    //     Whitespace runs are collapsed to a single space first; the egt, elt,
    //     eapos, equot and eamp substitutions are then applied in that order
    //     (the ampersand goes last so its output is not substituted again).
    var value=v.replace(normalize," ");
    value=value.replace(egt,">").replace(elt,"<");
    value=value.replace(eapos,"'").replace(equot,'"');
    value=value.replace(eamp,"&");
    var node={};
    node.nodeType=nodeTypes.TEXT;
    node.nodeName="#text";
    node.nodeValue=value;
    return node;
}
// attribute functions
function getAttr(name){
    // summary:
    //     Returns the nodeValue of the first attribute of this element whose
    //     qualified nodeName equals `name`, or null if there is none.
    var attrs=this.attributes;
    var idx=0;
    while(idx<attrs.length){
        var candidate=attrs[idx];
        if(candidate.nodeName==name){
            return candidate.nodeValue;
        }
        idx++;
    }
    return null;
}
function getAttrNS(name, ns){
    // summary:
    //     Returns the nodeValue of the first attribute whose namespace equals
    //     the entry stored for `ns` in ownerDocument._nsPaths and whose
    //     localName equals `name`; null when no attribute qualifies.
    var attrs=this.attributes;
    for(var idx=0; idx<attrs.length; idx++){
        var candidate=attrs[idx];
        // namespace is compared first, then the local name.
        if(this.ownerDocument._nsPaths[ns]!=candidate.namespace){ continue; }
        if(candidate.localName!=name){ continue; }
        return candidate.nodeValue;
    }
    return null;
}
// note that you can only swap IDs using setAttribute, NOT with setAttributeNS.
function setAttr(name, val){
    // summary:
    //     Sets the value of an existing attribute, found by qualified name.
    // description:
    //     Only the first attribute whose nodeName equals `name` is updated;
    //     a missing attribute is not created. When the "id" attribute is
    //     changed, the element's id property and the owner document's id
    //     table are moved to the new value as well.
    // name: String
    //     Qualified attribute name.
    // val: String
    //     New attribute value.
    var old=null;
    for(var i=0; i<this.attributes.length; i++){
        if(this.attributes[i].nodeName==name){
            old=this.attributes[i].nodeValue;
            this.attributes[i].nodeValue=val;
            break;
        }
    }
    if(name=="id"){
        if(old!=null){
            this.ownerDocument._remove(old);
            // _add() registers under this.id, so it has to carry the new
            // value; otherwise the element is re-registered under the old id.
            this.id=val;
        }
        this.ownerDocument._add(this);
    }
}
function setAttrNS(name, val, ns){
    // summary:
    //     Overwrites the nodeValue of the first attribute whose namespace
    //     equals the ownerDocument._nsPaths entry for `ns` and whose localName
    //     equals `name`. Does nothing when no attribute qualifies.
    var attrs=this.attributes;
    var idx=0;
    while(idx<attrs.length){
        var candidate=attrs[idx];
        var sameNamespace=(this.ownerDocument._nsPaths[ns]==candidate.namespace);
        if(sameNamespace&&candidate.localName==name){
            candidate.nodeValue=val;
            return;
        }
        idx++;
    }
}
// navigation
function prev(){
    // summary:
    //     Returns the node that precedes this one in its parent's childNodes,
    //     or null when there is no parent or no preceding sibling.
    var parent=this.parentNode;
    if(!parent){ return null; }
    var siblings=parent.childNodes;
    // index 0 has no predecessor, so the scan can start at 1.
    for(var idx=1; idx<siblings.length; idx++){
        if(siblings[idx]==this){
            return siblings[idx-1];
        }
    }
    return null;
}
function next(){
    // summary:
    //     Returns the node that follows this one in its parent's childNodes,
    //     or null when there is no parent or no following sibling.
    var parent=this.parentNode;
    if(!parent){ return null; }
    var siblings=parent.childNodes;
    // the last entry has no successor, so the scan stops one short of it.
    for(var idx=0; (idx+1)<siblings.length; idx++){
        if(siblings[idx]==this){
            return siblings[idx+1];
        }
    }
    return null;
}
// the main method.
this.parse=function(/* String */str){
    // summary:
    //     Parses an XML string into a lightweight, DOM-like tree.
    // str: String
    //     The XML text. null/undefined or an empty string yields an empty
    //     document.
    // returns:
    //     The document node created by _doc(). documentElement is set to the
    //     first element among its childNodes, or left null if there is none.
    // NOTE: text that directly follows a processing instruction, a CDATA
    //     section, a comment or a declaration is not captured; only text
    //     following an element's open or close tag becomes a text node.
    var root=_doc();
    if(str==null||str.length==0){ return root; }

    // The shared patterns are global and therefore stateful. Rewind the ones
    // driven by exec() so that an earlier parse() that threw part-way through
    // cannot make this one start in the middle of its input.
    reEntity.lastIndex=0;
    reTags.lastIndex=0;

    var i;
    // preprocess custom entities
    if(str.indexOf("<!ENTITY")>0){
        var entity, eRe=[];
        // match entities
        while((entity=reEntity.exec(str))!=null){
            eRe.push({
                entity:"&"+entity[1].replace(trim,"")+";",
                expression:entity[2]
            });
        }
        // replace instances in the document. split/join substitutes literally,
        // so "." in an entity name or "$" in its value are not misread as
        // regular-expression / replacement syntax.
        for(i=0; i<eRe.length; i++){
            str=str.split(eRe[i].entity).join(eRe[i].expression);
        }
    }

    // pre-parse for CData, and tokenize: each section's content is swapped for
    // its index so that "<" and ">" inside it cannot confuse the tag scanner.
    // Replacing through the pattern itself rewrites exactly the matched
    // section, even if the same text also occurs elsewhere or is empty.
    var cdSections=[];
    str=str.replace(reCData, function(match, content){
        cdSections.push(content);
        return "<![CDATA["+(cdSections.length-1)+"]]>";
    });

    // pre-parse for comments, and tokenize in the same way.
    var comments=[];
    str=str.replace(reComments, function(match, content){
        comments.push(content);
        return "<!--"+(comments.length-1)+"-->";
    });

    // parse the document
    var res, obj=root;
    while((res=reTags.exec(str))!=null){
        var tag=res[1], rest=res[2], text;
        // closing tags. A bare "/" is what "<br/>" leaves in rest; that is a
        // self-closing element and must fall through to the element branch.
        if(rest.charAt(0)=="/"&&rest.replace(trim,"").length>1){
            if(obj.parentNode){
                obj=obj.parentNode;
            }
            text=(res[3]||"").replace(trim, "");
            if(text.length>0){
                obj.childNodes.push(_createTextNode(text));
            }
        }
        // open tags.
        else if(tag.length>0){
            // figure out the type of node.
            if(tag.charAt(0)=="?"){
                // processing instruction. rest ends with the "?" of "?>"
                // (the ">" itself is not captured); drop it together with
                // any whitespace in front of it.
                obj.childNodes.push({
                    nodeType:nodeTypes.PROCESSING_INSTRUCTION,
                    nodeName:tag.substr(1),
                    nodeValue:rest.replace(/\s*\?$/,"")
                });
            }
            else if(tag.charAt(0)=="!"){
                var idx;
                // CDATA; skip over any declaration elements.
                if(tag.indexOf("![CDATA[")==0){
                    idx=parseInt(tag.replace("![CDATA[","").replace("]]",""), 10);
                    obj.childNodes.push({
                        nodeType:nodeTypes.CDATA_SECTION,
                        nodeName:"#cdata-section",
                        nodeValue:cdSections[idx]
                    });
                }
                // Comments.
                else if(tag.substr(0,3)=="!--"){
                    idx=parseInt(tag.replace("!--","").replace("--",""), 10);
                    obj.childNodes.push({
                        nodeType:nodeTypes.COMMENT,
                        nodeName:"#comment",
                        nodeValue:comments[idx]
                    });
                }
            }
            else {
                // Elements (with attribute and text)
                var name=tag.replace(trim,"");
                var o={
                    nodeType:nodeTypes.ELEMENT,
                    nodeName:name,
                    localName:name,
                    namespace:dNs,
                    ownerDocument:root,
                    attributes:[],
                    parentNode:null,
                    childNodes:[]
                };

                // check to see if it's namespaced.
                if(name.indexOf(":")>-1){
                    var parts=name.split(":");
                    o.namespace=parts[0];
                    o.localName=parts[1];
                }

                // set the function references.
                o.byName=o.getElementsByTagName=byName;
                o.byNameNS=o.getElementsByTagNameNS=byNameNS;
                o.childrenByName=childrenByName;
                o.getAttribute=getAttr;
                o.getAttributeNS=getAttrNS;
                o.setAttribute=setAttr;
                o.setAttributeNS=setAttrNS;
                o.previous=o.previousSibling=prev;
                o.next=o.nextSibling=next;

                // parse the attribute string.
                var attr;
                reAttr.lastIndex=0;
                while((attr=reAttr.exec(rest))!=null){
                    var aName=attr[1].replace(trim,"");
                    // attr[4] is the double-quoted value, attr[6] the
                    // single-quoted one. An empty value ("") is falsy, hence
                    // the final "" fallback; without it .replace would throw.
                    var aVal=(attr[4]||attr[6]||"").replace(normalize," ")
                        .replace(egt,">")
                        .replace(elt,"<")
                        .replace(eapos,"'")
                        .replace(equot,'"')
                        .replace(eamp,"&");
                    if(aName.indexOf("xmlns")==0){
                        // namespace declaration: record prefix<->URI both ways.
                        if(aName.indexOf(":")>0){
                            var decl=aName.split(":");
                            root.namespaces[decl[1]]=aVal;
                            root._nsPaths[aVal]=decl[1];
                        } else {
                            root.namespaces[dNs]=aVal;
                            root._nsPaths[aVal]=dNs;
                        }
                    } else {
                        var ln=aName;
                        var ns=dNs;
                        if(aName.indexOf(":")>0){
                            var qn=aName.split(":");
                            ln=qn[1];
                            ns=qn[0];
                        }
                        o.attributes.push({
                            nodeType:nodeTypes.ATTRIBUTE,
                            nodeName:aName,
                            localName:ln,
                            namespace:ns,
                            nodeValue:aVal
                        });

                        // only add id as a property.
                        if(ln=="id"){ o.id=aVal; }
                    }
                }
                root._add(o);

                obj.childNodes.push(o);
                o.parentNode=obj;
                // if it's not a self-closing node, descend into it.
                if(rest.charAt(rest.length-1)!="/"){
                    obj=o;
                }
                // text after a self-closing tag belongs to the parent,
                // otherwise to the element that was just opened.
                text=res[3];
                if(text.length>0){
                    obj.childNodes.push(_createTextNode(text));
                }
            }
        }
    }

    // set the document element
    for(i=0; i<root.childNodes.length; i++){
        var e=root.childNodes[i];
        if(e.nodeType==nodeTypes.ELEMENT){
            root.documentElement=e;
            break;
        }
    }
    return root;
};
})();