- Author:
- David Nickerson <nickerso@users.sourceforge.net>
- Date:
- 2009-07-16 02:00:03+12:00
- Desc:
- the starting point for the HH tutorial example
- Permanent Source URI:
- https://models.fieldml.org/workspace/a1/rawfile/f6a8f90307388eb4b040ee3566b84d88b59247f7/dojo-presentation/js/dojo/dojox/highlight/_base.js
dojo.provide("dojox.highlight._base");
//
// dojox.highlight - syntax highlighting with language auto-detection
// released under CLA by the Dojo Toolkit
// orig BSD release available from: http://softwaremaniacs.org/soft/highlight/
//
(function(){
// shorthand for the module namespace
var dh = dojox.highlight;
// pattern source for a C-style number: "0x" followed by letters/digits,
// or a decimal number with an optional fractional part
var C_NUMBER_RE = '\\b(0x[A-Za-z0-9]+|\\d+(\\.\\d+)?)';

// constants: regular-expression sources and mode definitions published on the namespace
dh.constants = {
	// identifier that starts with a letter
	IDENT_RE: '[a-zA-Z][a-zA-Z0-9_]*',
	// identifier that may also start with an underscore
	UNDERSCORE_IDENT_RE: '[a-zA-Z_][a-zA-Z0-9_]*',
	// decimal number with an optional fractional part
	NUMBER_RE: '\\b\\d+(\\.\\d+)?',
	C_NUMBER_RE: C_NUMBER_RE,

	// Common modes

	// apostrophe-delimited string; a newline inside it is illegal
	APOS_STRING_MODE: {
		className: 'string',
		begin: '\'',
		end: '\'',
		illegal: '\\n',
		contains: ['escape'],
		relevance: 0
	},
	// double-quote-delimited string; a newline inside it is illegal
	QUOTE_STRING_MODE: {
		className: 'string',
		begin: '"',
		end: '"',
		illegal: '\\n',
		contains: ['escape'],
		relevance: 0
	},
	// a backslash followed by any single character
	BACKSLASH_ESCAPE: {
		className: 'escape',
		begin: '\\\\.',
		end: '^',
		relevance: 0
	},
	// "//" up to the end of the line
	C_LINE_COMMENT_MODE: {
		className: 'comment',
		begin: '//',
		end: '$',
		relevance: 0
	},
	// "/*" up to the next "*/"
	C_BLOCK_COMMENT_MODE: {
		className: 'comment',
		begin: '/\\*',
		end: '\\*/'
	},
	// "#" up to the end of the line
	HASH_COMMENT_MODE: {
		className: 'comment',
		begin: '#',
		end: '$'
	},
	// number literal described by C_NUMBER_RE
	C_NUMBER_MODE: {
		className: 'number',
		begin: C_NUMBER_RE,
		end: '^',
		relevance: 0
	}
};
// utilities
function esc(value){
	// summary: escape the HTML-significant characters of a string
	// value: String
	//		raw text that is going to be inserted into markup
	// returns: String
	//		the text with "&", "<" and ">" replaced by character entities
	// "&" is replaced first so that the ampersands introduced by the
	// two later replacements are not escaped a second time
	return value.replace(/&/gm, '&amp;').replace(/</gm, '&lt;').replace(/>/gm, '&gt;');
}
function verifyText(block){
	// summary: tell whether every child of the node is either a text node or a <br> element
	// block: DomNode
	// returns: Boolean
	var children = block.childNodes;
	return dojo.every(children, function(child){
		if(child.nodeType == 3){ return true; }	// text node
		return String(child.nodeName).toLowerCase() == 'br';
	});
}
function blockText(block){
	// summary: flatten the children of a node into one string
	// description:
	//		Text nodes contribute their value and <br> elements contribute
	//		a newline. Any other child makes the function throw the
	//		string 'Complex markup'.
	// block: DomNode
	// returns: String
	var pieces = [];
	dojo.forEach(block.childNodes, function(child){
		var isText = child.nodeType == 3;
		if(!isText && String(child.nodeName).toLowerCase() != 'br'){
			throw 'Complex markup';
		}
		pieces.push(isText ? child.nodeValue : "\n");
	});
	return pieces.join("");
}
function buildKeywordGroups(mode){
	// summary: make sure a mode that declares keywords also has mode.keywordGroups
	// description:
	//		Only the first enumerable entry of mode.keywords is inspected.
	//		If its value is an object, the keywords are taken to be grouped
	//		already and are used as they are; otherwise the whole table is
	//		wrapped in a single group named "keyword". A mode without any
	//		keyword entry is left untouched.
	if(mode.keywordGroups){ return; }
	for(var first in mode.keywords){
		if(mode.keywords[first] instanceof Object){
			mode.keywordGroups = mode.keywords;
		}else{
			mode.keywordGroups = {keyword: mode.keywords};
		}
		return;	// the first entry decides
	}
}
function buildKeywords(lang){
	// summary: prepare the keyword groups of the default mode and of every mode of a language
	// description:
	//		Does nothing unless the language has both a defaultMode and a
	//		modes list.
	if(!lang.defaultMode || !lang.modes){ return; }
	buildKeywordGroups(lang.defaultMode);
	dojo.forEach(lang.modes, buildKeywordGroups);
}
// main object
var Highlighter = function(langName, textBlock){
	// summary: highlight a piece of text with one language
	// description:
	//		After construction the instance carries `result` (the markup
	//		string), `relevance` and `keywordCount`. If highlighting throws
	//		'Illegal', both counters are reset to 0 and `result` is just
	//		the escaped input. Any other exception is passed on.
	// langName: String
	//		key into dh.languages
	// textBlock: String
	//		the text to highlight
	var lang = dh.languages[langName];

	// initial state
	this.langName = langName;
	this.lang = lang;
	this.modes = [lang.defaultMode];
	this.relevance = 0;
	this.keywordCount = 0;
	this.result = [];

	// the compiled regex on the default mode marks a language as already prepared
	if(!lang.defaultMode.illegalRe){
		this.buildRes();
		buildKeywords(lang);
	}

	try{
		this.highlight(textBlock);
		this.result = this.result.join("");
	}catch(err){
		if(err != 'Illegal'){ throw err; }
		// the text does not fit this language: fall back to plain escaped text
		this.relevance = 0;
		this.keywordCount = 0;
		this.result = esc(textBlock);
	}
};
dojo.extend(Highlighter, {
buildRes: function(){
dojo.forEach(this.lang.modes, function(mode){
if(mode.begin){
mode.beginRe = this.langRe('^' + mode.begin);
}
if(mode.end){
mode.endRe = this.langRe('^' + mode.end);
}
if(mode.illegal){
mode.illegalRe = this.langRe('^(?:' + mode.illegal + ')');
}
}, this);
this.lang.defaultMode.illegalRe = this.langRe('^(?:' + this.lang.defaultMode.illegal + ')');
},
subMode: function(lexeme){
var classes = this.modes[this.modes.length - 1].contains;
if(classes){
var modes = this.lang.modes;
for(var i = 0; i < classes.length; ++i){
var className = classes[i];
for(var j = 0; j < modes.length; ++j){
var mode = modes[j];
if(mode.className == className && mode.beginRe.test(lexeme)){ return mode; }
}
}
}
return null;
},
endOfMode: function(lexeme){
for(var i = this.modes.length - 1; i >= 0; --i){
var mode = this.modes[i];
if(mode.end && mode.endRe.test(lexeme)){ return this.modes.length - i; }
if(!mode.endsWithParent){ break; }
}
return 0;
},
isIllegal: function(lexeme){
var illegalRe = this.modes[this.modes.length - 1].illegalRe;
return illegalRe && illegalRe.test(lexeme);
},
langRe: function(value, global){
var mode = 'm' + (this.lang.case_insensitive ? 'i' : '') + (global ? 'g' : '');
return new RegExp(value, mode);
},
buildTerminators: function(){
var mode = this.modes[this.modes.length - 1],
terminators = {};
if(mode.contains){
dojo.forEach(this.lang.modes, function(lmode){
if(dojo.indexOf(mode.contains, lmode.className) >= 0){
terminators[lmode.begin] = 1;
}
});
}
for(var i = this.modes.length - 1; i >= 0; --i){
var m = this.modes[i];
if(m.end){ terminators[m.end] = 1; }
if(!m.endsWithParent){ break; }
}
if(mode.illegal){ terminators[mode.illegal] = 1; }
var t = [];
for(i in terminators){ t.push(i); }
mode.terminatorsRe = this.langRe("(" + t.join("|") + ")");
},
eatModeChunk: function(value, index){
var mode = this.modes[this.modes.length - 1];
// create terminators lazily
if(!mode.terminatorsRe){
this.buildTerminators();
}
value = value.substr(index);
var match = mode.terminatorsRe.exec(value);
if(!match){
return {
buffer: value,
lexeme: "",
end: true
};
}
return {
buffer: match.index ? value.substr(0, match.index) : "",
lexeme: match[0],
end: false
};
},
keywordMatch: function(mode, match){
var matchStr = match[0];
if(this.lang.case_insensitive){ matchStr = matchStr.toLowerCase(); }
for(var className in mode.keywordGroups){
if(matchStr in mode.keywordGroups[className]){ return className; }
}
return "";
},
buildLexemes: function(mode){
var lexemes = {};
dojo.forEach(mode.lexems, function(lexeme){
lexemes[lexeme] = 1;
});
var t = [];
for(var i in lexemes){ t.push(i); }
mode.lexemsRe = this.langRe("(" + t.join("|") + ")", true);
},
processKeywords: function(buffer){
var mode = this.modes[this.modes.length - 1];
if(!mode.keywords || !mode.lexems){
return esc(buffer);
}
// create lexemes lazily
if(!mode.lexemsRe){
this.buildLexemes(mode);
}
mode.lexemsRe.lastIndex = 0;
var result = [], lastIndex = 0,
match = mode.lexemsRe.exec(buffer);
while(match){
result.push(esc(buffer.substr(lastIndex, match.index - lastIndex)));
var keywordM = this.keywordMatch(mode, match);
if(keywordM){
++this.keywordCount;
result.push('<span class="'+ keywordM +'">' + esc(match[0]) + '</span>');
}else{
result.push(esc(match[0]));
}
lastIndex = mode.lexemsRe.lastIndex;
match = mode.lexemsRe.exec(buffer);
}
result.push(esc(buffer.substr(lastIndex, buffer.length - lastIndex)));
return result.join("");
},
processModeInfo: function(buffer, lexeme, end) {
var mode = this.modes[this.modes.length - 1];
if(end){
this.result.push(this.processKeywords(mode.buffer + buffer));
return;
}
if(this.isIllegal(lexeme)){ throw 'Illegal'; }
var newMode = this.subMode(lexeme);
if(newMode){
mode.buffer += buffer;
this.result.push(this.processKeywords(mode.buffer));
if(newMode.excludeBegin){
this.result.push(lexeme + '<span class="' + newMode.className + '">');
newMode.buffer = '';
}else{
this.result.push('<span class="' + newMode.className + '">');
newMode.buffer = lexeme;
}
this.modes.push(newMode);
this.relevance += typeof newMode.relevance == "number" ? newMode.relevance : 1;
return;
}
var endLevel = this.endOfMode(lexeme);
if(endLevel){
mode.buffer += buffer;
if(mode.excludeEnd){
this.result.push(this.processKeywords(mode.buffer) + '</span>' + lexeme);
}else{
this.result.push(this.processKeywords(mode.buffer + lexeme) + '</span>');
}
while(endLevel > 1){
this.result.push('</span>');
--endLevel;
this.modes.pop();
}
this.modes.pop();
this.modes[this.modes.length - 1].buffer = '';
return;
}
},
highlight: function(value){
var index = 0;
this.lang.defaultMode.buffer = '';
do{
var modeInfo = this.eatModeChunk(value, index);
this.processModeInfo(modeInfo.buffer, modeInfo.lexeme, modeInfo.end);
index += modeInfo.buffer.length + modeInfo.lexeme.length;
}while(!modeInfo.end);
if(this.modes.length > 1){
throw 'Illegal';
}
}
});
// more utilities
function replaceText(node, className, text){
	// summary: put the highlighted markup into the document
	// node: DomNode
	//		the block that was highlighted
	// className: String
	//		class to set on the block
	// text: String
	//		markup to show inside the block
	if(String(node.tagName).toLowerCase() != "code" || String(node.parentNode.tagName).toLowerCase() != "pre"){
		// ordinary case: update the node in place
		node.className = className;
		node.innerHTML = text;
		return;
	}
	// <pre><code>: the original author's note says that rebuilding the
	// whole <pre> in a scratch element and swapping it in is what it takes
	// to get the effect of "node.innerHTML = text" in IE
	var scratch = document.createElement('div'),
		pre = node.parentNode,
		host = pre.parentNode;
	scratch.innerHTML = '<pre><code class="' + className + '">' + text + '</code></pre>';
	host.replaceChild(scratch.firstChild, pre);
}
function highlightLanguage(block, lang){
	// summary: highlight a block with an explicitly chosen language
	// block: DomNode
	// lang: String
	//		language name handed to the Highlighter
	var source = blockText(block),
		highlighter = new Highlighter(lang, source);
	// the block keeps the class attribute it already has
	replaceText(block, block.className, highlighter.result);
}
function highlightAuto(block){
	// summary: highlight a block with the best-fitting registered language
	// description:
	//		The text is run through every entry of dh.languages that has a
	//		defaultMode. The score of a language is keywordCount +
	//		relevance; the highest score wins and, on a tie, the language
	//		tried first is kept. The block is only replaced when the
	//		winning markup is non-empty.
	// block: DomNode
	var result = "", langName = "",
		// Best score so far. It has to live outside the loop: declared
		// inside, it would be reset on every pass and the last language
		// with any score at all would win.
		relevanceMax = 0,
		textBlock = blockText(block);
	for(var key in dh.languages){
		if(!dh.languages[key].defaultMode){ continue; }	// skip internal members
		var highlight = new Highlighter(key, textBlock),
			relevance = highlight.keywordCount + highlight.relevance;
		// "!result" lets the first usable language act as the fallback
		if(!result || relevance > relevanceMax){
			relevanceMax = relevance;
			result = highlight.result;
			langName = highlight.langName;
		}
	}
	if(result){
		replaceText(block, langName, result);
	}
}
// the public API
dh.init = function(/* DomNode */ block){
	// summary: the main (only required) public API. highlight a node.
	// description:
	//		Nothing happens for a node that carries the class
	//		"no-highlight" or that contains anything other than text and
	//		<br> elements. Otherwise the first class name that does not
	//		start with "_" and names a registered language selects the
	//		language; without such a class the language is detected
	//		automatically.
	if(dojo.hasClass(block,"no-highlight") || !verifyText(block)){ return; }
	var names = block.className.split(/\s+/);
	var explicit = dojo.some(names, function(name){
		var known = name.charAt(0) != "_" && dh.languages[name];
		if(known){
			highlightLanguage(block, name);
		}
		return !!known;	// true stops the iteration
	});
	if(!explicit){
		highlightAuto(block);
	}
};
// pseudo object for markup creation
dh.Code = function(params, node){
// summary: constructor-style entry point that highlights `node`
// params: ignored by this function
// node: the node handed to dh.init()
dh.init(node);
};
})();