// sunlight.js (33 KB)
  /**
   * Sunlight
   * Intelligent syntax highlighting
   *
   * http://sunlightjs.com/
   *
   * by Tommy Montgomery <http://tmont.com>
   * Licensed under WTFPL <http://sam.zoy.org/wtfpl/>
   */
  10. (function(window, document, undefined){
  var
      //http://webreflection.blogspot.com/2009/01/32-bytes-to-know-if-your-browser-is-ie.html
      //we have to sniff this because IE requires \r
      isIe = !+"\v1",

      //line terminator used when looking for newlines in prepared token text
      EOL = isIe ? "\r" : "\n",

      //no-op that always returns null
      EMPTY = function() { return null; },

      HIGHLIGHTED_NODE_COUNT = 0,
      DEFAULT_LANGUAGE = "plaintext",
      DEFAULT_CLASS_PREFIX = "sunlight-",

      //global sunlight variables
      //(defaultAnalyzer and getComputedStyle are assigned right after this statement)
      defaultAnalyzer,
      getComputedStyle,

      //defaults that every Highlighter copies its options from
      globalOptions = {
          tabWidth: 4,
          classPrefix: DEFAULT_CLASS_PREFIX,
          showWhitespace: false,
          maxHeight: false
      },

      //registered languages, keyed by language id
      languages = {},

      //placeholder; the real defaults are assigned once defaultAnalyzer exists
      languageDefaults = {},

      //event name -> list of delegates invoked by fireEvent()
      events = {
          beforeHighlightNode: [],
          beforeHighlight: [],
          beforeTokenize: [],
          afterTokenize: [],
          beforeAnalyze: [],
          afterAnalyze: [],
          afterHighlight: [],
          afterHighlightNode: []
      };
  //the analyzer that turns tokens into DOM nodes; languageDefaults.analyzer inherits from it
  defaultAnalyzer = (function() {
      //builds a handler that wraps the current token in <span class="{classPrefix}{suffix}">
      //the handler returns addNode()'s result when that is truthy, otherwise true
      function defaultHandleToken(suffix) {
          return function(context) {
              var element = document.createElement("span");
              element.className = context.options.classPrefix + suffix;
              element.appendChild(context.createTextNode(context.tokens[context.index]));
              return context.addNode(element) || true;
          };
      }

      return {
          //fallback for any token without a dedicated handle_* method: the token name becomes the class suffix
          handleToken: function(context) {
              return defaultHandleToken(context.tokens[context.index].name)(context);
          },

          //just append default content as a text node
          handle_default: function(context) {
              return context.addNode(context.createTextNode(context.tokens[context.index]));
          },

          //this handles the named ident mayhem:
          //rules are tried in the order custom, follows, precedes, between; the first one that
          //matches renders the token as "named-ident", otherwise it is rendered as "ident"
          handle_ident: function(context) {
              var namedIdentRules = context.language.namedIdentRules;

              //rules: array of functions (called with the context) or rule data (turned into a
              //function of the token list by createRule); returns false when nothing matched
              function evaluate(rules, createRule) {
                  var i,
                      rule;

                  rules = rules || [];
                  for (i = 0; i < rules.length; i++) {
                      rule = rules[i];
                      if (typeof(rule) === "function") {
                          if (rule(context)) {
                              return defaultHandleToken("named-ident")(context);
                          }
                      } else if (createRule && createRule(rule)(context.tokens)) {
                          return defaultHandleToken("named-ident")(context);
                      }
                  }

                  return false;
              }

              return evaluate(namedIdentRules.custom)
                  || evaluate(namedIdentRules.follows, function(ruleData) {
                      return createProceduralRule(context.index - 1, -1, ruleData, context.language.caseInsensitive);
                  })
                  || evaluate(namedIdentRules.precedes, function(ruleData) {
                      return createProceduralRule(context.index + 1, 1, ruleData, context.language.caseInsensitive);
                  })
                  || evaluate(namedIdentRules.between, function(ruleData) {
                      return createBetweenRule(context.index, ruleData.opener, ruleData.closer, context.language.caseInsensitive);
                  })
                  || defaultHandleToken("ident")(context);
          }
      };
  }());
  //default language settings
  languageDefaults = {
      //a fresh object that inherits the default handlers, so languages can override them individually
      analyzer: create(defaultAnalyzer),
      customTokens: [],
      namedIdentRules: {},
      //any single character that is neither a word character nor whitespace
      punctuation: /[^\w\s]/,
      numberParser: defaultNumberParser,
      caseInsensitive: false,
      //characters matching this are never tokenized; they are collected into "default" tokens
      doNotParse: /\s/,
      contextItems: {},
      embeddedLanguages: {}
  };
  //adapted from http://blargh.tommymontgomery.com/2010/04/get-computed-style-in-javascript/
  //getComputedStyle(element, style) returns one computed style property of an element
  //uses document.defaultView.getComputedStyle when it exists, otherwise the element's currentStyle
  getComputedStyle = (function() {
      var view = document.defaultView,
          getStyles;

      if (view && view.getComputedStyle) {
          //call it as a method of the view rather than as a detached function reference
          getStyles = function(element) {
              return view.getComputedStyle(element, null);
          };
      } else {
          getStyles = function(element) {
              return element["currentStyle"] || {};
          };
      }

      return function(element, style) {
          return getStyles(element)[style];
      };
  }());
  //-----------
  //FUNCTIONS
  //-----------

  //creates a forward-only reader over text, tracking the current character plus line and column
  //line endings are normalized to \n first; the reader starts on the first character (EOF for empty text)
  //EOF is represented by undefined
  function createCodeReader(text) {
      var index = 0,
          line = 1,
          column = 1,
          length,
          EOF = undefined,
          currentChar,
          //set after a "\n" has been read; the following read() moves to the next line
          //starts out as false so that isEol() always returns a boolean
          nextReadBeginsLine = false;

      text = text.replace(/\r\n/g, "\n").replace(/\r/g, "\n"); //normalize line endings to unix
      length = text.length;
      currentChar = length > 0 ? text.charAt(0) : EOF;

      //returns up to count characters AFTER the current one without moving (count defaults to 1)
      //returns "" when count is 0 and EOF when there is nothing left
      function getCharacters(count) {
          var value;
          if (count === 0) {
              return "";
          }

          count = count || 1;
          value = text.substring(index + 1, index + count + 1);
          return value === "" ? EOF : value;
      }

      return {
          toString: function() {
              return "length: " + length + ", index: " + index + ", line: " + line + ", column: " + column + ", current: [" + currentChar + "]";
          },

          //looks ahead without consuming anything
          peek: function(count) {
              return getCharacters(count);
          },

          //everything from the current character to the end
          substring: function() {
              return text.substring(index);
          },

          //everything after the current character
          peekSubstring: function() {
              return text.substring(index + 1);
          },

          //consumes count characters (default 1) and returns them; the last one becomes current
          //returns "" for a count of 0, and EOF (moving to the end) when nothing is left
          read: function(count) {
              var value = getCharacters(count),
                  newlineCount;

              if (value === "") {
                  //this is a result of reading/peeking/doing nothing
                  return value;
              }

              if (value === EOF) {
                  index = length;
                  currentChar = EOF;
                  return value;
              }

              //advance index
              index += value.length;
              column += value.length;

              //update line count
              if (nextReadBeginsLine) {
                  line++;
                  column = 1;
                  nextReadBeginsLine = false;
              }

              //newlines before the last character take effect immediately;
              //a trailing newline only takes effect on the next read
              newlineCount = value.substring(0, value.length - 1).replace(/[^\n]/g, "").length;
              if (newlineCount > 0) {
                  line += newlineCount;
                  column = 1;
              }

              currentChar = last(value);
              if (currentChar === "\n") {
                  nextReadBeginsLine = true;
              }

              return value;
          },

          text: function() { return text; },
          getLine: function() { return line; },
          getColumn: function() { return column; },
          isEof: function() { return index >= length; },
          isSol: function() { return column === 1; },

          //true if only whitespace separates the current character from the start of its line
          isSolWs: function() {
              var position = index,
                  c;

              if (column === 1) {
                  return true;
              }

              //look backward until we find a newline or a non-whitespace character
              //(charAt() returns "" once position drops below 0, which ends the loop)
              while ((c = text.charAt(--position)) !== "") {
                  if (c === "\n") {
                      return true;
                  }
                  if (!/\s/.test(c)) {
                      return false;
                  }
              }

              return true;
          },

          //true if the current character is a newline, i.e. the next read starts a new line
          isEol: function() { return nextReadBeginsLine; },
          EOF: EOF,
          current: function() { return currentChar; }
      };
  }
  //http://javascript.crockford.com/prototypal.html
  //returns a new empty object whose prototype is o
  function create(o) {
      function F() {}
      F.prototype = o;
      return new F();
  }
  //appends every item of children to parent, in order, via parent.appendChild()
  function appendAll(parent, children) {
      var i;
      for (i = 0; i < children.length; i++) {
          parent.appendChild(children[i]);
      }
  }
  //gets the last character in a string or the last element in an array
  //anything with a charAt() method is treated as a string; an empty string yields "", an empty array undefined
  function last(thing) {
      var lastIndex = thing.length - 1;
      return thing.charAt ? thing.charAt(lastIndex) : thing[lastIndex];
  }
  //array.contains()
  //returns true if arr contains value; with caseInsensitive set, strings are also compared ignoring case
  function contains(arr, value, caseInsensitive) {
      var i,
          upperValue;

      //native lookup is enough for exact matches
      if (arr.indexOf && !caseInsensitive) {
          return arr.indexOf(value) >= 0;
      }

      //upper-case the needle once instead of once per element; null means "exact matches only"
      upperValue = caseInsensitive && typeof(value) === "string" ? value.toUpperCase() : null;

      for (i = 0; i < arr.length; i++) {
          if (arr[i] === value) {
              return true;
          }
          if (upperValue !== null && typeof(arr[i]) === "string" && arr[i].toUpperCase() === upperValue) {
              return true;
          }
      }

      return false;
  }
  //non-recursively merges one object into the other
  //every enumerable property of objectToMerge (inherited ones included) is copied onto defaultObject,
  //which is modified in place and returned; a falsy objectToMerge leaves defaultObject untouched
  function merge(defaultObject, objectToMerge) {
      var key;

      if (objectToMerge) {
          for (key in objectToMerge) {
              defaultObject[key] = objectToMerge[key];
          }
      }

      return defaultObject;
  }
  //returns a shallow copy of object (a new empty object when object is falsy)
  function clone(object) {
      return merge({}, object);
  }
  //http://stackoverflow.com/questions/3561493/is-there-a-regexp-escape-function-in-javascript/3561711#3561711
  //backslash-escapes every regex metacharacter in s so it can be embedded literally in a RegExp source
  function regexEscape(s) {
      return s.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&");
  }
  //creates a rule function(tokens) that checks whether the tokens starting at startIndex and walking
  //in direction (-1 = backward, 1 = forward) satisfy tokenRequirements
  //  tokenRequirements: array of { token: name, values?: [...], optional?: bool }, listed in source order
  //  caseInsensitive: passed to contains() when comparing token values
  //the returned rule can be called any number of times with the same result for the same tokens
  function createProceduralRule(startIndex, direction, tokenRequirements, caseInsensitive) {
      //work on a copy so the caller's array is never modified
      tokenRequirements = tokenRequirements.slice(0);

      //the rule below consumes requirements from the end of the array, so a forward rule needs them reversed
      //this has to happen once, here: reversing inside the rule would flip the order on every second call
      if (direction === 1) {
          tokenRequirements.reverse();
      }

      return function(tokens) {
          var tokenIndexStart = startIndex,
              j,
              expected,
              actual;

          for (j = 0; j < tokenRequirements.length; j++) {
              actual = tokens[tokenIndexStart + (j * direction)];
              expected = tokenRequirements[tokenRequirements.length - 1 - j];

              if (actual !== undefined
                  && actual.name === expected.token
                  && (expected["values"] === undefined || contains(expected.values, actual.value, caseInsensitive))) {
                  //requirement satisfied
                  continue;
              }

              if (!expected.optional) {
                  //a required token is missing or does not match
                  return false;
              }

              //optional requirement not met: step back so the same token is evaluated against the next requirement
              tokenIndexStart -= direction;
          }

          return true;
      };
  }
  //creates a rule function(tokens) that checks whether the token at startIndex sits between an opener and a closer
  //  opener/closer: { token: name, values: [...] }
  //  caseInsensitive: passed to contains() for every value comparison, on both sides
  //scanning left, an opener must be reached before any closer; scanning right, a closer must be
  //reached before any opener; a token that is both opener and closer counts as whichever is being sought
  function createBetweenRule(startIndex, opener, closer, caseInsensitive) {
      function matches(token, requirement) {
          return token.name === requirement.token && contains(requirement.values, token.value, caseInsensitive);
      }

      return function(tokens) {
          var index = startIndex,
              token,
              success = false;

          //check to the left: if we run into a closer or never run into an opener, fail
          while ((token = tokens[--index]) !== undefined) {
              if (matches(token, opener)) {
                  //if the closer is the same as the opener that's okay
                  success = true;
                  break;
              }
              if (matches(token, closer)) {
                  return false;
              }
          }

          if (!success) {
              return false;
          }

          //check to the right for the closer
          //NOTE(review): success is still true from the left-hand scan, so running out of tokens
          //without meeting a closer also returns true; kept as-is, confirm whether that is intended
          index = startIndex;
          while ((token = tokens[++index]) !== undefined) {
              if (matches(token, closer)) {
                  //if the closer is the same as the opener that's okay
                  success = true;
                  break;
              }
              if (matches(token, opener)) {
                  return false;
              }
          }

          return success;
      };
  }
  //tries to match one of the words in wordMap at the reader's current position
  //  wordMap: map of first character -> [{ value, regex }] as built by createHashMap()
  //  tokenName: name given to the created token
  //  doNotRead: if truthy the reader is only peeked, not advanced
  //returns the created token, or null if no word matches
  function matchWord(context, wordMap, tokenName, doNotRead) {
      var current = context.reader.current(),
          i,
          word,
          peek,
          candidates,
          line = context.reader.getLine(),
          column = context.reader.getColumn();

      wordMap = wordMap || [];
      if (context.language.caseInsensitive) {
          current = current.toUpperCase();
      }

      candidates = wordMap[current];
      if (!candidates) {
          return null;
      }

      for (i = 0; i < candidates.length; i++) {
          word = candidates[i].value;

          //peek() returns EOF (undefined) when nothing follows the current character; treat that as ""
          //otherwise the text "undefined" ends up in the candidate and breaks the boundary check
          peek = current + (context.reader.peek(word.length) || "");
          if (word === peek || candidates[i].regex.test(peek)) {
              return context.createToken(
                  tokenName,
                  context.reader.current() + context.reader[doNotRead ? "peek" : "read"](word.length - 1),
                  line,
                  column
              );
          }
      }

      return null;
  }
  //gets the next token in the specified direction while matcher matches the current token
  //i.e. starting next to index, skips every token that matcher accepts and returns the first one it rejects
  //direction defaults to 1 (forward); returns undefined if the token list runs out first
  function getNextWhile(tokens, index, direction, matcher) {
      var step = direction || 1,
          position = index + step,
          token;

      while (token = tokens[position]) {
          if (!matcher(token)) {
              return token;
          }
          position += step;
      }

      return undefined;
  }
  //this is crucial for performance
  //creates a hash table where the hash is the first character of the word, so that matchWord()
  //only has to look at the words that can start at the current character
  //  wordMap: array of words
  //  boundary: regex source appended after each escaped word (e.g. "\\b")
  //  caseInsensitive: upper-cases the words (and therefore the keys) and adds the "i" flag to the regexes
  //each entry is { value: word, regex: /^word<boundary>/ }
  function createHashMap(wordMap, boundary, caseInsensitive) {
      var newMap = { },
          flags = caseInsensitive ? "i" : "",
          i,
          word,
          firstChar;

      for (i = 0; i < wordMap.length; i++) {
          word = caseInsensitive ? wordMap[i].toUpperCase() : wordMap[i];
          firstChar = word.charAt(0);
          if (!newMap[firstChar]) {
              newMap[firstChar] = [];
          }

          newMap[firstChar].push({ value: word, regex: new RegExp("^" + regexEscape(word) + boundary, flags) });
      }

      return newMap;
  }
  //default number parser: creates a "number" token if the reader is on a digit, or on a "." followed by a digit
  //returns null (without moving the reader) otherwise
  function defaultNumberParser(context) {
      var reader = context.reader,
          current = reader.current(),
          number,
          line = reader.getLine(),
          column = reader.getColumn(),
          allowDecimal = true,
          peek;

      if (/\d/.test(current)) {
          number = current;
          if (current === "0" && reader.peek() !== ".") {
              //hex or octal
              allowDecimal = false;
          }
      } else {
          //is it a decimal followed by a number?
          if (current !== "." || !/\d/.test(reader.peek())) {
              return null;
          }

          //decimal without leading zero
          number = current + reader.read();
          allowDecimal = false;
      }

      //easy way out: read until it's not a number or letter
      //this will work for hex (0xef), octal (012), decimal and scientific notation (1e3)
      //anything else and you're on your own
      while ((peek = reader.peek()) !== reader.EOF) {
          if (/[A-Za-z0-9]/.test(peek)) {
              number += reader.read();
              continue;
          }

          //a single "." is accepted only when a digit follows it (peek(2) is the "." plus the character after it)
          if (peek === "." && allowDecimal && /\d$/.test(reader.peek(2))) {
              number += reader.read();
              allowDecimal = false;
              continue;
          }

          break;
      }

      return context.createToken("number", number, line, column);
  }
  //calls every delegate registered for eventName, in registration order,
  //with highlighter as "this" and eventContext as the only argument
  //unknown event names are ignored
  function fireEvent(eventName, highlighter, eventContext) {
      var delegates = events[eventName] || [],
          i;

      for (i = 0; i < delegates.length; i++) {
          delegates[i].call(highlighter, eventContext);
      }
  }
  //constructor: a highlighter's options are a copy of the global options overridden by the given options
  function Highlighter(options) {
      this.options = merge(clone(globalOptions), options);
  }
  433. Highlighter.prototype = (function() {
  //parseNextToken(context) parses one token starting at the reader's current character
  //returns the token, or null when the character was only appended to context.defaultData
  var parseNextToken = (function() {
      function isIdentMatch(context) {
          return context.language.identFirstLetter && context.language.identFirstLetter.test(context.reader.current());
      }

      //token parsing functions

      function parseKeyword(context) {
          return matchWord(context, context.language.keywords, "keyword");
      }

      //tries every custom token word map of the language; the map's key becomes the token name
      function parseCustomTokens(context) {
          var customTokens = context.language.customTokens,
              tokenName,
              token;

          if (customTokens === undefined) {
              return null;
          }

          for (tokenName in customTokens) {
              token = matchWord(context, customTokens[tokenName], tokenName);
              if (token !== null) {
                  return token;
              }
          }

          return null;
      }

      function parseOperator(context) {
          return matchWord(context, context.language.operators, "operator");
      }

      function parsePunctuation(context) {
          var current = context.reader.current();

          //NOTE(review): the punctuation regex is tested against the regex-escaped character
          //(e.g. "\\." rather than "."); kept as-is because language definitions may depend on it
          if (context.language.punctuation.test(regexEscape(current))) {
              return context.createToken("punctuation", current, context.reader.getLine(), context.reader.getColumn());
          }

          return null;
      }

      //reads an identifier: one identFirstLetter character followed by any number of identAfterFirstLetter characters
      function parseIdent(context) {
          var ident,
              peek,
              line = context.reader.getLine(),
              column = context.reader.getColumn();

          if (!isIdentMatch(context)) {
              return null;
          }

          ident = context.reader.current();
          while ((peek = context.reader.peek()) !== context.reader.EOF) {
              if (!context.language.identAfterFirstLetter.test(peek)) {
                  break;
              }

              ident += context.reader.read();
          }

          return context.createToken("ident", ident, line, column);
      }

      //collects the current character into the pending "default" text; always returns null
      function parseDefault(context) {
          if (context.defaultData.text === "") {
              //new default token
              context.defaultData.line = context.reader.getLine();
              context.defaultData.column = context.reader.getColumn();
          }

          context.defaultData.text += context.reader.current();
          return null;
      }

      //if one of the language's scope openers starts at the current character,
      //consumes the opener and hands over to that scope's reader function
      function parseScopes(context) {
          var current = context.reader.current(),
              tokenName,
              specificScopes,
              j,
              opener,
              line,
              column,
              continuation,
              value;

          for (tokenName in context.language.scopes) {
              specificScopes = context.language.scopes[tokenName];
              for (j = 0; j < specificScopes.length; j++) {
                  opener = specificScopes[j][0];

                  value = current + context.reader.peek(opener.length - 1);

                  if (opener !== value && (!context.language.caseInsensitive || value.toUpperCase() !== opener.toUpperCase())) {
                      continue;
                  }

                  line = context.reader.getLine();
                  column = context.reader.getColumn();
                  context.reader.read(opener.length - 1);
                  continuation = getScopeReaderFunction(specificScopes[j], tokenName);
                  return continuation(context, continuation, value, line, column);
              }
          }

          return null;
      }

      function parseNumber(context) {
          return context.language.numberParser(context);
      }

      //runs the language's custom parse rules in order and returns the first truthy result
      function parseCustomRules(context) {
          var customRules = context.language.customParseRules,
              i,
              token;

          if (customRules === undefined) {
              return null;
          }

          for (i = 0; i < customRules.length; i++) {
              token = customRules[i](context);
              if (token) {
                  return token;
              }
          }

          return null;
      }

      return function(context) {
          if (context.language.doNotParse.test(context.reader.current())) {
              return parseDefault(context);
          }

          //the order matters: the first parser that yields a token wins
          return parseCustomRules(context)
              || parseCustomTokens(context)
              || parseKeyword(context)
              || parseScopes(context)
              || parseIdent(context)
              || parseNumber(context)
              || parseOperator(context)
              || parsePunctuation(context)
              || parseDefault(context);
      };
  }());
  //creates the function that reads a scope (e.g. a string or a comment) up to its closer
  //  scope: [opener, closer, escapeSequences?, zeroWidth?]
  //    closer is either a string or an object with "regex" and "length" properties
  //    escapeSequences are character sequences that never end the scope
  //    zeroWidth means the closer is not consumed as part of the token
  //  tokenName: name of the token created for the scope
  //the returned function has the signature (context, continuation, buffer, line, column, processCurrent)
  //processCurrent indicates that this is being called from a continuation
  //which means that we need to process the current char, rather than peeking at the next
  function getScopeReaderFunction(scope, tokenName) {
      var escapeSequences = scope[2] || [],
          closerLength = scope[1].length,
          closer = typeof(scope[1]) === "string" ? new RegExp(regexEscape(scope[1])) : scope[1].regex,
          zeroWidth = scope[3] || false;

      return function(context, continuation, buffer, line, column, processCurrent) {
          var foundCloser = false;

          //handles one step: consumes an escape sequence or a single character into the buffer and returns true,
          //or returns false (flagging foundCloser) when the closer comes next
          //includeCurrent: the candidate text starts at the current character instead of the one after it
          function process(includeCurrent) {
              var offset = includeCurrent ? 1 : 0,
                  current = context.reader.current(),
                  prefix = includeCurrent ? current : "",
                  peekValue,
                  i;

              //check for escape sequences
              for (i = 0; i < escapeSequences.length; i++) {
                  peekValue = prefix + context.reader.peek(escapeSequences[i].length - offset);
                  if (peekValue === escapeSequences[i]) {
                      buffer += context.reader.read(peekValue.length - offset);
                      return true;
                  }
              }

              peekValue = prefix + context.reader.peek(closerLength - offset);
              if (closer.test(peekValue)) {
                  foundCloser = true;
                  return false;
              }

              buffer += includeCurrent ? current : context.reader.read();
              return true;
          }

          buffer = buffer || "";
          processCurrent = processCurrent ? 1 : 0;

          if (!processCurrent || process(true)) {
              while (context.reader.peek() !== context.reader.EOF && process(false)) { }
          }

          if (processCurrent) {
              //NOTE(review): process(true) may already have appended the current character - confirm this
              //does not duplicate a character when a continuation resumes
              buffer += context.reader.current();
              context.reader.read();
          } else {
              //consume the closer, unless the scope is zero-width or the input ended first
              buffer += (zeroWidth || context.reader.peek() === context.reader.EOF) ? "" : context.reader.read(closerLength);
          }

          if (!foundCloser) {
              //we need to signal to the context that this scope was never properly closed
              //this has significance for partial parses (e.g. for nested languages)
              context.continuation = continuation;
          }

          return context.createToken(tokenName, buffer, line, column);
      };
  }
  //called before processing the current
  //asks each embedded language of the current language (in order) whether to switch to it;
  //the first registered one whose switchTo(context) is truthy becomes the current language
  function switchToEmbeddedLanguageIfNecessary(context) {
      var candidates = context.language.embeddedLanguages,
          i,
          embeddedLanguage;

      for (i = 0; i < candidates.length; i++) {
          if (!languages[candidates[i].language]) {
              //unregistered language
              continue;
          }

          //work on a copy so the language definition itself never carries parse state
          embeddedLanguage = clone(candidates[i]);
          if (!embeddedLanguage.switchTo(context)) {
              continue;
          }

          //remember the items as they were, so they can be restored when switching back
          embeddedLanguage.oldItems = clone(context.items);
          context.embeddedLanguageStack.push(embeddedLanguage);
          context.language = languages[embeddedLanguage.language];
          context.items = merge(context.items, clone(context.language.contextItems));
          break;
      }
  }
  //called after processing the current
  //if the innermost embedded language says so (switchBack(context) is truthy), pops it off the stack,
  //returns to its parent language and restores the context items saved when it was entered
  function switchBackFromEmbeddedLanguageIfNecessary(context) {
      var current = last(context.embeddedLanguageStack),
          lang;

      if (!current || !current.switchBack(context)) {
          return;
      }

      context.language = languages[current.parentLanguage];
      lang = context.embeddedLanguageStack.pop();

      //restore old items
      context.items = clone(lang.oldItems);
      lang.oldItems = {};
  }
  //splits unhighlightedCode into tokens for the given language; must be called with a Highlighter as "this"
  //  partialContext: result of a previous partial parse (only its "continuation" is used here)
  //  options: the highlighter options, exposed as context.options
  //fires "beforeTokenize" and "afterTokenize"; returns the parser context (tokens via getAllTokens())
  function tokenize(unhighlightedCode, language, partialContext, options) {
      var tokens = [],
          context,
          continuation,
          token;

      //turns the pending default text (if any) into a "default" token
      function flushDefaultData() {
          if (context.defaultData.text !== "") {
              tokens.push(context.createToken("default", context.defaultData.text, context.defaultData.line, context.defaultData.column));
              context.defaultData.text = "";
          }
      }

      fireEvent("beforeTokenize", this, { code: unhighlightedCode, language: language });

      context = {
          reader: createCodeReader(unhighlightedCode),
          language: language,
          items: clone(language.contextItems),
          token: function(index) { return tokens[index]; },
          getAllTokens: function() { return tokens.slice(0); },
          count: function() { return tokens.length; },
          options: options,
          embeddedLanguageStack: [],

          //text that no parser claimed, waiting to be flushed as a "default" token
          defaultData: {
              text: "",
              line: 1,
              column: 1
          },

          //tokens are tagged with the language that is current at creation time
          createToken: function(name, value, line, column) {
              return {
                  name: name,
                  line: line,
                  value: isIe ? value.replace(/\n/g, "\r") : value,
                  column: column,
                  language: this.language.name
              };
          }
      };

      //if continuation is given, then we need to pick up where we left off from a previous parse
      //basically it indicates that a scope was never closed, so we need to continue that scope
      if (partialContext.continuation) {
          continuation = partialContext.continuation;
          partialContext.continuation = null;
          tokens.push(continuation(context, continuation, "", context.reader.getLine(), context.reader.getColumn(), true));
      }

      while (!context.reader.isEof()) {
          switchToEmbeddedLanguageIfNecessary(context);
          token = parseNextToken(context);

          //flush default data if needed (in pretty much all languages this is just whitespace)
          if (token !== null) {
              flushDefaultData();

              if (token[0] !== undefined) {
                  //multiple tokens
                  tokens = tokens.concat(token);
              } else {
                  //single token
                  tokens.push(token);
              }
          }

          switchBackFromEmbeddedLanguageIfNecessary(context);
          context.reader.read();
      }

      //append the last default token, if necessary
      flushDefaultData();

      fireEvent("afterTokenize", this, { code: unhighlightedCode, parserContext: context });
      return context;
  }
  //creates the context that the analyzers work on
  //  parserContext: result of tokenize()
  //  partialContext: result of a previous partial highlight; its tokens are prepended and its index continued
  //  options: highlighter options (tabWidth and showWhitespace are used to prepare text)
  function createAnalyzerContext(parserContext, partialContext, options) {
      var nodes = [],

          //prepareText(token) returns the token's value with spaces and tabs replaced for display;
          //the replacement characters only depend on the options, so they are computed once
          prepareText = function() {
              var nbsp,
                  tab;

              if (options.showWhitespace) {
                  //visible whitespace: middle dot for spaces, em dashes ending in an arrow for tabs
                  nbsp = String.fromCharCode(0xB7);
                  tab = new Array(options.tabWidth).join(String.fromCharCode(0x2014)) + String.fromCharCode(0x2192);
              } else {
                  nbsp = String.fromCharCode(0xA0);
                  tab = new Array(options.tabWidth + 1).join(nbsp);
              }

              return function(token) {
                  var value = token.value.split(" ").join(nbsp),
                      tabIndex,
                      lastNewlineColumn,
                      actualColumn,
                      tabLength;

                  //tabstop madness: replace \t with the appropriate number of characters, depending on the tabWidth option and its relative position in the line
                  while ((tabIndex = value.indexOf("\t")) >= 0) {
                      lastNewlineColumn = value.lastIndexOf(EOL, tabIndex);
                      actualColumn = lastNewlineColumn === -1 ? tabIndex : tabIndex - lastNewlineColumn - 1;
                      tabLength = options.tabWidth - (actualColumn % options.tabWidth); //actual length of the TAB character

                      //use the tail of the tab string so the replacement ends on the tab stop
                      value = value.substring(0, tabIndex) + tab.substring(options.tabWidth - tabLength) + value.substring(tabIndex + 1);
                  }

                  return value;
              };
          }();

      return {
          tokens: (partialContext.tokens || []).concat(parserContext.getAllTokens()),
          //NOTE(review): a previous index of 0 is falsy and therefore restarts at 0, not 1 - confirm that is intended
          index: partialContext.index ? partialContext.index + 1 : 0,
          language: null,
          getAnalyzer: EMPTY,
          options: options,
          continuation: parserContext.continuation,
          addNode: function(node) { nodes.push(node); },
          createTextNode: function(token) { return document.createTextNode(prepareText(token)); },
          getNodes: function() { return nodes; },
          resetNodes: function() { nodes = []; },
          items: parserContext.items
      };
  }
  //tokenizes and analyzes unhighlightedCode and returns the resulting analyzer context;
  //must be called with a Highlighter as "this"
  //partialContext allows us to perform a partial parse, and then pick up where we left off at a later time
  //this functionality enables nested highlights (language within a language, e.g. PHP within HTML followed by more PHP)
  //fires "beforeHighlight" and "afterHighlight"
  function highlightText(unhighlightedCode, languageId, partialContext) {
      var language = languages[languageId],
          parserContext,
          analyzerContext;

      partialContext = partialContext || { };
      if (language === undefined) {
          //use default language if one wasn't specified or hasn't been registered
          language = languages[DEFAULT_LANGUAGE];
      }

      fireEvent("beforeHighlight", this, { code: unhighlightedCode, language: language, previousContext: partialContext });

      parserContext = tokenize.call(this, unhighlightedCode, language, partialContext, this.options);
      analyzerContext = createAnalyzerContext(parserContext, partialContext, this.options);

      //when continuing a partial highlight, analysis resumes after the last token analyzed previously
      analyze.call(this, analyzerContext, partialContext.index ? partialContext.index + 1 : 0);

      fireEvent("afterHighlight", this, { analyzerContext: analyzerContext });

      return analyzerContext;
  }
  //creates the <span> that holds all nodes of one language; its class is the class prefix plus the language name
  function createContainer(ctx) {
      var container = document.createElement("span");
      container.className = ctx.options.classPrefix + ctx.language.name;
      return container;
  }
  //runs the analyzer over the tokens of analyzerContext, starting at startIndex;
  //must be called with a Highlighter as "this"
  //each token is handled by the analyzer's handle_<tokenName> method if there is one, otherwise by handleToken
  //the created nodes are grouped into one container per consecutive run of tokens of the same language;
  //afterwards the context's nodes are exactly those containers
  //fires "beforeAnalyze" and "afterAnalyze"
  function analyze(analyzerContext, startIndex) {
      var nodes,
          container,
          i,
          func,
          language,
          analyzer;

      fireEvent("beforeAnalyze", this, { analyzerContext: analyzerContext });

      if (analyzerContext.tokens.length > 0) {
          //unknown languages fall back to the default language
          analyzerContext.language = languages[analyzerContext.tokens[0].language] || languages[DEFAULT_LANGUAGE];
          nodes = [];
          container = createContainer(analyzerContext);

          for (i = startIndex; i < analyzerContext.tokens.length; i++) {
              language = languages[analyzerContext.tokens[i].language] || languages[DEFAULT_LANGUAGE];
              if (language.name !== analyzerContext.language.name) {
                  //language changed: close the current container and start one for the new language
                  appendAll(container, analyzerContext.getNodes());
                  analyzerContext.resetNodes();

                  nodes.push(container);
                  analyzerContext.language = language;
                  container = createContainer(analyzerContext);
              }

              analyzerContext.index = i;
              func = "handle_" + analyzerContext.tokens[i].name;

              //the context may supply its own analyzer; otherwise the language's analyzer is used
              analyzer = analyzerContext.getAnalyzer.call(analyzerContext) || analyzerContext.language.analyzer;
              if (analyzer[func]) {
                  analyzer[func](analyzerContext);
              } else {
                  analyzer.handleToken(analyzerContext);
              }
          }

          //append the last nodes, and add the final nodes to the context
          appendAll(container, analyzerContext.getNodes());
          nodes.push(container);
          analyzerContext.resetNodes();
          for (i = 0; i < nodes.length; i++) {
              analyzerContext.addNode(nodes[i]);
          }
      }

      fireEvent("afterAnalyze", this, { analyzerContext: analyzerContext });
  }
  800. return {
  801. //matches the language of the node to highlight
  802. matchSunlightNode: function() {
  803. var regex;
  804. return function(node) {
  805. if (!regex) {
  806. regex = new RegExp("(?:\\s|^)" + this.options.classPrefix + "highlight-(\\S+)(?:\\s|$)");
  807. }
  808. return regex.exec(node.className);
  809. };
  810. }(),
  811. //determines if the node has already been highlighted
  812. isAlreadyHighlighted: function() {
  813. var regex;
  814. return function(node) {
  815. if (!regex) {
  816. regex = new RegExp("(?:\\s|^)" + this.options.classPrefix + "highlighted(?:\\s|$)");
  817. }
  818. return regex.test(node.className);
  819. };
  820. }(),
  821. //highlights a block of text
  822. highlight: function(code, languageId) { return highlightText.call(this, code, languageId); },
  823. //recursively highlights a DOM node
  824. highlightNode: function highlightRecursive(node) {
  825. var match,
  826. languageId,
  827. currentNodeCount,
  828. j,
  829. nodes,
  830. k,
  831. partialContext,
  832. container,
  833. codeContainer;
  834. if (this.isAlreadyHighlighted(node) || (match = this.matchSunlightNode(node)) === null) {
  835. return;
  836. }
  837. languageId = match[1];
  838. currentNodeCount = 0;
  839. fireEvent("beforeHighlightNode", this, { node: node });
  840. for (j = 0; j < node.childNodes.length; j++) {
  841. if (node.childNodes[j].nodeType === 3) {
  842. //text nodes
  843. partialContext = highlightText.call(this, node.childNodes[j].nodeValue, languageId, partialContext);
  844. HIGHLIGHTED_NODE_COUNT++;
  845. currentNodeCount = currentNodeCount || HIGHLIGHTED_NODE_COUNT;
  846. nodes = partialContext.getNodes();
  847. node.replaceChild(nodes[0], node.childNodes[j]);
  848. for (k = 1; k < nodes.length; k++) {
  849. node.insertBefore(nodes[k], nodes[k - 1].nextSibling);
  850. }
  851. } else if (node.childNodes[j].nodeType === 1) {
  852. //element nodes
  853. highlightRecursive.call(this, node.childNodes[j]);
  854. }
  855. }
  856. //indicate that this node has been highlighted
  857. node.className += " " + this.options.classPrefix + "highlighted";
  858. //if the node is block level, we put it in a container, otherwise we just leave it alone
  859. if (getComputedStyle(node, "display") === "block") {
  860. container = document.createElement("div");
  861. container.className = this.options.classPrefix + "container";
  862. codeContainer = document.createElement("div");
  863. codeContainer.className = this.options.classPrefix + "code-container";
  864. //apply max height if specified in options
  865. if (this.options.maxHeight !== false) {
  866. codeContainer.style.overflowY = "auto";
  867. codeContainer.style.maxHeight = this.options.maxHeight + (/^\d+$/.test(this.options.maxHeight) ? "px" : "");
  868. }
  869. container.appendChild(codeContainer);
  870. node.parentNode.insertBefore(codeContainer, node);
  871. node.parentNode.removeChild(node);
  872. codeContainer.appendChild(node);
  873. codeContainer.parentNode.insertBefore(container, codeContainer);
  874. codeContainer.parentNode.removeChild(codeContainer);
  875. container.appendChild(codeContainer);
  876. }
  877. fireEvent("afterHighlightNode", this, {
  878. container: container,
  879. codeContainer: codeContainer,
  880. node: node,
  881. count: currentNodeCount
  882. });
  883. }
  884. };
  885. }());
  //public facing object
  window.Sunlight = {
    version: "1.18",
    Highlighter: Highlighter,

    //creates an analyzer from the default analyzer (via create())
    createAnalyzer: function() { return create(defaultAnalyzer); },

    globalOptions: globalOptions,

    //passes every element in the document to a new Highlighter built with the given options
    highlightAll: function(options) {
      var highlighter = new Highlighter(options),
        tags = document.getElementsByTagName("*"),
        i;
      for (i = 0; i < tags.length; i++) {
        highlighter.highlightNode(tags[i]);
      }
    },

    //registers a language definition under languageId
    //languageData is merged over the language defaults; keywords, operators and custom tokens are converted
    //into hash maps and the embeddedLanguages object into an array
    //throws (a string) when languageId is falsy
    registerLanguage: function(languageId, languageData) {
      var tokenName,
        customTokens,
        embeddedLanguages,
        languageName;

      if (!languageId) {
        throw "Languages must be registered with an identifier, e.g. \"php\" for PHP";
      }

      languageData = merge(merge({}, languageDefaults), languageData);
      languageData.name = languageId;

      //transform keywords, operators and custom tokens into a hash map
      languageData.keywords = createHashMap(languageData.keywords || [], "\\b", languageData.caseInsensitive);
      languageData.operators = createHashMap(languageData.operators || [], "", languageData.caseInsensitive);

      //the custom token hash maps go into a fresh object: languageData.customTokens may still be the very
      //object the caller passed in (merge may have copied the reference as-is), and overwriting its entries
      //would destroy the caller's definitions, breaking a second registration of the same definition
      customTokens = {};
      for (tokenName in languageData.customTokens) {
        customTokens[tokenName] = createHashMap(
          languageData.customTokens[tokenName].values,
          languageData.customTokens[tokenName].boundary,
          languageData.caseInsensitive
        );
      }
      languageData.customTokens = customTokens;

      //convert the embedded language object to an easier-to-use array
      embeddedLanguages = [];
      for (languageName in languageData.embeddedLanguages) {
        embeddedLanguages.push({
          parentLanguage: languageData.name,
          language: languageName,
          switchTo: languageData.embeddedLanguages[languageName].switchTo,
          switchBack: languageData.embeddedLanguages[languageName].switchBack
        });
      }
      languageData.embeddedLanguages = embeddedLanguages;

      languages[languageData.name] = languageData;
    },

    //true if a language has been registered under languageId
    isRegistered: function(languageId) { return languages[languageId] !== undefined; },

    //adds a callback for the named event; throws (a string) when the event name is unknown
    bind: function(event, callback) {
      if (!events[event]) {
        throw "Unknown event \"" + event + "\"";
      }
      events[event].push(callback);
    },

    //helpers exposed publicly
    util: {
      last: last,
      regexEscape: regexEscape,
      eol: EOL,
      clone: clone,
      escapeSequences: ["\\n", "\\t", "\\r", "\\\\", "\\v", "\\f"],
      contains: contains,
      matchWord: matchWord,
      createHashMap: createHashMap,
      createBetweenRule: createBetweenRule,
      createProceduralRule: createProceduralRule,

      //tokens named "default" are treated as whitespace by the two helpers below
      getNextNonWsToken: function(tokens, index) {
        return getNextWhile(tokens, index, 1, function(token) { return token.name === "default"; });
      },
      getPreviousNonWsToken: function(tokens, index) {
        return getNextWhile(tokens, index, -1, function(token) { return token.name === "default"; });
      },

      //public wrappers around the internal getNextWhile, with the direction argument fixed to 1 or -1
      getNextWhile: function(tokens, index, matcher) { return getNextWhile(tokens, index, 1, matcher); },
      getPreviousWhile: function(tokens, index, matcher) { return getNextWhile(tokens, index, -1, matcher); },

      whitespace: { token: "default", optional: true },
      getComputedStyle: getComputedStyle
    }
  };
  //register the default language
  //its punctuation regex /(?!x)x/ can never match anything, and its number parser is the EMPTY placeholder
  window.Sunlight.registerLanguage(DEFAULT_LANGUAGE, {
    punctuation: /(?!x)x/,
    numberParser: EMPTY
  });
  960. }(this, document));