lexer.js 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682
  1. define([
  2. 'lodash'
  3. ], function(_) {
  4. 'use strict';
  // This is auto generated from the unicode tables.
  // The tables are at:
  // http://www.fileformat.info/info/unicode/category/Lu/list.htm
  // http://www.fileformat.info/info/unicode/category/Ll/list.htm
  // http://www.fileformat.info/info/unicode/category/Lt/list.htm
  // http://www.fileformat.info/info/unicode/category/Lm/list.htm
  // http://www.fileformat.info/info/unicode/category/Lo/list.htm
  // http://www.fileformat.info/info/unicode/category/Nl/list.htm
  //
  // Layout: a flat, ascending list of inclusive [first, last] code point
  // pairs. A single code point is stored as a pair with first === last.
  //
  // The large CJK / Hangul blocks are listed in the source tables only by
  // their "First" and "Last" entries. They are stored here as one range each
  // (13312-19893, 19968-40907, 44032-55203, 131072-173782, 173824-177972,
  // 177984-178205) so that every code point inside those blocks is matched,
  // not just the two end points.
  var unicodeLetterTable = [
    170, 170, 181, 181, 186, 186, 192, 214,
    216, 246, 248, 705, 710, 721, 736, 740, 748, 748, 750, 750,
    880, 884, 886, 887, 890, 893, 902, 902, 904, 906, 908, 908,
    910, 929, 931, 1013, 1015, 1153, 1162, 1319, 1329, 1366,
    1369, 1369, 1377, 1415, 1488, 1514, 1520, 1522, 1568, 1610,
    1646, 1647, 1649, 1747, 1749, 1749, 1765, 1766, 1774, 1775,
    1786, 1788, 1791, 1791, 1808, 1808, 1810, 1839, 1869, 1957,
    1969, 1969, 1994, 2026, 2036, 2037, 2042, 2042, 2048, 2069,
    2074, 2074, 2084, 2084, 2088, 2088, 2112, 2136, 2308, 2361,
    2365, 2365, 2384, 2384, 2392, 2401, 2417, 2423, 2425, 2431,
    2437, 2444, 2447, 2448, 2451, 2472, 2474, 2480, 2482, 2482,
    2486, 2489, 2493, 2493, 2510, 2510, 2524, 2525, 2527, 2529,
    2544, 2545, 2565, 2570, 2575, 2576, 2579, 2600, 2602, 2608,
    2610, 2611, 2613, 2614, 2616, 2617, 2649, 2652, 2654, 2654,
    2674, 2676, 2693, 2701, 2703, 2705, 2707, 2728, 2730, 2736,
    2738, 2739, 2741, 2745, 2749, 2749, 2768, 2768, 2784, 2785,
    2821, 2828, 2831, 2832, 2835, 2856, 2858, 2864, 2866, 2867,
    2869, 2873, 2877, 2877, 2908, 2909, 2911, 2913, 2929, 2929,
    2947, 2947, 2949, 2954, 2958, 2960, 2962, 2965, 2969, 2970,
    2972, 2972, 2974, 2975, 2979, 2980, 2984, 2986, 2990, 3001,
    3024, 3024, 3077, 3084, 3086, 3088, 3090, 3112, 3114, 3123,
    3125, 3129, 3133, 3133, 3160, 3161, 3168, 3169, 3205, 3212,
    3214, 3216, 3218, 3240, 3242, 3251, 3253, 3257, 3261, 3261,
    3294, 3294, 3296, 3297, 3313, 3314, 3333, 3340, 3342, 3344,
    3346, 3386, 3389, 3389, 3406, 3406, 3424, 3425, 3450, 3455,
    3461, 3478, 3482, 3505, 3507, 3515, 3517, 3517, 3520, 3526,
    3585, 3632, 3634, 3635, 3648, 3654, 3713, 3714, 3716, 3716,
    3719, 3720, 3722, 3722, 3725, 3725, 3732, 3735, 3737, 3743,
    3745, 3747, 3749, 3749, 3751, 3751, 3754, 3755, 3757, 3760,
    3762, 3763, 3773, 3773, 3776, 3780, 3782, 3782, 3804, 3805,
    3840, 3840, 3904, 3911, 3913, 3948, 3976, 3980, 4096, 4138,
    4159, 4159, 4176, 4181, 4186, 4189, 4193, 4193, 4197, 4198,
    4206, 4208, 4213, 4225, 4238, 4238, 4256, 4293, 4304, 4346,
    4348, 4348, 4352, 4680, 4682, 4685, 4688, 4694, 4696, 4696,
    4698, 4701, 4704, 4744, 4746, 4749, 4752, 4784, 4786, 4789,
    4792, 4798, 4800, 4800, 4802, 4805, 4808, 4822, 4824, 4880,
    4882, 4885, 4888, 4954, 4992, 5007, 5024, 5108, 5121, 5740,
    5743, 5759, 5761, 5786, 5792, 5866, 5870, 5872, 5888, 5900,
    5902, 5905, 5920, 5937, 5952, 5969, 5984, 5996, 5998, 6000,
    6016, 6067, 6103, 6103, 6108, 6108, 6176, 6263, 6272, 6312,
    6314, 6314, 6320, 6389, 6400, 6428, 6480, 6509, 6512, 6516,
    6528, 6571, 6593, 6599, 6656, 6678, 6688, 6740, 6823, 6823,
    6917, 6963, 6981, 6987, 7043, 7072, 7086, 7087, 7104, 7141,
    7168, 7203, 7245, 7247, 7258, 7293, 7401, 7404, 7406, 7409,
    7424, 7615, 7680, 7957, 7960, 7965, 7968, 8005, 8008, 8013,
    8016, 8023, 8025, 8025, 8027, 8027, 8029, 8029, 8031, 8061,
    8064, 8116, 8118, 8124, 8126, 8126, 8130, 8132, 8134, 8140,
    8144, 8147, 8150, 8155, 8160, 8172, 8178, 8180, 8182, 8188,
    8305, 8305, 8319, 8319, 8336, 8348, 8450, 8450, 8455, 8455,
    8458, 8467, 8469, 8469, 8473, 8477, 8484, 8484, 8486, 8486,
    8488, 8488, 8490, 8493, 8495, 8505, 8508, 8511, 8517, 8521,
    8526, 8526, 8544, 8584, 11264, 11310, 11312, 11358,
    11360, 11492, 11499, 11502, 11520, 11557, 11568, 11621,
    11631, 11631, 11648, 11670, 11680, 11686, 11688, 11694,
    11696, 11702, 11704, 11710, 11712, 11718, 11720, 11726,
    11728, 11734, 11736, 11742, 11823, 11823, 12293, 12295,
    12321, 12329, 12337, 12341, 12344, 12348, 12353, 12438,
    12445, 12447, 12449, 12538, 12540, 12543, 12549, 12589,
    12593, 12686, 12704, 12730, 12784, 12799, 13312, 19893,
    19968, 40907, 40960, 42124,
    42192, 42237, 42240, 42508, 42512, 42527, 42538, 42539,
    42560, 42606, 42623, 42647, 42656, 42735, 42775, 42783,
    42786, 42888, 42891, 42894, 42896, 42897, 42912, 42921,
    43002, 43009, 43011, 43013, 43015, 43018, 43020, 43042,
    43072, 43123, 43138, 43187, 43250, 43255, 43259, 43259,
    43274, 43301, 43312, 43334, 43360, 43388, 43396, 43442,
    43471, 43471, 43520, 43560, 43584, 43586, 43588, 43595,
    43616, 43638, 43642, 43642, 43648, 43695, 43697, 43697,
    43701, 43702, 43705, 43709, 43712, 43712, 43714, 43714,
    43739, 43741, 43777, 43782, 43785, 43790, 43793, 43798,
    43808, 43814, 43816, 43822, 43968, 44002, 44032, 55203,
    55216, 55238, 55243, 55291, 63744, 64045,
    64048, 64109, 64112, 64217, 64256, 64262, 64275, 64279,
    64285, 64285, 64287, 64296, 64298, 64310, 64312, 64316,
    64318, 64318, 64320, 64321, 64323, 64324, 64326, 64433,
    64467, 64829, 64848, 64911, 64914, 64967, 65008, 65019,
    65136, 65140, 65142, 65276, 65313, 65338, 65345, 65370,
    65382, 65470, 65474, 65479, 65482, 65487, 65490, 65495,
    65498, 65500, 65536, 65547, 65549, 65574, 65576, 65594,
    65596, 65597, 65599, 65613, 65616, 65629, 65664, 65786,
    65856, 65908, 66176, 66204, 66208, 66256, 66304, 66334,
    66352, 66378, 66432, 66461, 66464, 66499, 66504, 66511,
    66513, 66517, 66560, 66717, 67584, 67589, 67592, 67592,
    67594, 67637, 67639, 67640, 67644, 67644, 67647, 67669,
    67840, 67861, 67872, 67897, 68096, 68096, 68112, 68115,
    68117, 68119, 68121, 68147, 68192, 68220, 68352, 68405,
    68416, 68437, 68448, 68466, 68608, 68680, 69635, 69687,
    69763, 69807, 73728, 74606, 74752, 74850, 77824, 78894,
    92160, 92728, 110592, 110593, 119808, 119892, 119894, 119964,
    119966, 119967, 119970, 119970, 119973, 119974, 119977, 119980,
    119982, 119993, 119995, 119995, 119997, 120003, 120005, 120069,
    120071, 120074, 120077, 120084, 120086, 120092, 120094, 120121,
    120123, 120126, 120128, 120132, 120134, 120134, 120138, 120144,
    120146, 120485, 120488, 120512, 120514, 120538, 120540, 120570,
    120572, 120596, 120598, 120628, 120630, 120654, 120656, 120686,
    120688, 120712, 120714, 120744, 120746, 120770, 120772, 120779,
    131072, 173782, 173824, 177972, 177984, 178205, 194560, 195101
  ];
  // ASCII lookup table: identifierStartTable[code] is true when the
  // character with that char code (0-127) may begin an identifier.
  // Accepted: 0-9, A-Z, a-z and the symbols $ ~ | _ - * : [ ] ? % # =
  var identifierStartTable = [];
  for (var startCode = 0; startCode < 128; startCode++) {
    identifierStartTable[startCode] =
      (/^[0-9A-Za-z$~|_\-*:\[\]?%#=]$/).test(String.fromCharCode(startCode));
  }

  // ASCII lookup table for characters that may continue an identifier:
  // everything accepted as a start character, plus the digits 0-9 (which the
  // start table already includes).
  var identifierPartTable = [];
  for (var partCode = 0; partCode < 128; partCode++) {
    var isAsciiDigit = partCode >= 48 && partCode <= 57;
    identifierPartTable[partCode] = identifierStartTable[partCode] || isAsciiDigit;
  }
  /**
   * Creates a lexer over an expression string.
   *
   * The lexer consumes `input` from the front as tokens are produced, so
   * `input` always holds the part of the expression not scanned yet.
   *
   * @param {string} expression - Text to tokenize.
   */
  function Lexer(expression) {
    this.input = expression;
    // 1-based position, within the original expression, of the first
    // character still left in `input`.
    this.char = 1;
    // 1-based position at which the most recent call to next() started
    // scanning (after leading whitespace).
    this.from = 1;
  }

  Lexer.prototype = {
    /**
     * Returns the character `i` positions ahead without consuming it.
     *
     * @param {number} [i=0] - Offset from the current position.
     * @returns {string} One character, or "" when the offset is past the end.
     */
    peek: function (i) {
      return this.input.charAt(i || 0);
    },

    /**
     * Consumes `i` characters from the front of the input.
     *
     * @param {number} [i=1] - Number of characters to drop. A falsy value
     *   (including 0) is treated as 1.
     */
    skip: function (i) {
      i = i || 1;
      this.char += i;
      this.input = this.input.slice(i);
    },

    /**
     * Scans the whole remaining input.
     *
     * @returns {Array<Object>} Tokens in input order. Scanning stops at the
     *   end of input or at the first position where no token matches.
     */
    tokenize: function () {
      var list = [];
      var token = this.next();
      while (token) {
        list.push(token);
        token = this.next();
      }
      return list;
    },

    /**
     * Produces the next token and consumes it from the input.
     *
     * @returns {Object|null} A token object (`type`, `value`, `pos`, plus
     *   scanner-specific fields), or null at end of input or when nothing
     *   matches at the current position.
     */
    next: function () {
      this.from = this.char;

      // Move to the next non-space character.
      while (/\s/.test(this.peek())) {
        this.from += 1;
        this.skip();
      }

      // End of input: no scanner can match, so skip running them.
      if (this.peek() === "") {
        return null;
      }

      // String literals consume their own characters (quotes are not part
      // of `value`, so value.length cannot be used to advance).
      var match = this.scanStringLiteral();
      if (match) {
        return match;
      }

      match =
        this.scanPunctuator() ||
        this.scanNumericLiteral() ||
        this.scanIdentifier() ||
        this.scanTemplateSequence();

      if (match) {
        this.skip(match.value.length);
        return match;
      }

      // No token could be matched, give up.
      return null;
    },

    /**
     * Recognizes the two-character template delimiters `[[` and `]]` at the
     * current position. Does not consume input.
     *
     * NOTE(review): next() tries scanIdentifier() first, and the identifier
     * tables accept '[' and ']', so this scanner looks unreachable through
     * next() - confirm whether that ordering is intended.
     *
     * @returns {Object|null} `templateStart` / `templateEnd` token or null.
     */
    scanTemplateSequence: function () {
      var pair = this.peek() + this.peek(1);

      if (pair === '[[') {
        return {
          type: 'templateStart',
          value: '[[',
          pos: this.char
        };
      }

      if (pair === ']]') {
        return {
          type: 'templateEnd',
          value: ']]',
          pos: this.char
        };
      }

      return null;
    },

    /*
     * Extract an identifier out of the next sequence of characters or
     * return 'null' if its not possible. In addition to identifiers this
     * method also produces boolean tokens (type 'bool') for the exact
     * words 'true' and 'false'. Does not consume input; next() advances
     * by the length of the returned value.
     */
    scanIdentifier: function () {
      var self = this;
      var index = 0;

      // Detects any character in the Unicode categories "Uppercase
      // letter (Lu)", "Lowercase letter (Ll)", "Titlecase letter
      // (Lt)", "Modifier letter (Lm)", "Other letter (Lo)", or
      // "Letter number (Nl)".
      //
      // Both approach and unicodeLetterTable were borrowed from
      // Google's Traceur.
      function isUnicodeLetter(code) {
        // The table is an ascending list of inclusive [first, last] pairs.
        for (var i = 0; i < unicodeLetterTable.length; i += 2) {
          if (code < unicodeLetterTable[i]) {
            return false;
          }
          if (code <= unicodeLetterTable[i + 1]) {
            return true;
          }
        }
        return false;
      }

      function isHexDigit(str) {
        return (/^[0-9a-fA-F]$/).test(str);
      }

      // Reads a `\uXXXX` escape starting at `index` (which points at the
      // backslash). Returns the six-character escape text unchanged when
      // XXXX names a Unicode letter, otherwise null.
      function readUnicodeEscapeSequence() {
        index += 1;

        if (self.peek(index) !== "u") {
          return null;
        }

        var ch1 = self.peek(index + 1);
        var ch2 = self.peek(index + 2);
        var ch3 = self.peek(index + 3);
        var ch4 = self.peek(index + 4);

        if (!(isHexDigit(ch1) && isHexDigit(ch2) && isHexDigit(ch3) && isHexDigit(ch4))) {
          return null;
        }

        var code = parseInt(ch1 + ch2 + ch3 + ch4, 16);
        if (!isUnicodeLetter(code)) {
          return null;
        }

        index += 5;
        return "\\u" + ch1 + ch2 + ch3 + ch4;
      }

      // Reads one identifier character at `index`, using `asciiTable` to
      // decide which ASCII characters are acceptable. Returns the text to
      // append to the identifier, or null when the character does not fit.
      function readIdentifierChar(asciiTable) {
        var chr = self.peek(index);

        // End of input: avoid walking the Unicode table with NaN.
        if (chr === "") {
          return null;
        }

        var code = chr.charCodeAt(0);

        if (code === 92) { // backslash
          return readUnicodeEscapeSequence();
        }

        var accepted = code < 128 ? asciiTable[code] : isUnicodeLetter(code);
        if (!accepted) {
          return null;
        }

        index += 1;
        return chr;
      }

      function getIdentifierStart() {
        // '*' is always accepted as the first character.
        if (self.peek(index) === '*') {
          index += 1;
          return '*';
        }
        return readIdentifierChar(identifierStartTable);
      }

      var id = getIdentifierStart();
      if (id === null) {
        return null;
      }

      for (;;) {
        var part = readIdentifierChar(identifierPartTable);
        if (part === null) {
          break;
        }
        id += part;
      }

      return {
        type: (id === 'true' || id === 'false') ? 'bool' : 'identifier',
        value: id,
        pos: this.char
      };
    },

    /*
     * Extract a numeric literal out of the next sequence of
     * characters or return 'null' if its not possible. Handles an
     * optional leading '-', hexadecimal (0x...), legacy octal (0...),
     * and decimal literals with optional fraction and exponent.
     * Does not consume input; next() advances by the length of the
     * returned value.
     *
     * This method's implementation was heavily influenced by the
     * scanNumericLiteral function in the Esprima parser's source code.
     */
    scanNumericLiteral: function () {
      var index = 0;
      var value = "";
      var length = this.input.length;
      var char = this.peek(index);
      var bad;

      function isDecimalDigit(str) {
        return (/^[0-9]$/).test(str);
      }

      function isOctalDigit(str) {
        return (/^[0-7]$/).test(str);
      }

      function isHexDigit(str) {
        return (/^[0-9a-fA-F]$/).test(str);
      }

      function isIdentifierStart(ch) {
        return (ch === "$") || (ch === "_") || (ch === "\\") ||
          (ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z");
      }

      // handle negative num literals
      if (char === '-') {
        value += char;
        index += 1;
        char = this.peek(index);
      }

      // Numbers must start either with a decimal digit or a point.
      if (char !== "." && !isDecimalDigit(char)) {
        return null;
      }

      if (char !== ".") {
        value += char;
        index += 1;
        char = this.peek(index);

        // Only an unsigned leading "0" selects the hex / octal branches.
        if (value === "0") {
          // Base-16 numbers.
          if (char === "x" || char === "X") {
            index += 1;
            value += char;

            while (index < length) {
              char = this.peek(index);
              if (!isHexDigit(char)) {
                break;
              }
              value += char;
              index += 1;
            }

            if (value.length <= 2) { // 0x
              return {
                type: 'number',
                value: value,
                isMalformed: true,
                pos: this.char
              };
            }

            if (index < length) {
              char = this.peek(index);
              if (isIdentifierStart(char)) {
                return null;
              }
            }

            return {
              type: 'number',
              value: value,
              base: 16,
              isMalformed: false,
              pos: this.char
            };
          }

          // Base-8 numbers.
          if (isOctalDigit(char)) {
            index += 1;
            value += char;
            bad = false;

            while (index < length) {
              char = this.peek(index);

              if (!isDecimalDigit(char)) {
                break;
              }

              // Numbers like '019' (note the 9) are not valid octals
              // but we still parse them and mark as malformed.
              if (!isOctalDigit(char)) {
                bad = true;
              }

              value += char;
              index += 1;
            }

            if (index < length) {
              char = this.peek(index);
              if (isIdentifierStart(char)) {
                return null;
              }
            }

            return {
              type: 'number',
              value: value,
              base: 8,
              isMalformed: bad,
              pos: this.char
            };
          }

          // Decimal numbers that start with '0' such as '09' are still
          // consumed here and continue through the base-10 path below.
          if (isDecimalDigit(char)) {
            index += 1;
            value += char;
          }
        }

        while (index < length) {
          char = this.peek(index);
          if (!isDecimalDigit(char)) {
            break;
          }
          value += char;
          index += 1;
        }
      }

      // Fraction part.
      if (char === ".") {
        value += char;
        index += 1;

        while (index < length) {
          char = this.peek(index);
          if (!isDecimalDigit(char)) {
            break;
          }
          value += char;
          index += 1;
        }
      }

      // Exponent part.
      if (char === "e" || char === "E") {
        value += char;
        index += 1;
        char = this.peek(index);

        if (char === "+" || char === "-") {
          value += char;
          index += 1;
        }

        char = this.peek(index);
        if (!isDecimalDigit(char)) {
          // An exponent marker must be followed by at least one digit.
          return null;
        }

        value += char;
        index += 1;

        while (index < length) {
          char = this.peek(index);
          if (!isDecimalDigit(char)) {
            break;
          }
          value += char;
          index += 1;
        }
      }

      // A decimal literal only counts when it is followed by a punctuator
      // or the end of input; anything else (including whitespace) rejects
      // it so that a later scanner can claim the text.
      if (index < length) {
        char = this.peek(index);
        if (!this.isPunctuator(char)) {
          return null;
        }
      }

      return {
        type: 'number',
        value: value,
        base: 10,
        pos: this.char,
        isMalformed: !isFinite(value)
      };
    },

    /**
     * Tells whether `ch1` is one of the single-character punctuators
     * `.`, `(`, `)`, `,`, `{`, `}`.
     *
     * @param {string} ch1 - Character to test.
     * @returns {boolean}
     */
    isPunctuator: function (ch1) {
      switch (ch1) {
        case ".":
        case "(":
        case ")":
        case ",":
        case "{":
        case "}":
          return true;
      }
      return false;
    },

    /**
     * Recognizes a punctuator at the current position. Does not consume
     * input.
     *
     * @returns {Object|null} Token whose `type` and `value` are both the
     *   punctuator character, or null.
     */
    scanPunctuator: function () {
      var ch1 = this.peek();

      if (!this.isPunctuator(ch1)) {
        return null;
      }

      return {
        type: ch1,
        value: ch1,
        pos: this.char
      };
    },

    /*
     * Extract a string out of the next sequence of characters or return
     * 'null' if the input does not start with a quote (" or ').
     *
     * Unlike the other scanners this one consumes the literal itself,
     * including both quotes. Everything up to the next occurrence of the
     * opening quote character is taken verbatim as the value; no escape
     * sequences are interpreted. When no closing quote exists, the rest
     * of the input becomes the value and the token has isUnclosed: true.
     *
     * The token's `pos` is the position after the consumed text (other
     * scanners report the start position).
     * NOTE(review): confirm callers rely on this before changing it.
     */
    scanStringLiteral: function () {
      var quote = this.peek();

      // String must start with a quote.
      if (quote !== "\"" && quote !== "'") {
        return null;
      }

      this.skip(); // opening quote

      var end = this.input.indexOf(quote);
      var isUnclosed = end === -1;
      var value = isUnclosed ? this.input : this.input.slice(0, end);

      // skip() treats 0 as 1, so only call it for non-empty content.
      if (value.length > 0) {
        this.skip(value.length);
      }

      if (!isUnclosed) {
        this.skip(); // closing quote
      }

      return {
        type: 'string',
        value: value,
        isUnclosed: isUnclosed,
        quote: quote,
        pos: this.char
      };
    }
  };
  578. return Lexer;
  579. });