regjsgen.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. /*!
  2. * regjsgen 0.5.0
  3. * Copyright 2014-2018 Benjamin Tan <https://bnjmnt4n.now.sh/>
  4. * Available under MIT license <https://github.com/bnjmnt4n/regjsgen/blob/master/LICENSE>
  5. */
  6. ;(function() {
  7. 'use strict';
  // `typeof` results that denote values of the language type `Object`.
  var objectTypes = {
    'object': true,
    'function': true
  };

  // Reference to the global object: `window` when it is a usable object,
  // otherwise the `this` value this wrapper function was invoked with.
  var root = this;
  if (objectTypes[typeof window] && window) {
    root = window;
  }

  // Free variable `exports`; falsy unless it is an object/function without
  // a `nodeType` property.
  var freeExports = objectTypes[typeof exports] && exports && !exports.nodeType && exports;

  // Truthy when a free variable `module` exists and has no `nodeType` property.
  var hasFreeModule = objectTypes[typeof module] && module && !module.nodeType;

  // Free variable `global` (Node.js or Browserified code). It replaces `root`
  // only when it refers back to itself through `global`, `window` or `self`.
  var freeGlobal = freeExports && hasFreeModule && typeof global == 'object' && global;
  if (freeGlobal) {
    if (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal || freeGlobal.self === freeGlobal) {
      root = freeGlobal;
    }
  }

  // Cached so own-property checks do not depend on the inspected object.
  var hasOwnProperty = Object.prototype.hasOwnProperty;
  26. /*--------------------------------------------------------------------------*/
  // Converts one Unicode code point (first argument, coerced with `Number`)
  // into the string it denotes.
  // Based on https://mths.be/fromcodepoint by @mathias.
  //
  // Throws a `RangeError` when the value is not an integer in the range
  // 0..0x10FFFF.
  function fromCodePoint() {
    var value = Number(arguments[0]);
    var isValid = isFinite(value) && // rules out `NaN` and both infinities
      value >= 0 &&
      value <= 0x10FFFF &&
      Math.floor(value) == value; // must be an integer
    if (!isValid) {
      throw RangeError('Invalid code point: ' + value);
    }
    if (value > 0xFFFF) {
      // Astral code point: emit a high/low surrogate pair.
      // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
      var offset = value - 0x10000;
      return String.fromCharCode(
        (offset >> 10) + 0xD800,
        (offset % 0x400) + 0xDC00
      );
    }
    // BMP code point: a single code unit is enough.
    return String.fromCharCode(value);
  }
  51. /*--------------------------------------------------------------------------*/
  // Cache of compiled matchers, keyed by the `a|b|c` expectation string.
  var assertTypeRegexMap = {};

  // Verifies that a node type is one of the allowed types.
  //
  // `type` is the node type to check; `expected` is either a single type
  // name or several names separated by `|`. Returns nothing when the type
  // is acceptable and throws an `Error` otherwise.
  function assertType(type, expected) {
    var isUnion = expected.indexOf('|') != -1;
    if (!isUnion) {
      if (type != expected) {
        throw Error('Invalid node type: ' + type + '; expected type: ' + expected);
      }
      return;
    }
    // Several candidates: compile an anchored alternation once and reuse it.
    var matcher;
    if (hasOwnProperty.call(assertTypeRegexMap, expected)) {
      matcher = assertTypeRegexMap[expected];
    } else {
      matcher = assertTypeRegexMap[expected] = RegExp('^(?:' + expected + ')$');
    }
    if (!matcher.test(type)) {
      throw Error('Invalid node type: ' + type + '; expected types: ' + matcher);
    }
  }
  69. /*--------------------------------------------------------------------------*/
  // Produces the regular expression source for an AST node by dispatching
  // on `node.type` to the matching entry of `generators`.
  // Throws an `Error` when no generator is registered for the type.
  function generate(node) {
    var type = node.type;
    if (!hasOwnProperty.call(generators, type)) {
      throw Error('Invalid node type: ' + type);
    }
    return generators[type](node);
  }
  78. /*--------------------------------------------------------------------------*/
  // Generates an `alternative` node: the generated source of each term in
  // `node.body`, concatenated in order.
  function generateAlternative(node) {
    assertType(node.type, 'alternative');

    var terms = node.body;
    var count = terms.length;
    var output = '';
    for (var index = 0; index < count; index++) {
      output += generateTerm(terms[index]);
    }
    return output;
  }
  // Generates an `anchor` node from its `kind`:
  // `start` -> `^`, `end` -> `$`, `boundary` -> `\b`, `not-boundary` -> `\B`.
  // Throws an `Error` for any other kind.
  function generateAnchor(node) {
    assertType(node.type, 'anchor');

    var kind = node.kind;
    if (kind === 'start') {
      return '^';
    }
    if (kind === 'end') {
      return '$';
    }
    if (kind === 'boundary') {
      return '\\b';
    }
    if (kind === 'not-boundary') {
      return '\\B';
    }
    throw Error('Invalid assertion');
  }
  // Generates an atom, i.e. the node a quantifier applies to.
  //
  // Accepts `anchor`, `characterClass`, `characterClassEscape`, `dot`,
  // `group`, `reference`, `unicodePropertyEscape` and `value` nodes and
  // delegates to `generate`. Throws (via `assertType`) for any other type.
  //
  // `unicodePropertyEscape` is part of the list so that a quantified
  // property escape such as `\p{L}+` is not rejected; a generator for that
  // node type is registered in `generators`.
  function generateAtom(node) {
    assertType(node.type, 'anchor|characterClass|characterClassEscape|dot|group|reference|unicodePropertyEscape|value');

    return generate(node);
  }
  // Generates a `characterClass` node: `[` + optional `^` (when
  // `node.negative` is truthy) + every class atom of `node.body` + `]`.
  function generateCharacterClass(node) {
    assertType(node.type, 'characterClass');

    var members = node.body;
    var count = members.length;
    var inner = node.negative ? '^' : '';
    for (var index = 0; index < count; index++) {
      inner += generateClassAtom(members[index]);
    }
    return '[' + inner + ']';
  }
  // Generates a `characterClassEscape` node: a backslash followed by
  // `node.value`.
  function generateCharacterClassEscape(node) {
    assertType(node.type, 'characterClassEscape');

    var escapeName = node.value;
    return '\\' + escapeName;
  }
  // Generates a `unicodePropertyEscape` node: `\p{value}`, or `\P{value}`
  // when `node.negative` is truthy.
  function generateUnicodePropertyEscape(node) {
    assertType(node.type, 'unicodePropertyEscape');

    var letter;
    if (node.negative) {
      letter = 'P';
    } else {
      letter = 'p';
    }
    return '\\' + letter + '{' + node.value + '}';
  }
  // Generates a `characterClassRange` node as `<min>-<max>`.
  // Throws an `Error` when either end point is itself a range.
  function generateCharacterClassRange(node) {
    assertType(node.type, 'characterClassRange');

    var lower = node.min;
    var upper = node.max;
    var hasNestedRange = lower.type == 'characterClassRange' ||
      upper.type == 'characterClassRange';
    if (hasNestedRange) {
      throw Error('Invalid character class range');
    }

    var lowerSource = generateClassAtom(lower);
    var upperSource = generateClassAtom(upper);
    return lowerSource + '-' + upperSource;
  }
  // Generates one member of a character class.
  //
  // Accepts `anchor`, `characterClassEscape`, `characterClassRange`, `dot`,
  // `unicodePropertyEscape` and `value` nodes and delegates to `generate`.
  // Throws (via `assertType`) for any other type.
  //
  // `unicodePropertyEscape` is part of the list so that classes such as
  // `[\p{L}\d]` are not rejected; a generator for that node type is
  // registered in `generators`.
  function generateClassAtom(node) {
    assertType(node.type, 'anchor|characterClassEscape|characterClassRange|dot|unicodePropertyEscape|value');

    return generate(node);
  }
  // Generates a `disjunction` node: the generated source of each entry of
  // `node.body`, separated by `|`.
  function generateDisjunction(node) {
    assertType(node.type, 'disjunction');

    var alternatives = node.body;
    var count = alternatives.length;
    var output = '';
    for (var index = 0; index < count; index++) {
      // Every alternative except the first is preceded by a separator.
      output += (index > 0 ? '|' : '') + generate(alternatives[index]);
    }
    return output;
  }
  // Generates a `dot` node, which is always the single character `.`.
  function generateDot(node) {
    var nodeType = node.type;
    assertType(nodeType, 'dot');

    return '.';
  }
  // Generates a `group` node: `(` + behaviour prefix + generated body + `)`.
  //
  // The prefix is chosen from `node.behavior`:
  //   `normal`             -> none, or `?<name>` when `node.name` is set
  //   `ignore`             -> `?:`
  //   `lookahead`          -> `?=`
  //   `negativeLookahead`  -> `?!`
  //   `lookbehind`         -> `?<=`
  //   `negativeLookbehind` -> `?<!`
  //
  // Throws an `Error` naming the offending value for any other behaviour.
  function generateGroup(node) {
    assertType(node.type, 'group');

    var result = '';

    switch (node.behavior) {
      case 'normal':
        if (node.name) {
          result += '?<' + generateIdentifier(node.name) + '>';
        }
        break;
      case 'ignore':
        result += '?:';
        break;
      case 'lookahead':
        result += '?=';
        break;
      case 'negativeLookahead':
        result += '?!';
        break;
      case 'lookbehind':
        result += '?<=';
        break;
      case 'negativeLookbehind':
        result += '?<!';
        break;
      default:
        // Report the property that was actually switched on (`behavior`);
        // the message text keeps its established wording.
        throw Error('Invalid behaviour: ' + node.behavior);
    }

    var body = node.body,
        i = -1,
        length = body.length;

    while (++i < length) {
      result += generate(body[i]);
    }

    return '(' + result + ')';
  }
  // Generates an `identifier` node by returning its `value` unchanged.
  function generateIdentifier(node) {
    assertType(node.type, 'identifier');

    var identifierName = node.value;
    return identifierName;
  }
  // Generates a `quantifier` node: the atom in `node.body[0]` followed by
  // the shortest spelling of its repetition count.
  //
  //   no max:  min 0 -> `*`, min 1 -> `+`, otherwise `{min,}`
  //   min equal to max -> `{min}`
  //   0..1     -> `?`
  //   otherwise `{min,max}`
  //
  // A trailing `?` is appended when `node.greedy` is falsy.
  function generateQuantifier(node) {
    assertType(node.type, 'quantifier');

    var lower = node.min;
    var upper = node.max;
    var suffix;

    if (upper == null) {
      // Unbounded repetition.
      suffix = lower == 0 ? '*' : (lower == 1 ? '+' : '{' + lower + ',}');
    } else if (lower == upper) {
      suffix = '{' + lower + '}';
    } else if (lower == 0 && upper == 1) {
      suffix = '?';
    } else {
      suffix = '{' + lower + ',' + upper + '}';
    }

    if (!node.greedy) {
      suffix += '?';
    }

    return generateAtom(node.body[0]) + suffix;
  }
  // Generates a `reference` node: `\<matchIndex>` for a numbered back
  // reference, `\k<name>` for a named one.
  // Throws an `Error` when neither `matchIndex` nor `name` is truthy.
  function generateReference(node) {
    assertType(node.type, 'reference');

    var index = node.matchIndex;
    if (index) {
      return '\\' + index;
    }

    var groupName = node.name;
    if (groupName) {
      return '\\k<' + generateIdentifier(groupName) + '>';
    }

    throw new Error('Unknown reference type');
  }
  // Generates one term of an alternative.
  //
  // Accepts `anchor`, `characterClass`, `characterClassEscape`, `dot`,
  // `empty`, `group`, `quantifier`, `reference`, `unicodePropertyEscape`
  // and `value` nodes and delegates to `generate`. Throws (via
  // `assertType`) for any other type.
  //
  // `dot` is part of the list so that an unquantified `.` inside an
  // alternative (e.g. `a.b`) is not rejected; `generateAtom` already accepts
  // it and a generator for it is registered in `generators`.
  //
  // NOTE(review): `empty` passes the type check here, but no `empty` entry
  // is visible in `generators`, so `generate` would still reject such a
  // node - confirm whether that is intended.
  function generateTerm(node) {
    assertType(node.type, 'anchor|characterClass|characterClassEscape|dot|empty|group|quantifier|reference|unicodePropertyEscape|value');

    return generate(node);
  }
  // Generates a `value` node, i.e. a single character written in the
  // notation named by `node.kind`:
  //
  //   `symbol`                 -> the character itself
  //   `identifier`             -> backslash + the character
  //   `controlLetter`          -> `\c` + the character for `codePoint + 64`
  //   `null`                   -> backslash + the decimal code point
  //   `octal`                  -> backslash + the octal code point
  //   `singleEscape`           -> `\b`, `\t`, `\n`, `\v`, `\f` or `\r`
  //   `hexadecimalEscape`      -> `\x` + last 2 upper-case hex digits
  //   `unicodeEscape`          -> `\u` + last 4 upper-case hex digits
  //   `unicodeCodePointEscape` -> `\u{` + upper-case hex digits + `}`
  //
  // Throws an `Error` when `node.codePoint` is not a number, when a
  // `singleEscape` code point is outside 0x08..0x0D, or when the kind is
  // not one of the above.
  function generateValue(node) {
    assertType(node.type, 'value');

    var kind = node.kind;
    var codePoint = node.codePoint;

    if (typeof codePoint != 'number') {
      throw new Error('Invalid code point: ' + codePoint);
    }

    if (kind === 'symbol') {
      return fromCodePoint(codePoint);
    }
    if (kind === 'identifier') {
      return '\\' + fromCodePoint(codePoint);
    }
    if (kind === 'controlLetter') {
      return '\\c' + fromCodePoint(codePoint + 64);
    }
    if (kind === 'null') {
      return '\\' + codePoint;
    }
    if (kind === 'octal') {
      return '\\' + codePoint.toString(8);
    }
    if (kind === 'singleEscape') {
      // Escape letters for the consecutive code points 0x08 through 0x0D.
      var escapeLetters = ['b', 't', 'n', 'v', 'f', 'r'];
      var letter = escapeLetters[codePoint - 0x0008];
      if (letter === undefined) {
        throw Error('Invalid code point: ' + codePoint);
      }
      return '\\' + letter;
    }
    if (kind === 'hexadecimalEscape') {
      return '\\x' + ('00' + codePoint.toString(16).toUpperCase()).slice(-2);
    }
    if (kind === 'unicodeEscape') {
      return '\\u' + ('0000' + codePoint.toString(16).toUpperCase()).slice(-4);
    }
    if (kind === 'unicodeCodePointEscape') {
      return '\\u{' + codePoint.toString(16).toUpperCase() + '}';
    }

    throw Error('Unsupported node kind: ' + kind);
  }
  285. /*--------------------------------------------------------------------------*/
  // Dispatch table consulted by `generate`: maps each supported node type
  // to the function that produces its source text.
  var generators = {
    alternative: generateAlternative,
    anchor: generateAnchor,
    characterClass: generateCharacterClass,
    characterClassEscape: generateCharacterClassEscape,
    characterClassRange: generateCharacterClassRange,
    disjunction: generateDisjunction,
    dot: generateDot,
    group: generateGroup,
    quantifier: generateQuantifier,
    reference: generateReference,
    unicodePropertyEscape: generateUnicodePropertyEscape,
    value: generateValue
  };
  301. /*--------------------------------------------------------------------------*/
  // Public API object.
  var regjsgen = { 'generate': generate };

  // AMD loaders. The condition is spelled out in exactly this form because
  // some AMD build optimizers, like r.js, look for this pattern.
  if (typeof define == 'function' && typeof define.amd == 'object' && define.amd) {
    // Anonymous module, so it can be aliased through path mapping.
    define(function() {
      return regjsgen;
    });
    // The global is populated as well in this branch.
    root.regjsgen = regjsgen;
  }
  // `exports` is checked after `define` in case a build optimizer adds an
  // `exports` object.
  else if (freeExports && hasFreeModule) {
    // CommonJS: attach the API function to the `exports` object.
    freeExports.generate = regjsgen.generate;
  }
  else {
    // Neither module system was detected: expose on the global object.
    root.regjsgen = regjsgen;
  }
  323. }.call(this));