// string.js — helpers for decoding and encoding CSS string tokens
  // Character codes (UTF-16 code units) referenced by the string helpers.
  var REVERSE_SOLIDUS = 0x005C;  // U+005C REVERSE SOLIDUS (\)
  var QUOTATION_MARK = 0x0022;   // U+0022 QUOTATION MARK (")
  var APOSTROPHE = 0x0027;       // U+0027 APOSTROPHE (')
  var TAB = 0x0009;              // U+0009 CHARACTER TABULATION
  var WHITESPACE = 0x0020;       // U+0020 SPACE
  var AMPERSAND = 0x0026;        // U+0026 AMPERSAND (&)
  var LESSTHANSIGN = 0x003C;     // U+003C LESS-THAN SIGN (<)
  var GREATERTHANSIGN = 0x003E;  // U+003E GREATER-THAN SIGN (>)
  /**
   * Tell whether a character code is an ASCII hexadecimal digit.
   *
   * @param {number} code - Character code to test.
   * @returns {boolean} `true` for 0-9, A-F and a-f; `false` otherwise.
   */
  function isHex(code) {
      // 0 .. 9
      if (code >= 0x30 && code <= 0x39) {
          return true;
      }

      // A .. F
      if (code >= 0x41 && code <= 0x46) {
          return true;
      }

      // a .. f
      return code >= 0x61 && code <= 0x66;
  }
  /**
   * Decode the source text of a CSS string into the value it represents.
   *
   * A leading quotation mark or apostrophe is dropped, and so is a matching
   * trailing one when present. Backslash escapes are resolved along the lines
   * of https://drafts.csswg.org/css-syntax/#consume-escaped-code-point :
   *  - backslash + newline (LF, FF, CR or a CR/LF pair) produces nothing;
   *  - backslash + 1..6 hex digits produces that code point, and a single
   *    following whitespace (space, tab, newline or CR/LF pair) is consumed;
   *    zero, surrogates and values above U+10FFFF become U+FFFD;
   *  - backslash + any other character produces that character.
   *
   * @param {string} str - String source, with or without surrounding quotes.
   * @returns {string} The decoded value.
   */
  function decodeString(str) {
      var decoded = '';
      var len = str.length;
      var firstChar = str.charCodeAt(0);
      var start = firstChar === QUOTATION_MARK || firstChar === APOSTROPHE ? 1 : 0;
      var end = start === 1 && len > 1 && str.charCodeAt(len - 1) === firstChar ? len - 2 : len - 1;

      for (var i = start; i <= end; i++) {
          var code = str.charCodeAt(i);

          if (code !== REVERSE_SOLIDUS) {
              decoded += str.charAt(i);
              continue;
          }

          // special case: the backslash is the last character of the content
          if (i === end) {
              // if the next input code point is EOF, do nothing;
              // otherwise the "closing" quote is actually escaped, so it belongs
              // to the value and is appended to what has been decoded so far
              if (i !== len - 1) {
                  decoded += str.substr(i + 1);
              }
              break;
          }

          code = str.charCodeAt(++i);

          // escaped newline produces nothing; CR/LF counts as a single newline
          if (code === 0x0A || code === 0x0C || code === 0x0D) {
              if (code === 0x0D && i < end && str.charCodeAt(i + 1) === 0x0A) {
                  i++;
              }
              continue;
          }

          // count up to 6 hex digits starting at the escaped character
          var hexLen = 0;
          while (hexLen < 6 && i + hexLen <= end && isHex(str.charCodeAt(i + hexLen))) {
              hexLen++;
          }

          // not a hex escape: the escaped character stands for itself
          if (hexLen === 0) {
              decoded += str.charAt(i);
              continue;
          }

          code = parseInt(str.substr(i, hexLen), 16);

          if (
              (code === 0) ||                       // If this number is zero,
              (code >= 0xD800 && code <= 0xDFFF) || // or is for a surrogate,
              (code > 0x10FFFF)                     // or is greater than the maximum allowed code point
          ) {
              // ... return U+FFFD REPLACEMENT CHARACTER
              code = 0xFFFD;
          }

          if (code > 0xFFFF) {
              // String.fromCharCode() truncates to 16 bits, so a code point
              // outside the BMP is emitted as an explicit surrogate pair
              code -= 0x10000;
              decoded += String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
          } else {
              decoded += String.fromCharCode(code);
          }

          // a single whitespace right after the hex digits is part of the escape
          var next = i + hexLen;
          if (next <= end) {
              var ws = str.charCodeAt(next);

              if (ws === 0x0D && next < end && str.charCodeAt(next + 1) === 0x0A) {
                  next += 2;
              } else if (ws === WHITESPACE || ws === TAB || ws === 0x0A || ws === 0x0C || ws === 0x0D) {
                  next += 1;
              }
          }

          // the loop increment moves on to the first character after the escape
          i = next - 1;
      }

      return decoded;
  }
  /**
   * Encode a value as a quoted CSS string.
   *
   * Characters with a code of U+001F or below, as well as `&`, `<` and `>`,
   * are written as a backslash followed by the hexadecimal code. A backslash
   * and the quote character in use are written with a preceding backslash.
   * Every other character is copied as is.
   *
   * When a hex escape is directly followed by a literal hex digit or a literal
   * space, an extra space is emitted in between. Without it a decoder would
   * read the hex digit as part of the escape, or would consume the space as
   * the escape terminator and drop it from the value.
   *
   * @param {string} str - Value to encode.
   * @param {boolean} [apostrophe] - Use `'` as the quote instead of `"`.
   * @returns {string} The quoted and escaped string.
   */
  function encodeString(str, apostrophe) {
      var quote = apostrophe ? '\'' : '"';
      var quoteCode = apostrophe ? APOSTROPHE : QUOTATION_MARK;
      var encoded = quote;
      var wsBeforeHexIsNeeded = false;

      for (var i = 0; i < str.length; i++) {
          var code = str.charCodeAt(i);

          if (code <= 0x1F || code === AMPERSAND || code === LESSTHANSIGN || code === GREATERTHANSIGN) {
              // escape as a hex code; the next literal character may need a separator
              encoded += '\\' + code.toString(16);
              wsBeforeHexIsNeeded = true;
          } else if (code === REVERSE_SOLIDUS || code === quoteCode) {
              encoded += '\\' + str.charAt(i);
              wsBeforeHexIsNeeded = false;
          } else {
              // separate the previous hex escape from a character that a decoder
              // would otherwise treat as part of that escape
              if (wsBeforeHexIsNeeded && (isHex(code) || code === WHITESPACE)) {
                  encoded += ' ';
              }

              encoded += str.charAt(i);
              wsBeforeHexIsNeeded = false;
          }
      }

      encoded += quote;

      return encoded;
  }
  // Public API: `decode` maps to decodeString, `encode` maps to encodeString.
  module.exports = { decode: decodeString, encode: encodeString };