// html4-entities.js
  /**
   * HTML 4 named character references, stored as two parallel arrays:
   * HTML_ALPHA[k] is the entity name for the code point HTML_CODES[k].
   * ('apos' is not an HTML 4 name but is kept for XML/XHTML compatibility.)
   *
   * Entity names are case-sensitive: 'Alpha' (U+0391) and 'alpha' (U+03B1),
   * 'Prime' and 'prime', 'lArr' and 'larr', ... are different entities, so
   * every name in this list must be unique and use the exact HTML 4 spelling.
   */
  var HTML_ALPHA = [
    'apos',
    // Latin-1 entities, code points 160-255
    'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen', 'brvbar', 'sect',
    'uml', 'copy', 'ordf', 'laquo', 'not', 'shy', 'reg', 'macr',
    'deg', 'plusmn', 'sup2', 'sup3', 'acute', 'micro', 'para', 'middot',
    'cedil', 'sup1', 'ordm', 'raquo', 'frac14', 'frac12', 'frac34', 'iquest',
    'Agrave', 'Aacute', 'Acirc', 'Atilde', 'Auml', 'Aring', 'AElig', 'Ccedil',
    'Egrave', 'Eacute', 'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml',
    'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde', 'Ouml', 'times',
    'Oslash', 'Ugrave', 'Uacute', 'Ucirc', 'Uuml', 'Yacute', 'THORN', 'szlig',
    'agrave', 'aacute', 'acirc', 'atilde', 'auml', 'aring', 'aelig', 'ccedil',
    'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute', 'icirc', 'iuml',
    'eth', 'ntilde', 'ograve', 'oacute', 'ocirc', 'otilde', 'ouml', 'divide',
    'oslash', 'ugrave', 'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml',
    // Markup-significant and internationalization entities
    'quot', 'amp', 'lt', 'gt',
    'OElig', 'oelig', 'Scaron', 'scaron', 'Yuml', 'circ', 'tilde',
    'ensp', 'emsp', 'thinsp', 'zwnj', 'zwj', 'lrm', 'rlm',
    'ndash', 'mdash', 'lsquo', 'rsquo', 'sbquo', 'ldquo', 'rdquo', 'bdquo',
    'dagger', 'Dagger', 'permil', 'lsaquo', 'rsaquo', 'euro',
    // Symbols, Greek letters and mathematical operators
    'fnof',
    'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta', 'Eta', 'Theta',
    'Iota', 'Kappa', 'Lambda', 'Mu', 'Nu', 'Xi', 'Omicron', 'Pi',
    'Rho', 'Sigma', 'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega',
    'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta', 'eta', 'theta',
    'iota', 'kappa', 'lambda', 'mu', 'nu', 'xi', 'omicron', 'pi',
    'rho', 'sigmaf', 'sigma', 'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega',
    'thetasym', 'upsih', 'piv',
    'bull', 'hellip', 'prime', 'Prime', 'oline', 'frasl',
    'weierp', 'image', 'real', 'trade', 'alefsym',
    'larr', 'uarr', 'rarr', 'darr', 'harr', 'crarr',
    'lArr', 'uArr', 'rArr', 'dArr', 'hArr',
    'forall', 'part', 'exist', 'empty', 'nabla', 'isin', 'notin', 'ni',
    'prod', 'sum', 'minus', 'lowast', 'radic', 'prop', 'infin', 'ang',
    'and', 'or', 'cap', 'cup', 'int', 'there4', 'sim', 'cong',
    'asymp', 'ne', 'equiv', 'le', 'ge',
    'sub', 'sup', 'nsub', 'sube', 'supe',
    'oplus', 'otimes', 'perp', 'sdot',
    'lceil', 'rceil', 'lfloor', 'rfloor', 'lang', 'rang',
    'loz', 'spades', 'clubs', 'hearts', 'diams'
  ];

  /** Code points (UTF-16 code units) matching HTML_ALPHA position by position. */
  var HTML_CODES = [
    39,
    160, 161, 162, 163, 164, 165, 166, 167,
    168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183,
    184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199,
    200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215,
    216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231,
    232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247,
    248, 249, 250, 251, 252, 253, 254, 255,
    34, 38, 60, 62,
    338, 339, 352, 353, 376, 710, 732,
    8194, 8195, 8201, 8204, 8205, 8206, 8207,
    8211, 8212, 8216, 8217, 8218, 8220, 8221, 8222,
    8224, 8225, 8240, 8249, 8250, 8364,
    402,
    913, 914, 915, 916, 917, 918, 919, 920,
    921, 922, 923, 924, 925, 926, 927, 928,
    929, 931, 932, 933, 934, 935, 936, 937,
    945, 946, 947, 948, 949, 950, 951, 952,
    953, 954, 955, 956, 957, 958, 959, 960,
    961, 962, 963, 964, 965, 966, 967, 968, 969,
    977, 978, 982,
    8226, 8230, 8242, 8243, 8254, 8260,
    8472, 8465, 8476, 8482, 8501,
    8592, 8593, 8594, 8595, 8596, 8629,
    8656, 8657, 8658, 8659, 8660,
    8704, 8706, 8707, 8709, 8711, 8712, 8713, 8715,
    8719, 8721, 8722, 8727, 8730, 8733, 8734, 8736,
    8743, 8744, 8745, 8746, 8747, 8756, 8764, 8773,
    8776, 8800, 8801, 8804, 8805,
    8834, 8835, 8836, 8838, 8839,
    8853, 8855, 8869, 8901,
    8968, 8969, 8970, 8971, 9001, 9002,
    9674, 9824, 9827, 9829, 9830
  ];

  // Lookup tables built once at load time:
  //   alphaIndex: entity name -> character   (used when decoding)
  //   numIndex:   code point  -> entity name (used when encoding)
  // Both are prototype-less so that names such as "constructor" or
  // "toString" are never found through Object.prototype.
  var alphaIndex = Object.create(null);
  var numIndex = Object.create(null);
  var i = 0;
  var length = HTML_ALPHA.length;
  for (; i < length; i++) {
    alphaIndex[HTML_ALPHA[i]] = String.fromCharCode(HTML_CODES[i]);
    numIndex[HTML_CODES[i]] = HTML_ALPHA[i];
  }
  // Decode-only alias: this table used to spell AElig as "Aelig", so text
  // produced by that table may still contain "&Aelig;". It is not added to
  // numIndex, so encoding always emits the standard "&AElig;".
  alphaIndex.Aelig = alphaIndex.AElig;
  /**
   * Encoder/decoder for HTML 4 character references. Instances hold no
   * state; every prototype method also has a static twin on the constructor.
   * @constructor
   */
  function Html4Entities() {}

  /**
   * Replaces character references in `str` with the characters they denote.
   *
   * Recognised forms (the trailing ';' is optional):
   *   - named:   &name;   looked up in alphaIndex (case-sensitive)
   *   - decimal: &#NNN;
   *   - hex:     &#xHHH;  or &#XHHH;
   *
   * Anything that is not a known name or a well-formed number in the range
   * 0..0x10FFFF is left in the output exactly as it appeared in the input.
   *
   * @param {String} str
   * @returns {String}
   */
  Html4Entities.prototype.decode = function(str) {
    if (str.length === 0) {
      return '';
    }
    return str.replace(/&(#?[\w\d]+);?/g, function(match, entity) {
      if (entity.charAt(0) !== '#') {
        // Own-property check only: a plain lookup would also find inherited
        // members such as "constructor" or "toString" and splice their
        // stringified value into the result.
        return Object.prototype.hasOwnProperty.call(alphaIndex, entity) ?
          alphaIndex[entity] :
          match;
      }

      var isHex = entity.charAt(1).toLowerCase() === 'x';
      var digits = entity.slice(isHex ? 2 : 1);
      // Require the whole reference to be digits of the right base, so that
      // parseInt cannot silently drop a trailing non-digit tail ("&#12abc;")
      // or auto-detect a "0x" prefix in a decimal reference.
      var wellFormed = isHex ? /^[0-9a-f]+$/i : /^[0-9]+$/;
      if (!wellFormed.test(digits)) {
        return match;
      }

      var code = parseInt(digits, isHex ? 16 : 10);
      if (code <= 0xFFFF) {
        return String.fromCharCode(code);
      }
      if (code <= 0x10FFFF) {
        // Supplementary-plane code point: emit it as a UTF-16 surrogate pair.
        code -= 0x10000;
        return String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
      }
      // Beyond the Unicode range (also covers Infinity from huge digit runs).
      return match;
    });
  };
  /**
   * Static shortcut for `new Html4Entities().decode(str)`.
   * @param {String} str
   * @returns {String}
   */
  Html4Entities.decode = function(str) {
    var decoder = new Html4Entities();
    return decoder.decode(str);
  };
  /**
   * Replaces every UTF-16 code unit of `str` that has an entry in numIndex
   * with its named reference (`&name;`). All other code units are copied
   * through unchanged.
   * @param {String} str
   * @returns {String}
   */
  Html4Entities.prototype.encode = function(str) {
    var total = str.length;
    if (total === 0) {
      return '';
    }
    var encoded = '';
    for (var pos = 0; pos < total; pos++) {
      var name = numIndex[str.charCodeAt(pos)];
      if (name) {
        encoded += '&' + name + ';';
      } else {
        encoded += str.charAt(pos);
      }
    }
    return encoded;
  };
  /**
   * Static shortcut for `new Html4Entities().encode(str)`.
   * @param {String} str
   * @returns {String}
   */
  Html4Entities.encode = function(str) {
    var encoder = new Html4Entities();
    return encoder.encode(str);
  };
  /**
   * Encodes `str` so that only code units 32..126 that have no named entity
   * remain literal:
   *   - a code unit with an entry in numIndex becomes `&name;`
   *   - any other code unit below 32 or above 126 becomes a decimal
   *     reference `&#N;`
   *   - everything else is copied unchanged
   *
   * The string is processed one UTF-16 code unit at a time (charCodeAt), so
   * a surrogate pair produces two numeric references, one per code unit.
   *
   * @param {String} str
   * @returns {String}
   */
  Html4Entities.prototype.encodeNonUTF = function(str) {
    var total = str.length;
    if (total === 0) {
      return '';
    }
    var encoded = '';
    for (var pos = 0; pos < total; pos++) {
      var unit = str.charCodeAt(pos);
      var name = numIndex[unit];
      if (name) {
        encoded += '&' + name + ';';
        continue;
      }
      var isPrintableAscii = unit >= 32 && unit <= 126;
      encoded += isPrintableAscii ? str.charAt(pos) : '&#' + unit + ';';
    }
    return encoded;
  };
  /**
   * Static shortcut for `new Html4Entities().encodeNonUTF(str)`.
   * @param {String} str
   * @returns {String}
   */
  Html4Entities.encodeNonUTF = function(str) {
    var encoder = new Html4Entities();
    return encoder.encodeNonUTF(str);
  };
  /**
   * Replaces every UTF-16 code unit above 255 with a decimal reference
   * `&#N;` and copies all code units up to and including 255 unchanged.
   *
   * Note: despite the name, the cut-off is 255 rather than 127, and no named
   * entities are used, so characters such as '&' and '<' pass through as-is.
   *
   * @param {String} str
   * @returns {String}
   */
  Html4Entities.prototype.encodeNonASCII = function(str) {
    var total = str.length;
    if (total === 0) {
      return '';
    }
    var encoded = '';
    for (var pos = 0; pos < total; pos++) {
      var unit = str.charCodeAt(pos);
      if (unit > 255) {
        encoded += '&#' + unit + ';';
      } else {
        encoded += str[pos];
      }
    }
    return encoded;
  };
  /**
   * Static shortcut for `new Html4Entities().encodeNonASCII(str)`.
   * @param {String} str
   * @returns {String}
   */
  Html4Entities.encodeNonASCII = function(str) {
    var encoder = new Html4Entities();
    return encoder.encodeNonASCII(str);
  };

  // CommonJS export: the constructor, carrying the static helpers above.
  module.exports = Html4Entities;