unicode.js 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  // Code points of the form U+nFFFE and U+nFFFF for every plane n = 0x0..0x10
  // (65534, 65535, 131070, 131071, ..., 1114110, 1114111). These are the
  // per-plane entries of Unicode's noncharacter list; the contiguous range
  // is tested separately by the range check in isUndefinedCodePoint.
  const UNDEFINED_CODE_POINTS = new Set();
  for (let plane = 0; plane <= 0x10; plane++) {
      const planeBase = plane * 0x10000;
      // Insert in ascending order so iteration order matches a literal list.
      UNDEFINED_CODE_POINTS.add(planeBase + 0xfffe);
      UNDEFINED_CODE_POINTS.add(planeBase + 0xffff);
  }
  /** U+FFFD REPLACEMENT CHARACTER as a one-character string. */
  export const REPLACEMENT_CHARACTER = String.fromCharCode(0xfffd);
  /**
   * Two-way lookup table of named code points.
   *
   * Each entry is stored in both directions: `CODE_POINTS.SPACE === 32` and
   * `CODE_POINTS[32] === 'SPACE'`. `EOF` is the sentinel value -1; every other
   * entry is the numeric value of an ASCII character.
   */
  export var CODE_POINTS = {};
  for (const [name, codePoint] of [
      ['EOF', -1],
      ['NULL', 0],
      ['TABULATION', 9],
      ['CARRIAGE_RETURN', 13],
      ['LINE_FEED', 10],
      ['FORM_FEED', 12],
      ['SPACE', 32],
      ['EXCLAMATION_MARK', 33],
      ['QUOTATION_MARK', 34],
      ['AMPERSAND', 38],
      ['APOSTROPHE', 39],
      ['HYPHEN_MINUS', 45],
      ['SOLIDUS', 47],
      ['DIGIT_0', 48],
      ['DIGIT_9', 57],
      ['SEMICOLON', 59],
      ['LESS_THAN_SIGN', 60],
      ['EQUALS_SIGN', 61],
      ['GREATER_THAN_SIGN', 62],
      ['QUESTION_MARK', 63],
      ['LATIN_CAPITAL_A', 65],
      ['LATIN_CAPITAL_Z', 90],
      ['RIGHT_SQUARE_BRACKET', 93],
      ['GRAVE_ACCENT', 96],
      ['LATIN_SMALL_A', 97],
      ['LATIN_SMALL_Z', 122],
  ]) {
      // Forward mapping first, then the reverse one, so property insertion
      // order is name, value, name, value, ...
      CODE_POINTS[name] = codePoint;
      CODE_POINTS[codePoint] = name;
  }
  /**
   * Fixed character sequences, keyed by a descriptive name.
   * All keyword-like values are stored in lower case, except the CDATA opener.
   */
  export const SEQUENCES = {
      // Two consecutive hyphens.
      DASH_DASH: '--',
      // The text "[CDATA[" (upper case, both brackets included).
      CDATA_START: '[CDATA[',
      // Lower-case keywords.
      DOCTYPE: 'doctype',
      SCRIPT: 'script',
      PUBLIC: 'public',
      SYSTEM: 'system',
  };
  45. //Surrogates
  /**
   * Tells whether a value lies in the UTF-16 surrogate range
   * 0xD800..0xDFFF (inclusive), i.e. is either half of a surrogate pair.
   *
   * @param {number} cp - Code point / code unit value to test.
   * @returns {boolean} `true` when `cp` is within 0xD800..0xDFFF.
   */
  export function isSurrogate(cp) {
      if (cp >= 0xd800) {
          return cp <= 0xdfff;
      }
      return false;
  }
  /**
   * Tells whether a value lies in the range 0xDC00..0xDFFF (inclusive),
   * which is the second ("low") half of a UTF-16 surrogate pair.
   *
   * Note that, despite the name, only a single value is inspected.
   * Presumably callers pass the unit that follows a leading surrogate;
   * verify against call sites.
   *
   * @param {number} cp - Code unit value to test.
   * @returns {boolean} `true` when `cp` is within 0xDC00..0xDFFF.
   */
  export function isSurrogatePair(cp) {
      if (cp >= 0xdc00) {
          return cp <= 0xdfff;
      }
      return false;
  }
  /**
   * Combines the two halves of a UTF-16 surrogate pair into one code point.
   *
   * No validation is performed; the result is only meaningful when `cp1` is
   * in 0xD800..0xDBFF and `cp2` is in 0xDC00..0xDFFF.
   *
   * @param {number} cp1 - Leading (high) surrogate.
   * @param {number} cp2 - Trailing (low) surrogate.
   * @returns {number} The combined code point (0x10000..0x10FFFF for valid input).
   */
  export function getSurrogatePairCodePoint(cp1, cp2) {
      // Each step of the high surrogate spans 0x400 code points.
      const highContribution = (cp1 - 0xd800) * 0x400;
      // 0x2400 === 0x10000 - 0xDC00: adds the supplementary-plane base and
      // removes the low surrogate's offset in a single constant.
      return highContribution + 0x2400 + cp2;
  }
  /**
   * Tells whether a code point is a control character, with NULL and the
   * ASCII whitespace characters deliberately left out.
   *
   * Returns `true` for 0x7F..0x9F, and for 0x01..0x1F except TAB (0x09),
   * LF (0x0A), FF (0x0C) and CR (0x0D). NULL (0x00) and SPACE (0x20) fall
   * outside both ranges and therefore yield `false`.
   *
   * @param {number} cp - Code point to test.
   * @returns {boolean} Whether `cp` is a non-whitespace, non-NULL control.
   */
  export function isControlCodePoint(cp) {
      // DELETE and the C1 control range.
      if (cp >= 0x7f && cp <= 0x9f) {
          return true;
      }
      // Anything outside the C0 range (minus NULL) is not a control here.
      if (!(cp >= 0x01 && cp <= 0x1f)) {
          return false;
      }
      // Inside C0: everything except the whitespace characters counts.
      return cp !== 0x09 && cp !== 0x0a && cp !== 0x0c && cp !== 0x0d;
  }
  /**
   * Tells whether a code point is one this module treats as "undefined":
   * either inside the contiguous range 0xFDD0..0xFDEF (inclusive) or a
   * member of the module-level UNDEFINED_CODE_POINTS set.
   *
   * @param {number} cp - Code point to test.
   * @returns {boolean} `true` when `cp` is in the range or in the set.
   */
  export function isUndefinedCodePoint(cp) {
      if (cp >= 0xfdd0 && cp <= 0xfdef) {
          return true;
      }
      return UNDEFINED_CODE_POINTS.has(cp);
  }