// Tokenizer.d.ts 4.5 KB

  /**
   * Value passed as the `quote` argument of `Callbacks.onattribend`.
   *
   * NOTE(review): the member names suggest the quoting style of the attribute
   * value that just ended (none / unquoted / single-quoted / double-quoted).
   * This declaration file does not show the implementation, so confirm the
   * exact meaning against the tokenizer source.
   */
  export declare enum QuoteType {
    NoValue = 0,
    Unquoted = 1,
    Single = 2,
    Double = 3
  }
  /**
   * Handlers accepted by the `Tokenizer` constructor as its `cbs` argument.
   *
   * Every handler returns `void`. All positional arguments are plain numbers.
   *
   * NOTE(review): `start`, `endIndex` and `endOffset` are presumably positions
   * in the tokenizer's input, and `codepoint` presumably a decoded entity's
   * Unicode code point; the declaration alone does not establish whether
   * ranges are inclusive or how `endOffset` is measured — confirm against the
   * implementation before relying on it.
   */
  export interface Callbacks {
    onattribdata(start: number, endIndex: number): void;
    onattribentity(codepoint: number): void;
    /** `quote` is one of the `QuoteType` enum values. */
    onattribend(quote: QuoteType, endIndex: number): void;
    onattribname(start: number, endIndex: number): void;
    oncdata(start: number, endIndex: number, endOffset: number): void;
    onclosetag(start: number, endIndex: number): void;
    oncomment(start: number, endIndex: number, endOffset: number): void;
    ondeclaration(start: number, endIndex: number): void;
    /** Takes no arguments. */
    onend(): void;
    onopentagend(endIndex: number): void;
    onopentagname(start: number, endIndex: number): void;
    onprocessinginstruction(start: number, endIndex: number): void;
    onselfclosingtag(endIndex: number): void;
    ontext(start: number, endIndex: number): void;
    ontextentity(codepoint: number, endIndex: number): void;
  }
  /**
   * Type declaration of the tokenizer class (default export).
   *
   * It is constructed from an options object (`xmlMode`, `decodeEntities`, both
   * optional booleans) plus a `Callbacks` implementation, and receives input
   * as string chunks through `write`. Everything except the constructor,
   * `reset`, `write`, `end`, `pause`, `resume` and the `running` flag is
   * declared `private`.
   *
   * NOTE(review): only signatures are visible in this declaration file; the
   * hedged notes on individual members are to be confirmed against the
   * implementation.
   */
  export default class Tokenizer {
    private readonly cbs;
    /** The current state the tokenizer is in. */
    private state;
    /** The read buffer. */
    private buffer;
    /** The beginning of the section that is currently being read. */
    private sectionStart;
    /** The index within the buffer that we are currently looking at. */
    private index;
    /** The start of the last entity. */
    private entityStart;
    /** Some behavior, e.g. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
    private baseState;
    /** For special parsing behavior inside of script and style tags. */
    private isSpecial;
    /** Indicates whether the tokenizer has been paused. */
    running: boolean;
    /** The offset of the current buffer. */
    private offset;
    private readonly xmlMode;
    private readonly decodeEntities;
    private readonly entityDecoder;
    /**
     * @param options - Optional `xmlMode` and `decodeEntities` boolean flags.
     *   NOTE(review): defaults are not visible in this declaration.
     * @param cbs - Handlers implementing the `Callbacks` interface.
     */
    constructor({ xmlMode, decodeEntities, }: {
      xmlMode?: boolean;
      decodeEntities?: boolean;
    }, cbs: Callbacks);
    /** NOTE(review): presumably returns the tokenizer to its initial state — confirm. */
    reset(): void;
    /**
     * Supplies a chunk of input text.
     *
     * @param chunk - The text to hand to the tokenizer.
     */
    write(chunk: string): void;
    /** NOTE(review): presumably signals that no further input will be written — confirm. */
    end(): void;
    /** NOTE(review): presumably suspends processing (see `running`) — confirm. */
    pause(): void;
    /** NOTE(review): presumably continues processing after `pause` — confirm. */
    resume(): void;
    private stateText;
    private currentSequence;
    private sequenceIndex;
    private stateSpecialStartSequence;
    /** Look for an end tag. For <title> tags, also decode entities. */
    private stateInSpecialTag;
    private stateCDATASequence;
    /**
     * When we wait for one specific character, we can speed things up
     * by skipping through the buffer until we find it.
     *
     * @returns Whether the character was found.
     */
    private fastForwardTo;
    /**
     * Comments and CDATA end with `-->` and `]]>`.
     *
     * Their common qualities are:
     * - Their end sequences have a distinct character they start with.
     * - That character is then repeated, so we have to check multiple repeats.
     * - All characters but the start character of the sequence can be skipped.
     */
    private stateInCommentLike;
    /**
     * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
     *
     * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
     * We allow anything that wouldn't end the tag.
     */
    private isTagStartChar;
    private startSpecial;
    private stateBeforeTagName;
    private stateInTagName;
    private stateBeforeClosingTagName;
    private stateInClosingTagName;
    private stateAfterClosingTagName;
    private stateBeforeAttributeName;
    private stateInSelfClosingTag;
    private stateInAttributeName;
    private stateAfterAttributeName;
    private stateBeforeAttributeValue;
    private handleInAttributeValue;
    private stateInAttributeValueDoubleQuotes;
    private stateInAttributeValueSingleQuotes;
    private stateInAttributeValueNoQuotes;
    private stateBeforeDeclaration;
    private stateInDeclaration;
    private stateInProcessingInstruction;
    private stateBeforeComment;
    private stateInSpecialComment;
    private stateBeforeSpecialS;
    private stateBeforeSpecialT;
    private startEntity;
    private stateInEntity;
    /**
     * Remove data that has already been consumed from the buffer.
     */
    private cleanup;
    private shouldContinue;
    /**
     * Iterates through the buffer, calling the function corresponding to the current state.
     *
     * States that are more likely to be hit are higher up, as a performance improvement.
     */
    private parse;
    private finish;
    /** Handle any trailing data. */
    private handleTrailingData;
    private emitCodePoint;
  }
//# sourceMappingURL=Tokenizer.d.ts.map