// index.d.ts — ambient type declarations for the Tokenizer module.

  // Project-local and `entities` imports used by the declarations in this file.
  import { Preprocessor } from './preprocessor.js';
  import { type Token, type CharacterToken, type DoctypeToken, type TagToken, type EOFToken, type CommentToken, type Attribute, type Location } from '../common/token.js';
  import { EntityDecoder } from 'entities/lib/decode.js';
  import { ERR, type ParserErrorHandler } from '../common/error-codes.js';
  /**
   * Numeric identifiers for the tokenizer's states.
   *
   * Declared as an ambient `const enum`, so references are replaced by their
   * numeric value at compile time and this declaration emits no runtime object.
   * Used in this file as the type of `Tokenizer.state` and
   * `Tokenizer.returnState`, and as the value types of `TokenizerMode`.
   *
   * NOTE(review): member names appear to follow the tokenization states of the
   * WHATWG HTML parsing spec — confirm against the implementation.
   */
  declare const enum State {
    // Text content states
    DATA = 0,
    RCDATA = 1,
    RAWTEXT = 2,
    SCRIPT_DATA = 3,
    PLAINTEXT = 4,

    // Tag states
    TAG_OPEN = 5,
    END_TAG_OPEN = 6,
    TAG_NAME = 7,

    // RCDATA_* states
    RCDATA_LESS_THAN_SIGN = 8,
    RCDATA_END_TAG_OPEN = 9,
    RCDATA_END_TAG_NAME = 10,

    // RAWTEXT_* states
    RAWTEXT_LESS_THAN_SIGN = 11,
    RAWTEXT_END_TAG_OPEN = 12,
    RAWTEXT_END_TAG_NAME = 13,

    // SCRIPT_DATA_* states
    SCRIPT_DATA_LESS_THAN_SIGN = 14,
    SCRIPT_DATA_END_TAG_OPEN = 15,
    SCRIPT_DATA_END_TAG_NAME = 16,
    SCRIPT_DATA_ESCAPE_START = 17,
    SCRIPT_DATA_ESCAPE_START_DASH = 18,
    SCRIPT_DATA_ESCAPED = 19,
    SCRIPT_DATA_ESCAPED_DASH = 20,
    SCRIPT_DATA_ESCAPED_DASH_DASH = 21,
    SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 22,
    SCRIPT_DATA_ESCAPED_END_TAG_OPEN = 23,
    SCRIPT_DATA_ESCAPED_END_TAG_NAME = 24,
    SCRIPT_DATA_DOUBLE_ESCAPE_START = 25,
    SCRIPT_DATA_DOUBLE_ESCAPED = 26,
    SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 27,
    SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 28,
    SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 29,
    SCRIPT_DATA_DOUBLE_ESCAPE_END = 30,

    // Attribute states
    BEFORE_ATTRIBUTE_NAME = 31,
    ATTRIBUTE_NAME = 32,
    AFTER_ATTRIBUTE_NAME = 33,
    BEFORE_ATTRIBUTE_VALUE = 34,
    ATTRIBUTE_VALUE_DOUBLE_QUOTED = 35,
    ATTRIBUTE_VALUE_SINGLE_QUOTED = 36,
    ATTRIBUTE_VALUE_UNQUOTED = 37,
    AFTER_ATTRIBUTE_VALUE_QUOTED = 38,
    SELF_CLOSING_START_TAG = 39,

    // Comment states
    BOGUS_COMMENT = 40,
    MARKUP_DECLARATION_OPEN = 41,
    COMMENT_START = 42,
    COMMENT_START_DASH = 43,
    COMMENT = 44,
    COMMENT_LESS_THAN_SIGN = 45,
    COMMENT_LESS_THAN_SIGN_BANG = 46,
    COMMENT_LESS_THAN_SIGN_BANG_DASH = 47,
    COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH = 48,
    COMMENT_END_DASH = 49,
    COMMENT_END = 50,
    COMMENT_END_BANG = 51,

    // DOCTYPE states
    DOCTYPE = 52,
    BEFORE_DOCTYPE_NAME = 53,
    DOCTYPE_NAME = 54,
    AFTER_DOCTYPE_NAME = 55,
    AFTER_DOCTYPE_PUBLIC_KEYWORD = 56,
    BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 57,
    DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 58,
    DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 59,
    AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 60,
    BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 61,
    AFTER_DOCTYPE_SYSTEM_KEYWORD = 62,
    BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 63,
    DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 64,
    DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 65,
    AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 66,
    BOGUS_DOCTYPE = 67,

    // CDATA section states
    CDATA_SECTION = 68,
    CDATA_SECTION_BRACKET = 69,
    CDATA_SECTION_END = 70,

    // Character reference states
    CHARACTER_REFERENCE = 71,
    AMBIGUOUS_AMPERSAND = 72
  }
  /**
   * Exported constant exposing six `State` values by name: `DATA`, `RCDATA`,
   * `RAWTEXT`, `SCRIPT_DATA`, `PLAINTEXT` and `CDATA_SECTION`.
   *
   * Every property is `readonly` and typed as the exact enum member, so
   * consumers can refer to these states without access to the non-exported
   * `State` enum.
   *
   * NOTE(review): presumably these are the states callers are expected to
   * assign to `Tokenizer.state` — confirm against the implementation.
   */
  export declare const TokenizerMode: {
    readonly DATA: State.DATA;
    readonly RCDATA: State.RCDATA;
    readonly RAWTEXT: State.RAWTEXT;
    readonly SCRIPT_DATA: State.SCRIPT_DATA;
    readonly PLAINTEXT: State.PLAINTEXT;
    readonly CDATA_SECTION: State.CDATA_SECTION;
  };
  /**
   * Options accepted by the `Tokenizer` constructor.
   */
  export interface TokenizerOptions {
    /**
     * Optional flag.
     *
     * NOTE(review): presumably enables attaching source `Location` data to
     * emitted tokens — confirm against the implementation.
     */
    sourceCodeLocationInfo?: boolean;
  }
  /**
   * Callback interface passed to the `Tokenizer` constructor.
   *
   * Each required method receives one token of the indicated type and returns
   * nothing. Character data is split across three callbacks
   * (`onCharacter`, `onNullCharacter`, `onWhitespaceCharacter`), all of which
   * take a `CharacterToken`.
   */
  export interface TokenHandler {
    /** Receives a comment token. */
    onComment(token: CommentToken): void;
    /** Receives a doctype token. */
    onDoctype(token: DoctypeToken): void;
    /** Receives a tag token for a start tag. */
    onStartTag(token: TagToken): void;
    /** Receives a tag token for an end tag. */
    onEndTag(token: TagToken): void;
    /** Receives the end-of-file token. */
    onEof(token: EOFToken): void;
    /** Receives a character token. */
    onCharacter(token: CharacterToken): void;
    /** Receives a character token reported through the null-character callback. */
    onNullCharacter(token: CharacterToken): void;
    /** Receives a character token reported through the whitespace callback. */
    onWhitespaceCharacter(token: CharacterToken): void;
    /** Optional parse-error callback; may be omitted or set to `null`. */
    onParseError?: ParserErrorHandler | null;
  }
  /**
   * Type declaration of the tokenizer class.
   *
   * Constructed from a `TokenizerOptions` object and a `TokenHandler`. Input is
   * supplied through `write` / `insertHtmlAtCurrentPos`, and processing can be
   * suspended and continued through `pause` / `resume`.
   *
   * The class declares one `_state…` method per tokenizer state; their names
   * parallel the members of the `State` enum.
   *
   * NOTE(review): this is a declaration only — the behavior notes below that
   * are marked "presumably" must be confirmed against the implementation.
   */
  export declare class Tokenizer {
    /** Options object (typed `TokenizerOptions`; presumably the one given to the constructor). */
    protected options: TokenizerOptions;
    /** Token handler (typed `TokenHandler`; presumably the one given to the constructor). */
    protected handler: TokenHandler;
    /** Input preprocessor instance. */
    preprocessor: Preprocessor;
    /** Pause flag; presumably toggled by `pause()` / `resume()`. */
    protected paused: boolean;
    /** Ensures that the parsing loop isn't run multiple times at once. */
    protected inLoop: boolean;
    /**
     * Indicates that the current adjusted node exists, is not an element in the HTML namespace,
     * and that it is not an integration point for either MathML or HTML.
     *
     * @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction}
     */
    inForeignNode: boolean;
    /** Presumably the name of the most recently seen start tag — confirm. */
    lastStartTagName: string;
    active: boolean;
    /** Current tokenizer state. */
    state: State;
    /** State stored for returning to later (presumably after a character reference — confirm). */
    protected returnState: State;
    /**
     * We use `entities`' `EntityDecoder` to parse character references.
     *
     * All of the following states are handled by the `EntityDecoder`:
     *
     * - Named character reference state
     * - Numeric character reference state
     * - Hexadecimal character reference start state
     * - Hexadecimal character reference state
     * - Decimal character reference state
     * - Numeric character reference end state
     */
    protected entityDecoder: EntityDecoder;
    protected entityStartPos: number;
    protected consumedAfterSnapshot: number;
    /** Location for the token under construction, or `null`. */
    protected currentLocation: Location | null;
    /** Character token currently being accumulated, or `null` when there is none. */
    protected currentCharacterToken: CharacterToken | null;
    /** Non-character token currently being built, or `null` when there is none. */
    protected currentToken: Token | null;
    /** Attribute currently being built. */
    protected currentAttr: Attribute;

    /**
     * @param options Tokenizer options.
     * @param handler Object whose callbacks receive the produced tokens.
     */
    constructor(options: TokenizerOptions, handler: TokenHandler);

    /**
     * Reports a parse error identified by `code`.
     *
     * @param code Error code.
     * @param cpOffset Optional offset (presumably in code points relative to the current position — confirm).
     */
    protected _err(code: ERR, cpOffset?: number): void;
    /** Returns a `Location` for the given offset, or `null`. */
    protected getCurrentLocation(offset: number): Location | null;
    protected _runParsingLoop(): void;

    // --- Public input / flow-control API ---

    pause(): void;
    /** @param writeCallback Optional callback (presumably invoked once processing completes — confirm). */
    resume(writeCallback?: () => void): void;
    /**
     * Supplies a chunk of input.
     *
     * @param chunk Input text.
     * @param isLastChunk Whether this is the final chunk of input.
     * @param writeCallback Optional callback (presumably invoked once the chunk has been processed — confirm).
     */
    write(chunk: string, isLastChunk: boolean, writeCallback?: () => void): void;
    insertHtmlAtCurrentPos(chunk: string): void;

    // --- Input consumption helpers ---

    protected _ensureHibernation(): boolean;
    /** Returns a number (presumably the consumed code point — confirm). */
    protected _consume(): number;
    protected _advanceBy(count: number): void;
    /** Returns a boolean (presumably whether `pattern` matched and was consumed — confirm). */
    protected _consumeSequenceIfMatch(pattern: string, caseSensitive: boolean): boolean;

    // --- Token / attribute construction ---

    protected _createStartTagToken(): void;
    protected _createEndTagToken(): void;
    protected _createCommentToken(offset: number): void;
    protected _createDoctypeToken(initialName: string | null): void;
    protected _createCharacterToken(type: CharacterToken['type'], chars: string): void;
    protected _createAttr(attrNameFirstCh: string): void;
    protected _leaveAttrName(): void;
    protected _leaveAttrValue(): void;

    // --- Token emission ---

    protected prepareToken(ct: Token): void;
    protected emitCurrentTagToken(): void;
    protected emitCurrentComment(ct: CommentToken): void;
    protected emitCurrentDoctype(ct: DoctypeToken): void;
    protected _emitCurrentCharacterToken(nextLocation: Location | null): void;
    protected _emitEOFToken(): void;
    protected _appendCharToCurrentCharacterToken(type: CharacterToken['type'], ch: string): void;
    protected _emitCodePoint(cp: number): void;
    protected _emitChars(ch: string): void;

    // --- Character reference helpers ---

    protected _startCharacterReference(): void;
    protected _isCharacterReferenceInAttribute(): boolean;
    protected _flushCodePointConsumedAsCharacterReference(cp: number): void;

    // --- State handlers ---
    // Each handler takes a code point `cp`, except `_stateCharacterReference`,
    // which takes no argument.

    /** Takes a code point (presumably dispatches to the handler for the current `state` — confirm). */
    protected _callState(cp: number): void;
    protected _stateData(cp: number): void;
    protected _stateRcdata(cp: number): void;
    protected _stateRawtext(cp: number): void;
    protected _stateScriptData(cp: number): void;
    protected _statePlaintext(cp: number): void;
    protected _stateTagOpen(cp: number): void;
    protected _stateEndTagOpen(cp: number): void;
    protected _stateTagName(cp: number): void;
    protected _stateRcdataLessThanSign(cp: number): void;
    protected _stateRcdataEndTagOpen(cp: number): void;
    /** Shared end-tag helper returning a boolean; its meaning is not visible from this declaration. */
    protected handleSpecialEndTag(_cp: number): boolean;
    protected _stateRcdataEndTagName(cp: number): void;
    protected _stateRawtextLessThanSign(cp: number): void;
    protected _stateRawtextEndTagOpen(cp: number): void;
    protected _stateRawtextEndTagName(cp: number): void;
    protected _stateScriptDataLessThanSign(cp: number): void;
    protected _stateScriptDataEndTagOpen(cp: number): void;
    protected _stateScriptDataEndTagName(cp: number): void;
    protected _stateScriptDataEscapeStart(cp: number): void;
    protected _stateScriptDataEscapeStartDash(cp: number): void;
    protected _stateScriptDataEscaped(cp: number): void;
    protected _stateScriptDataEscapedDash(cp: number): void;
    protected _stateScriptDataEscapedDashDash(cp: number): void;
    protected _stateScriptDataEscapedLessThanSign(cp: number): void;
    protected _stateScriptDataEscapedEndTagOpen(cp: number): void;
    protected _stateScriptDataEscapedEndTagName(cp: number): void;
    protected _stateScriptDataDoubleEscapeStart(cp: number): void;
    protected _stateScriptDataDoubleEscaped(cp: number): void;
    protected _stateScriptDataDoubleEscapedDash(cp: number): void;
    protected _stateScriptDataDoubleEscapedDashDash(cp: number): void;
    protected _stateScriptDataDoubleEscapedLessThanSign(cp: number): void;
    protected _stateScriptDataDoubleEscapeEnd(cp: number): void;
    protected _stateBeforeAttributeName(cp: number): void;
    protected _stateAttributeName(cp: number): void;
    protected _stateAfterAttributeName(cp: number): void;
    protected _stateBeforeAttributeValue(cp: number): void;
    protected _stateAttributeValueDoubleQuoted(cp: number): void;
    protected _stateAttributeValueSingleQuoted(cp: number): void;
    protected _stateAttributeValueUnquoted(cp: number): void;
    protected _stateAfterAttributeValueQuoted(cp: number): void;
    protected _stateSelfClosingStartTag(cp: number): void;
    protected _stateBogusComment(cp: number): void;
    protected _stateMarkupDeclarationOpen(cp: number): void;
    protected _stateCommentStart(cp: number): void;
    protected _stateCommentStartDash(cp: number): void;
    protected _stateComment(cp: number): void;
    protected _stateCommentLessThanSign(cp: number): void;
    protected _stateCommentLessThanSignBang(cp: number): void;
    protected _stateCommentLessThanSignBangDash(cp: number): void;
    protected _stateCommentLessThanSignBangDashDash(cp: number): void;
    protected _stateCommentEndDash(cp: number): void;
    protected _stateCommentEnd(cp: number): void;
    protected _stateCommentEndBang(cp: number): void;
    protected _stateDoctype(cp: number): void;
    protected _stateBeforeDoctypeName(cp: number): void;
    protected _stateDoctypeName(cp: number): void;
    protected _stateAfterDoctypeName(cp: number): void;
    protected _stateAfterDoctypePublicKeyword(cp: number): void;
    protected _stateBeforeDoctypePublicIdentifier(cp: number): void;
    protected _stateDoctypePublicIdentifierDoubleQuoted(cp: number): void;
    protected _stateDoctypePublicIdentifierSingleQuoted(cp: number): void;
    protected _stateAfterDoctypePublicIdentifier(cp: number): void;
    protected _stateBetweenDoctypePublicAndSystemIdentifiers(cp: number): void;
    protected _stateAfterDoctypeSystemKeyword(cp: number): void;
    protected _stateBeforeDoctypeSystemIdentifier(cp: number): void;
    protected _stateDoctypeSystemIdentifierDoubleQuoted(cp: number): void;
    protected _stateDoctypeSystemIdentifierSingleQuoted(cp: number): void;
    protected _stateAfterDoctypeSystemIdentifier(cp: number): void;
    protected _stateBogusDoctype(cp: number): void;
    protected _stateCdataSection(cp: number): void;
    protected _stateCdataSectionBracket(cp: number): void;
    protected _stateCdataSectionEnd(cp: number): void;
    protected _stateCharacterReference(): void;
    protected _stateAmbiguousAmpersand(cp: number): void;
  }
  // Marks this file as a module so that only the explicitly exported names are visible to importers.
  export {};