index.d.ts 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. import { Tokenizer, TokenizerMode, type TokenHandler } from '../tokenizer/index.js';
  2. import { OpenElementStack, type StackHandler } from './open-element-stack.js';
  3. import { FormattingElementList } from './formatting-element-list.js';
  4. import { ERR, type ParserErrorHandler } from '../common/error-codes.js';
  5. import { TAG_ID as $, NS } from '../common/html.js';
  6. import type { TreeAdapter, TreeAdapterTypeMap } from '../tree-adapters/interface.js';
  7. import { type Token, type CommentToken, type CharacterToken, type TagToken, type DoctypeToken, type EOFToken, type LocationWithAttributes } from '../common/token.js';
  /**
   * Numeric identifiers (0-22) for the parser's insertion modes.
   *
   * `Parser` types its `insertionMode`, `originalInsertionMode` and
   * `tmplInsertionModeStack` members with this enum. The enum is declared
   * without `export`, so it cannot be imported from this module.
   *
   * NOTE(review): the member names appear to mirror the insertion modes of the
   * WHATWG HTML parsing algorithm - confirm against the implementation.
   */
  8. declare enum InsertionMode {
  9. INITIAL = 0,
  10. BEFORE_HTML = 1,
  11. BEFORE_HEAD = 2,
  12. IN_HEAD = 3,
  13. IN_HEAD_NO_SCRIPT = 4,
  14. AFTER_HEAD = 5,
  15. IN_BODY = 6,
  16. TEXT = 7,
  17. IN_TABLE = 8,
  18. IN_TABLE_TEXT = 9,
  19. IN_CAPTION = 10,
  20. IN_COLUMN_GROUP = 11,
  21. IN_TABLE_BODY = 12,
  22. IN_ROW = 13,
  23. IN_CELL = 14,
  24. IN_SELECT = 15,
  25. IN_SELECT_IN_TABLE = 16,
  26. IN_TEMPLATE = 17,
  27. AFTER_BODY = 18,
  28. IN_FRAMESET = 19,
  29. AFTER_FRAMESET = 20,
  30. AFTER_AFTER_BODY = 21,
  31. AFTER_AFTER_FRAMESET = 22
  32. }
  /**
   * Options accepted by the `Parser` constructor, `Parser.parse` and
   * `Parser.getFragmentParser`. Every field is optional.
   *
   * `T` is the tree adapter's type map and determines the type of `treeAdapter`.
   */
  33. export interface ParserOptions<T extends TreeAdapterTypeMap> {
  34. /**
  35. * The [scripting flag](https://html.spec.whatwg.org/multipage/parsing.html#scripting-flag). If set
  36. * to `true`, `noscript` element content will be parsed as text.
  37. *
  38. * @default `true`
  39. */
  40. scriptingEnabled?: boolean;
  41. /**
  42. * Enables source code location information. When enabled, each node (except the root node)
  43. * will have a `sourceCodeLocation` property. If the node is not an empty element, `sourceCodeLocation` will
  44. * be an {@link ElementLocation} object, otherwise it will be {@link Location}.
  45. * If the element was implicitly created by the parser (as part of
  46. * [tree correction](https://html.spec.whatwg.org/multipage/syntax.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser)),
  47. * its `sourceCodeLocation` property will be `undefined`.
  48. *
  49. * @default `false`
  50. */
  51. sourceCodeLocationInfo?: boolean;
  52. /**
  53. * Specifies the resulting tree format.
  54. *
  55. * @default `treeAdapters.default`
  56. */
  57. treeAdapter?: TreeAdapter<T>;
  58. /**
  59. * Callback for parse errors. May be `null` or omitted.
  60. *
  61. * @default `null`
  62. */
  63. onParseError?: ParserErrorHandler | null;
  64. }
  /**
   * Type declarations for `Parser`.
   *
   * `T` is the tree adapter's type map; node types such as `T['document']`,
   * `T['element']` and `T['parentNode']` are taken from it. The class is declared
   * to implement `TokenHandler` and `StackHandler<T>`.
   * NOTE(review): presumably the `on*` token methods satisfy `TokenHandler` and
   * `onItemPush`/`onItemPop` satisfy `StackHandler<T>` - confirm against those interfaces.
   *
   * Note on visibility: members tagged `@internal` or `@protected` in their doc
   * comments carry no TypeScript access modifier, so the compiler treats them as
   * public. Only `currentToken`, `currentNotInHTML`, `_setContextModes`,
   * `_initTokenizerForFragmentParsing` and
   * `shouldProcessStartTagTokenInForeignContent` use the `protected` keyword.
   */
  65. export declare class Parser<T extends TreeAdapterTypeMap> implements TokenHandler, StackHandler<T> {
  66. /** @internal */
  67. fragmentContext: T['element'] | null;
  68. /** @internal */
  69. scriptHandler: null | ((pendingScript: T['element']) => void);
  /** Tree adapter instance; same type as the `treeAdapter` parser option. */
  70. treeAdapter: TreeAdapter<T>;
  71. /** @internal */
  72. onParseError: ParserErrorHandler | null;
  73. protected currentToken: Token | null;
  /** Parser options with every optional field of `ParserOptions<T>` made required. */
  74. options: Required<ParserOptions<T>>;
  /** Document node, typed by the tree adapter's type map. */
  75. document: T['document'];
  /**
   * @param options Optional parser options.
   * @param document Optional document node.
   * @param fragmentContext Optional context element, or `null`; marked `@internal`.
   * @param scriptHandler Optional callback that receives a pending script element, or `null`; marked `@internal`.
   */
  76. constructor(options?: ParserOptions<T>, document?: T['document'],
  77. /** @internal */
  78. fragmentContext?: T['element'] | null,
  79. /** @internal */
  80. scriptHandler?: null | ((pendingScript: T['element']) => void));
  /** Takes an HTML string and optional parser options; returns a document node (`T['document']`). */
  81. static parse<T extends TreeAdapterTypeMap>(html: string, options?: ParserOptions<T>): T['document'];
  /**
   * Returns a `Parser<T>` for the given optional fragment context and options.
   *
   * NOTE(review): `fragmentContext` is typed `T['parentNode']` here but `T['element']`
   * on the constructor and on the `fragmentContext` field - confirm this is intended.
   */
  82. static getFragmentParser<T extends TreeAdapterTypeMap>(fragmentContext?: T['parentNode'] | null, options?: ParserOptions<T>): Parser<T>;
  /** Returns a document fragment node (`T['documentFragment']`). */
  83. getFragment(): T['documentFragment'];
  /** The `Tokenizer` instance held by this parser. */
  84. tokenizer: Tokenizer;
  /** NOTE(review): presumably `true` once parsing has been stopped - confirm in the implementation. */
  85. stopped: boolean;
  86. /** @internal */
  87. insertionMode: InsertionMode;
  88. /** @internal */
  89. originalInsertionMode: InsertionMode;
  90. /** @internal */
  91. fragmentContextID: $;
  92. /** @internal */
  93. headElement: null | T['element'];
  94. /** @internal */
  95. formElement: null | T['element'];
  96. /** @internal */
  97. openElements: OpenElementStack<T>;
  98. /** @internal */
  99. activeFormattingElements: FormattingElementList<T>;
  100. /** Indicates that the current node is not an element in the HTML namespace */
  101. protected currentNotInHTML: boolean;
  102. /**
  103. * The template insertion mode stack is maintained from the left.
  104. * I.e. the topmost element will always have index 0.
  105. *
  106. * @internal
  107. */
  108. tmplInsertionModeStack: InsertionMode[];
  109. /** @internal */
  110. pendingCharacterTokens: CharacterToken[];
  111. /** @internal */
  112. hasNonWhitespacePendingCharacterToken: boolean;
  113. /** @internal */
  114. framesetOk: boolean;
  115. /** @internal */
  116. skipNextNewLine: boolean;
  117. /** @internal */
  118. fosterParentingEnabled: boolean;
  119. /** @internal */
  120. _err(token: Token, code: ERR, beforeToken?: boolean): void;
  121. /** @internal */
  122. onItemPush(node: T['parentNode'], tid: number, isTop: boolean): void;
  123. /** @internal */
  124. onItemPop(node: T['parentNode'], isTop: boolean): void;
  125. protected _setContextModes(current: T['parentNode'], tid: number): void;
  126. /** @protected */
  127. _switchToTextParsing(currentToken: TagToken, nextTokenizerState: (typeof TokenizerMode)[keyof typeof TokenizerMode]): void;
  128. switchToPlaintextParsing(): void;
  129. /** @protected */
  130. _getAdjustedCurrentElement(): T['element'];
  131. /** @protected */
  132. _findFormInFragmentContext(): void;
  133. protected _initTokenizerForFragmentParsing(): void;
  134. /** @protected */
  135. _setDocumentType(token: DoctypeToken): void;
  136. /** @protected */
  137. _attachElementToTree(element: T['element'], location: LocationWithAttributes | null): void;
  138. /**
  139. * For self-closing tags. Add an element to the tree, but skip adding it
  140. * to the stack. (This description belongs to `_appendElement` below.)
  141. */
  142. /** @protected */
  143. _appendElement(token: TagToken, namespaceURI: NS): void;
  144. /** @protected */
  145. _insertElement(token: TagToken, namespaceURI: NS): void;
  146. /** @protected */
  147. _insertFakeElement(tagName: string, tagID: $): void;
  148. /** @protected */
  149. _insertTemplate(token: TagToken): void;
  150. /** @protected */
  151. _insertFakeRootElement(): void;
  152. /** @protected */
  153. _appendCommentNode(token: CommentToken, parent: T['parentNode']): void;
  154. /** @protected */
  155. _insertCharacters(token: CharacterToken): void;
  156. /** @protected */
  157. _adoptNodes(donor: T['parentNode'], recipient: T['parentNode']): void;
  158. /** @protected */
  159. _setEndLocation(element: T['element'], closingToken: Token): void;
  160. protected shouldProcessStartTagTokenInForeignContent(token: TagToken): boolean;
  161. /** @protected */
  162. _processToken(token: Token): void;
  163. /** @protected */
  164. _isIntegrationPoint(tid: $, element: T['element'], foreignNS?: NS): boolean;
  165. /** @protected */
  166. _reconstructActiveFormattingElements(): void;
  167. /** @protected */
  168. _closeTableCell(): void;
  169. /** @protected */
  170. _closePElement(): void;
  171. /** @protected */
  172. _resetInsertionMode(): void;
  173. /** @protected */
  174. _resetInsertionModeForSelect(selectIdx: number): void;
  175. /** @protected */
  176. _isElementCausesFosterParenting(tn: $): boolean;
  177. /** @protected */
  178. _shouldFosterParentOnInsertion(): boolean;
  179. /** @protected */
  180. _findFosterParentingLocation(): {
  181. parent: T['parentNode'];
  182. beforeElement: T['element'] | null;
  183. };
  184. /** @protected */
  185. _fosterParentElement(element: T['element']): void;
  186. /** @protected */
  187. _isSpecialElement(element: T['element'], id: $): boolean;
  188. /** @internal */
  189. onCharacter(token: CharacterToken): void;
  190. /** @internal */
  191. onNullCharacter(token: CharacterToken): void;
  192. /** @internal */
  193. onComment(token: CommentToken): void;
  194. /** @internal */
  195. onDoctype(token: DoctypeToken): void;
  196. /** @internal */
  197. onStartTag(token: TagToken): void;
  198. /**
  199. * Processes a given start tag.
  200. *
  201. * `onStartTag` checks if a self-closing tag was recognized. When a token
  202. * is moved in between multiple insertion modes, this check for self-closing
  203. * could lead to false positives. To avoid this, `_processStartTag` is used
  204. * for nested calls.
  205. *
  206. * @param token The token to process.
  207. * @protected
  208. */
  209. _processStartTag(token: TagToken): void;
  210. /** @protected */
  211. _startTagOutsideForeignContent(token: TagToken): void;
  212. /** @internal */
  213. onEndTag(token: TagToken): void;
  214. /** @protected */
  215. _endTagOutsideForeignContent(token: TagToken): void;
  216. /** @internal */
  217. onEof(token: EOFToken): void;
  218. /** @internal */
  219. onWhitespaceCharacter(token: CharacterToken): void;
  220. }
  221. export {};