Parser.d.ts 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. import Tokenizer, { type Callbacks, QuoteType } from "./Tokenizer.js";
  /**
   * Options accepted by the `Parser` constructor. Every option is optional;
   * the `@default` tag on each member states the value used when it is omitted.
   */
  export interface ParserOptions {
      /**
       * Indicates whether special tags (`<script>`, `<style>`, and `<title>`) should get special treatment
       * and whether "empty" tags (e.g. `<br>`) can have children. If `false`, the content of special tags
       * will be text only. For feeds and other XML content (documents that don't consist of HTML),
       * set this to `true`.
       *
       * @default false
       */
      xmlMode?: boolean;
      /**
       * Decode entities within the document.
       *
       * @default true
       */
      decodeEntities?: boolean;
      /**
       * If set to `true`, all tags will be lowercased.
       *
       * @default !xmlMode
       */
      lowerCaseTags?: boolean;
      /**
       * If set to `true`, all attribute names will be lowercased. This has a noticeable impact on speed.
       *
       * @default !xmlMode
       */
      lowerCaseAttributeNames?: boolean;
      /**
       * If set to `true`, CDATA sections will be recognized as text even if the `xmlMode` option is not enabled.
       * NOTE: If `xmlMode` is set to `true` then CDATA sections will always be recognized as text.
       *
       * @default xmlMode
       */
      recognizeCDATA?: boolean;
      /**
       * If set to `true`, self-closing tags will trigger the `onclosetag` event even if `xmlMode` is not set to `true`.
       * NOTE: If `xmlMode` is set to `true` then self-closing tags will always be recognized.
       *
       * @default xmlMode
       */
      recognizeSelfClosing?: boolean;
      /**
       * Allows the default tokenizer to be overwritten.
       */
      Tokenizer?: typeof Tokenizer;
  }
  /**
   * Callbacks a consumer can supply to a `Parser`. The `Parser` constructor
   * accepts a `Partial<Handler>`, so any subset of these may be implemented.
   */
  export interface Handler {
      onparserinit(parser: Parser): void;
      /**
       * Resets the handler back to starting state
       */
      onreset(): void;
      /**
       * Signals the handler that parsing is done
       */
      onend(): void;
      onerror(error: Error): void;
      onclosetag(name: string, isImplied: boolean): void;
      onopentagname(name: string): void;
      /**
       * Reports a single attribute.
       *
       * @param name Name of the attribute
       * @param value Value of the attribute.
       * @param quote Quotes used around the attribute. `null` if the attribute has no quotes around the value, `undefined` if the attribute has no value.
       */
      onattribute(name: string, value: string, quote?: string | undefined | null): void;
      onopentag(name: string, attribs: {
          [s: string]: string;
      }, isImplied: boolean): void;
      ontext(data: string): void;
      oncomment(data: string): void;
      oncdatastart(): void;
      oncdataend(): void;
      oncommentend(): void;
      onprocessinginstruction(name: string, data: string): void;
  }
  /**
   * Type declaration of the parser. It implements the tokenizer's `Callbacks`
   * interface (the members tagged `@internal`) and exposes a public API to feed
   * it data (`write`, `end`, `parseComplete`) and to control it (`pause`,
   * `resume`, `reset`). User-facing events go to the optional `Handler`
   * callbacks passed to the constructor.
   */
  export declare class Parser implements Callbacks {
      private readonly options;
      /** The start index of the last event. */
      startIndex: number;
      /** The end index of the last event. */
      endIndex: number;
      /**
       * Store the start index of the current open tag,
       * so we can update the start index for attributes.
       */
      private openTagStart;
      private tagname;
      private attribname;
      private attribvalue;
      private attribs;
      private readonly stack;
      /** Determines whether self-closing tags are recognized. */
      private readonly foreignContext;
      private readonly cbs;
      private readonly lowerCaseTagNames;
      private readonly lowerCaseAttributeNames;
      private readonly recognizeSelfClosing;
      /** We are parsing HTML. Inverse of the `xmlMode` option. */
      private readonly htmlMode;
      private readonly tokenizer;
      private readonly buffers;
      private bufferOffset;
      /** The index of the last written buffer. Used when resuming after a `pause()`. */
      private writeIndex;
      /** Indicates whether the parser has finished running / `.end` has been called. */
      private ended;
      /**
       * @param cbs Handler callbacks to invoke; may be omitted or `null`.
       * @param options Parser options; see `ParserOptions` for the defaults.
       */
      constructor(cbs?: Partial<Handler> | null, options?: ParserOptions);
      /** @internal */
      ontext(start: number, endIndex: number): void;
      /** @internal */
      ontextentity(cp: number, endIndex: number): void;
      /**
       * Checks if the current tag is a void element. Override this if you want
       * to specify your own additional void elements.
       */
      protected isVoidElement(name: string): boolean;
      /** @internal */
      onopentagname(start: number, endIndex: number): void;
      private emitOpenTag;
      private endOpenTag;
      /** @internal */
      onopentagend(endIndex: number): void;
      /** @internal */
      onclosetag(start: number, endIndex: number): void;
      /** @internal */
      onselfclosingtag(endIndex: number): void;
      private closeCurrentTag;
      /** @internal */
      onattribname(start: number, endIndex: number): void;
      /** @internal */
      onattribdata(start: number, endIndex: number): void;
      /** @internal */
      onattribentity(cp: number): void;
      /** @internal */
      onattribend(quote: QuoteType, endIndex: number): void;
      private getInstructionName;
      /** @internal */
      ondeclaration(start: number, endIndex: number): void;
      /** @internal */
      onprocessinginstruction(start: number, endIndex: number): void;
      /** @internal */
      oncomment(start: number, endIndex: number, offset: number): void;
      /** @internal */
      oncdata(start: number, endIndex: number, offset: number): void;
      /** @internal */
      onend(): void;
      /**
       * Resets the parser to a blank state, ready to parse a new HTML document
       */
      reset(): void;
      /**
       * Resets the parser, then parses a complete document and
       * pushes it to the handler.
       *
       * @param data Document to parse.
       */
      parseComplete(data: string): void;
      private getSlice;
      private shiftBuffer;
      /**
       * Parses a chunk of data and calls the corresponding callbacks.
       *
       * @param chunk Chunk to parse.
       */
      write(chunk: string): void;
      /**
       * Parses the end of the buffer and clears the stack, calls onend.
       *
       * @param chunk Optional final chunk to parse.
       */
      end(chunk?: string): void;
      /**
       * Pauses parsing. The parser won't emit events until `resume` is called.
       */
      pause(): void;
      /**
       * Resumes parsing after `pause` was called.
       */
      resume(): void;
      /**
       * Alias of `write`, for backwards compatibility.
       *
       * @param chunk Chunk to parse.
       * @deprecated
       */
      parseChunk(chunk: string): void;
      /**
       * Alias of `end`, for backwards compatibility.
       *
       * @param chunk Optional final chunk to parse.
       * @deprecated
       */
      done(chunk?: string): void;
  }
  198. //# sourceMappingURL=Parser.d.ts.map