index.js 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. import { Writable } from 'node:stream';
  2. import { Parser } from 'parse5';
  3. /* eslint-disable unicorn/consistent-function-scoping -- The rule seems to be broken here. */
  4. /**
  5. * Streaming HTML parser with scripting support.
  6. * A [writable stream](https://nodejs.org/api/stream.html#stream_class_stream_writable).
  7. *
  8. * @example
  9. *
  10. * ```js
  11. * const ParserStream = require('parse5-parser-stream');
  12. * const http = require('http');
  13. * const { finished } = require('node:stream');
  14. *
  15. * // Fetch the page content and obtain its <head> node
  16. * http.get('http://inikulin.github.io/parse5/', res => {
  17. * const parser = new ParserStream();
  18. *
  19. * finished(parser, () => {
  20. * console.log(parser.document.childNodes[1].childNodes[0].tagName); //> 'head'
  21. * });
  22. *
  23. * res.pipe(parser);
  24. * });
  25. * ```
  26. *
  27. */
  export class ParserStream extends Writable {
      /**
       * Creates a stream that is driven by a fragment parser rather than a
       * document parser.
       *
       * @param fragmentContext Forwarded unchanged to `Parser.getFragmentParser`.
       * @param options Parsing options; forwarded to both the fragment parser and the stream.
       * @returns A new `ParserStream` wrapping the fragment parser.
       */
      static getFragmentStream(fragmentContext, options) {
          const parser = Parser.getFragmentParser(fragmentContext, options);
          return new ParserStream(options, parser);
      }
      /** The resulting document node. */
      get document() {
          return this.parser.document;
      }
      /** Returns whatever the underlying parser's `getFragment()` returns. */
      getFragment() {
          return this.parser.getFragment();
      }
      /**
       * @param options Parsing options.
       * @param parser Parser to feed; defaults to a new `Parser` built from `options`.
       */
      constructor(options, parser = new Parser(options)) {
          // `decodeStrings: false` keeps string chunks as strings instead of
          // letting the Writable base class convert them to Buffers.
          super({ decodeStrings: false });
          this.parser = parser;
          // Set by `end()`; forwarded to the tokenizer as the "last chunk" flag.
          this.lastChunkWritten = false;
          // Callback of the most recent `_write()` call; reused when resuming after a script.
          this.writeCallback = undefined;
          // HTML queued through `documentWrite` while a script is being handled.
          this.pendingHtmlInsertions = [];
          const resume = () => {
              // Queued HTML is inserted last-to-first.
              // NOTE(review): presumably this keeps the `documentWrite` call order,
              // since every insertion happens at the current position - confirm
              // against the tokenizer.
              for (let i = this.pendingHtmlInsertions.length - 1; i >= 0; i--) {
                  this.parser.tokenizer.insertHtmlAtCurrentPos(this.pendingHtmlInsertions[i]);
              }
              this.pendingHtmlInsertions.length = 0;
              //NOTE: keep parsing if we don't wait for the next input chunk
              this.parser.tokenizer.resume(this.writeCallback);
          };
          const documentWrite = (html) => {
              // Writes are dropped once the parser reports it has stopped.
              if (!this.parser.stopped) {
                  this.pendingHtmlInsertions.push(html);
              }
          };
          const scriptHandler = (scriptElement) => {
              // Only pause when someone is listening; otherwise nobody would
              // ever call `resume` and parsing would stall.
              if (this.listenerCount('script') > 0) {
                  this.parser.tokenizer.pause();
                  this.emit('script', scriptElement, documentWrite, resume);
              }
          };
          this.parser.scriptHandler = scriptHandler;
      }
      //WritableStream implementation
      /**
       * Feeds one chunk to the tokenizer.
       *
       * @param chunk Chunk to parse; must be a string.
       * @param _encoding Unused.
       * @param callback Write-completion callback, handed to the tokenizer.
       * @throws {TypeError} If `chunk` is not a string.
       */
      _write(chunk, _encoding, callback) {
          if (typeof chunk !== 'string') {
              throw new TypeError('Parser can work only with string streams.');
          }
          this.writeCallback = callback;
          this.parser.tokenizer.write(chunk, this.lastChunkWritten, this.writeCallback);
      }
      // TODO [engine:node@>=16]: Due to issues with Node < 16, we are overriding `end` instead of `_final`.
      /**
       * Marks the next write as the last chunk and ends the stream.
       *
       * A final chunk is always written (an empty string when none is given) so
       * that `_write` runs once with `lastChunkWritten` set.
       *
       * @param chunk Optional final chunk, or the callback when called as `end(callback)`.
       * @param encoding Optional encoding, forwarded to `Writable.prototype.end`.
       * @param callback Optional callback, forwarded to `Writable.prototype.end`.
       * @returns This stream, as returned by `Writable.prototype.end`.
       */
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      end(chunk, encoding, callback) {
          // `end(callback)` overload: forwarding the function as the chunk would
          // make the base class skip the write entirely, so the tokenizer would
          // never be told that the input is complete.
          const isCallbackOnly = typeof chunk === 'function';
          const finalChunk = isCallbackOnly ? '' : chunk || '';
          const finalEncoding = isCallbackOnly ? undefined : encoding;
          const onFinished = isCallbackOnly ? chunk : callback;
          this.lastChunkWritten = true;
          return super.end(finalChunk, finalEncoding, onFinished);
      }
  }
  86. //# sourceMappingURL=index.js.map