123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191 |
- /**
- * @file Batteries-included version of Cheerio. This module includes several
- * convenience methods for loading documents from various sources.
- */
- export * from './load-parse.js';
- export { contains, merge } from './static.js';
- import { adapter as htmlparser2Adapter } from 'parse5-htmlparser2-tree-adapter';
- import * as htmlparser2 from 'htmlparser2';
- import { ParserStream as Parse5Stream } from 'parse5-parser-stream';
- import { decodeBuffer, DecodeStream, } from 'encoding-sniffer';
- import * as undici from 'undici';
- import MIMEType from 'whatwg-mimetype';
- import { Writable, finished } from 'node:stream';
- import { flattenOptions, } from './options.js';
- import { load } from './load-parse.js';
/**
 * Sniffs the encoding of a buffer, decodes it to a string, and creates a
 * querying function bound to a document loaded from that string.
 *
 * @category Loading
 * @example
 *
 * ```js
 * import * as cheerio from 'cheerio';
 * import * as fs from 'node:fs';
 *
 * const buffer = fs.readFileSync('index.html');
 * const $ = cheerio.loadBuffer(buffer);
 * ```
 *
 * @param buffer - The buffer to sniff the encoding of.
 * @param options - The options to pass to Cheerio. The properties of
 *   `options.encoding`, if present, are forwarded to the decoder and take
 *   precedence over the default encoding chosen here.
 * @returns The loaded document.
 */
export function loadBuffer(buffer, options = {}) {
  const opts = flattenOptions(options);
  // XML mode falls back to UTF-8; everything else falls back to windows-1252.
  const isXml = opts != null && opts.xmlMode;
  // An explicit `null` bypasses the default parameter, so guard the lookup.
  const encodingOptions = options == null ? undefined : options.encoding;
  const str = decodeBuffer(buffer, {
    defaultEncoding: isXml ? 'utf8' : 'windows-1252',
    // Spread last so a caller-supplied `defaultEncoding` wins.
    ...encodingOptions,
  });
  return load(str, opts);
}
/**
 * Creates the writable stream shared by `stringStream` and `decodeStream`.
 *
 * When `options._useHtmlParser2` is truthy, chunks are fed to an htmlparser2
 * document stream. Otherwise a parse5 parser stream is returned, defaulting
 * to the htmlparser2 tree adapter and to `scriptingEnabled: true`.
 *
 * The `options` object is never modified; the parse5 branch works on a copy.
 *
 * @param options - Flattened Cheerio options (may be `null`/`undefined`).
 * @param cb - Called with `(err, $)` once the document has been parsed.
 * @returns A writable stream that accepts string chunks.
 */
function _stringStream(options, cb) {
  if (options != null && options._useHtmlParser2) {
    /*
     * NOTE(review): `load` is called without `options` here and in the
     * parse5 branch below, unlike `loadBuffer` — confirm this is intended.
     */
    const parser = htmlparser2.createDocumentStream(
      (err, document) => cb(err, load(document)),
      options,
    );
    return new Writable({
      // Hand string chunks to `write` as-is instead of converting to buffers.
      decodeStrings: false,
      write(chunk, _encoding, callback) {
        /*
         * NOTE(review): this throws synchronously rather than reporting the
         * error through `callback`; kept as-is so callers see no change.
         */
        if (typeof chunk !== 'string') {
          throw new TypeError('Expected a string');
        }
        parser.write(chunk);
        callback();
      },
      final(callback) {
        parser.end();
        callback();
      },
    });
  }

  const base = options != null ? options : {};
  // Build a fresh object so defaults are not written onto the caller's options.
  const parse5Options = {
    ...base,
    treeAdapter: base.treeAdapter != null ? base.treeAdapter : htmlparser2Adapter,
    // Scripting stays enabled unless it was explicitly set to `false`.
    scriptingEnabled: base.scriptingEnabled !== false,
  };
  const stream = new Parse5Stream(parse5Options);
  // `finished` fires on both successful completion and error.
  finished(stream, (err) => cb(err, load(stream.document)));
  return stream;
}
/**
 * Creates a `Writable` stream that consumes string chunks and parses them
 * into a document.
 *
 * Once the stream has finished, `cb` is invoked with the loaded document.
 *
 * @category Loading
 * @example
 *
 * ```js
 * import * as cheerio from 'cheerio';
 * import * as fs from 'fs';
 *
 * const writeStream = cheerio.stringStream({}, (err, $) => {
 *   if (err) {
 *     // Handle error
 *   }
 *
 *   console.log($('h1').text());
 *   // Output: Hello, world!
 * });
 *
 * fs.createReadStream('my-document.html', { encoding: 'utf8' }).pipe(
 *   writeStream,
 * );
 * ```
 *
 * @param options - The options to pass to Cheerio.
 * @param cb - The callback to call when the stream is finished.
 * @returns The writable stream.
 */
export function stringStream(options, cb) {
  // Normalise the public options before handing them to the shared builder.
  const flattened = flattenOptions(options);
  return _stringStream(flattened, cb);
}
/**
 * Parses a stream of buffers into a document.
 *
 * The returned stream accepts buffers, decodes them, and pipes the decoded
 * output into a document-loading stream. When that stream is finished, the
 * callback is called with the loaded document.
 *
 * The `options.encoding` object is not modified; the default encoding is
 * applied to a copy.
 *
 * @category Loading
 * @param options - The options to pass to Cheerio. `options.encoding` is
 *   forwarded to the decoder; all other properties are Cheerio options.
 * @param cb - The callback to call when the stream is finished.
 * @returns The writable stream.
 */
export function decodeStream(options, cb) {
  const { encoding = {}, ...cheerioOptions } = options;
  const opts = flattenOptions(cheerioOptions);
  // Set the default encoding to UTF-8 for XML mode
  const fallbackEncoding = opts != null && opts.xmlMode ? 'utf8' : 'windows-1252';
  const decoderOptions = {
    ...encoding,
    // A caller-supplied default wins; only fill it in when it is missing.
    defaultEncoding: encoding.defaultEncoding != null ? encoding.defaultEncoding : fallbackEncoding,
  };
  const decoder = new DecodeStream(decoderOptions);
  const loadStream = _stringStream(opts, cb);
  decoder.pipe(loadStream);
  return decoder;
}
/** Request options used by `fromURL` when the caller supplies none. */
const defaultRequestOptions = {
  method: 'GET',
  // Follow up to five redirects unless told otherwise.
  maxRedirections: 5,
  // NOTE: `throwOnError` currently doesn't work https://github.com/nodejs/undici/issues/1753
  throwOnError: true,
  // Prefer HTML/XHTML, then XML, then anything else.
  headers: {
    accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  },
};
/**
 * `fromURL` loads a document from a URL.
 *
 * By default, redirects are followed and `throwOnError` is requested so that
 * error responses reject (the note on the default request options says this
 * flag may not currently take effect). Responses whose content type is
 * neither HTML nor XML are rejected with a `RangeError`.
 *
 * The `requestOptions` and `encoding` objects passed by the caller are not
 * modified; default headers and the transport-layer charset are applied to
 * copies.
 *
 * @category Loading
 * @example
 *
 * ```js
 * import * as cheerio from 'cheerio';
 *
 * const $ = await cheerio.fromURL('https://example.com');
 * ```
 *
 * @param url - The URL to load the document from.
 * @param options - The options to pass to Cheerio. `requestOptions` is
 *   forwarded to undici and `encoding` to the decoder; the remaining
 *   properties are Cheerio options.
 * @returns The loaded document.
 */
export async function fromURL(url, options = {}) {
  const { requestOptions = defaultRequestOptions, encoding = {}, ...cheerioOptions } = options;
  // Add headers if none were supplied, without touching the caller's object.
  const streamOptions = {
    ...requestOptions,
    headers: requestOptions.headers != null ? requestOptions.headers : defaultRequestOptions.headers,
  };
  let undiciStream;
  const promise = new Promise((resolve, reject) => {
    undiciStream = undici.stream(url, streamOptions, (res) => {
      const headerValue = res.headers['content-type'];
      // Treat a missing content type as HTML.
      const contentType = headerValue != null ? headerValue : 'text/html';
      const mimeType = new MIMEType(Array.isArray(contentType) ? contentType[0] : contentType);
      if (!mimeType.isHTML() && !mimeType.isXML()) {
        throw new RangeError(`The content-type "${contentType}" is neither HTML nor XML.`);
      }
      /*
       * If we allow redirects, we will have entries in the history.
       * The last entry will be the final URL.
       */
      const history = res.context != null ? res.context.history : undefined;
      const opts = {
        // Forward the charset from the header to the decodeStream.
        encoding: {
          ...encoding,
          transportLayerEncodingLabel: mimeType.parameters.get('charset'),
        },
        // Set XML mode based on the MIME type.
        xmlMode: mimeType.isXML(),
        // Set the `baseURL` to the final URL.
        baseURL: history ? history[history.length - 1] : url,
        // Spread last so explicit Cheerio options override the derived ones.
        ...cheerioOptions,
      };
      return decodeStream(opts, (err, $) => (err ? reject(err) : resolve($)));
    });
  });
  // Let's make sure the request is completed before returning the promise.
  await undiciStream;
  return promise;
}
- //# sourceMappingURL=index.js.map
|