dom.js 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. "use strict";
  /**
   * Compiler-emitted helper: re-exports property `key` of `source` on `target`
   * under the name `alias` (or `key` when `alias` is undefined).
   *
   * An already-installed `this.__createBinding` is reused when present. When
   * `Object.create` is unavailable, a plain assignment is used instead of a
   * property descriptor.
   */
  var __createBinding = (this && this.__createBinding) || (Object.create
      ? function (target, source, key, alias) {
          var exportName = alias === undefined ? key : alias;
          var descriptor = Object.getOwnPropertyDescriptor(source, key);
          var needsGetter;
          if (!descriptor) {
              needsGetter = true;
          }
          else if ("get" in descriptor) {
              // Accessor on the source: only reuse it for real ES modules.
              needsGetter = !source.__esModule;
          }
          else {
              // Data property that can still change: expose it through a getter.
              needsGetter = descriptor.writable || descriptor.configurable;
          }
          if (needsGetter) {
              descriptor = { enumerable: true, get: function () { return source[key]; } };
          }
          Object.defineProperty(target, exportName, descriptor);
      }
      : function (target, source, key, alias) {
          var exportName = alias === undefined ? key : alias;
          target[exportName] = source[key];
      });
  /**
   * Compiler-emitted helper: stores `value` as the `default` member of `target`.
   *
   * An already-installed `this.__setModuleDefault` is reused when present. With
   * `Object.create` available the member is defined as an enumerable property
   * via `Object.defineProperty`; otherwise it is assigned directly.
   */
  var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
      ? function (target, value) {
          var descriptor = { enumerable: true, value: value };
          Object.defineProperty(target, "default", descriptor);
      }
      : function (target, value) {
          target["default"] = value;
      });
  /**
   * Compiler-emitted helper backing `import * as x from "..."`.
   *
   * A module flagged with `__esModule` is returned untouched. Anything else is
   * wrapped in a fresh object: every own enumerable key except "default" is
   * re-bound through `__createBinding`, and the original value is attached as
   * `default` through `__setModuleDefault`.
   */
  var __importStar = (this && this.__importStar) || function (mod) {
      var isEsModule = mod && mod.__esModule;
      if (isEsModule) {
          return mod;
      }
      var namespace = {};
      if (mod != null) {
          for (var key in mod) {
              if (key === "default") {
                  continue;
              }
              if (!Object.prototype.hasOwnProperty.call(mod, key)) {
                  continue;
              }
              __createBinding(namespace, mod, key);
          }
      }
      __setModuleDefault(namespace, mod);
      return namespace;
  };
  /**
   * Compiler-emitted helper backing `import x from "..."`.
   *
   * A module flagged with `__esModule` is returned as-is; any other value is
   * wrapped as `{ default: value }`. An already-installed
   * `this.__importDefault` is reused when present.
   */
  var __importDefault = (this && this.__importDefault) || function (mod) {
      if (mod && mod.__esModule) {
          return mod;
      }
      return { "default": mod };
  };
  28. Object.defineProperty(exports, "__esModule", { value: true });
  29. exports.loadHtml = loadHtml;
  30. exports.parse = parse;
  31. exports.root = root;
  32. exports.textNode = textNode;
  33. exports.cleanup = cleanup;
  34. const lodash_1 = __importDefault(require("lodash"));
  35. const cheerio = __importStar(require("cheerio"));
  /**
   * Load an HTML string with cheerio and hand back the loaded instance.
   *
   * The markup is passed to `cheerio.load` together with the
   * `_useHtmlParser2: true` option.
   *
   * @param html HTML source to load
   * @return whatever `cheerio.load` returns for that markup
   */
  function loadHtml(html) {
      const loadOptions = { _useHtmlParser2: true };
      const loaded = cheerio.load(html, loadOptions);
      return loaded;
  }
  /**
   * Parse an HTML string and return its content.
   *
   * The markup is loaded with `cheerio.load` (option `_useHtmlParser2: true`)
   * and queried with the selector "html, body". When the first match is
   * non-empty, that selection is returned; otherwise the loaded cheerio
   * instance itself is returned.
   *
   * @param html HTML source to parse
   * @return the first "html, body" match, or the loaded cheerio instance
   */
  function parse(html) {
      const loaded = cheerio.load(html, { _useHtmlParser2: true });
      const topLevel = loaded("html, body").first();
      if (topLevel.length > 0) {
          return topLevel;
      }
      return loaded;
  }
  /**
   * Return the main element of a loaded DOM.
   *
   * Queries `$` with the selector "html, body, > div" and keeps the first
   * match. When nothing matches, falls back to `$.root()`.
   *
   * @param $ loaded cheerio instance (callable with a selector, exposes `root()`)
   * @return the first matching selection, or the result of `$.root()`
   */
  function root($) {
      const candidate = $("html, body, > div").first();
      if (candidate.length > 0) {
          return candidate;
      }
      return $.root();
  }
  /**
   * Return the text held directly by an element.
   *
   * Concatenates the `data` of every entry in `$el.children` whose `type` is
   * exactly "text", in order. Only direct children are read; text nested inside
   * child elements is not included. When `children` is missing or is not an
   * array, the result is the empty string.
   *
   * NOTE(review): this reads `$el.children` as a list of nodes, so it expects a
   * raw DOM node rather than a wrapped cheerio selection — confirm at call sites.
   *
   * @param {cheerio.Node} $el node whose direct text children are collected
   * @return {string} concatenated text of the direct text children
   */
  function textNode($el) {
      // Nodes without a child list (e.g. text nodes) simply contribute nothing.
      const children = Array.isArray($el.children) ? $el.children : [];
      let text = "";
      for (const child of children) {
          if (child.type === "text") {
              text += child.data;
          }
      }
      return text;
  }
  /**
   * Clean up a DOM by unwrapping its divs.
   *
   * For every element returned by `$el.find("div")`, the element is first
   * cleaned recursively and then replaced by its own `html()` output.
   *
   * @param {cheerio.Node} $el selection whose div descendants are unwrapped
   * @param {cheerio.DOM} $ cheerio instance used to wrap each matched element
   * @return {cheerio.Node} the same `$el` that was passed in
   */
  function cleanup($el, $) {
      const nestedDivs = $el.find("div");
      nestedDivs.each((_index, element) => {
          const $current = $(element);
          // Clean inner divs first so the markup read below is already cleaned.
          cleanup($current, $);
          const innerMarkup = $current.html();
          $current.replaceWith(innerMarkup);
      });
      return $el;
  }