summary.js 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. "use strict";
  /**
  Compiler-emitted helper: expose property `key` of `source` on `target` under `alias` (defaults to `key`).
  A helper already present on `this` is reused when there is one.
  */
  var __createBinding = (this && this.__createBinding) || (Object.create
      ? function (target, source, key, alias) {
          const name = alias === undefined ? key : alias;
          let descriptor = Object.getOwnPropertyDescriptor(source, key);
          // Decide whether the existing descriptor must be replaced by a forwarding getter
          let needsGetter;
          if (!descriptor) {
              needsGetter = true;
          }
          else if ("get" in descriptor) {
              // Accessor properties are reused as-is only for real ES modules
              needsGetter = !source.__esModule;
          }
          else {
              needsGetter = descriptor.writable || descriptor.configurable;
          }
          if (needsGetter) {
              descriptor = { enumerable: true, get: function () { return source[key]; } };
          }
          Object.defineProperty(target, name, descriptor);
      }
      : function (target, source, key, alias) {
          // Fallback without Object.create: plain value copy
          target[alias === undefined ? key : alias] = source[key];
      });
  /**
  Compiler-emitted helper: store `value` as the enumerable "default" property of `target`.
  A helper already present on `this` is reused when there is one.
  */
  var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
      if (Object.create) {
          return function (target, value) {
              Object.defineProperty(target, "default", { value: value, enumerable: true });
          };
      }
      // Fallback without Object.create: plain assignment
      return function (target, value) {
          target["default"] = value;
      };
  })();
  /**
  Compiler-emitted helper: build a namespace object for a required module.
  Modules flagged `__esModule` are returned unchanged; otherwise every own
  property except "default" is bound onto a fresh object whose "default" is the module itself.
  */
  var __importStar = (this && this.__importStar) || function (mod) {
      if (mod && mod.__esModule) {
          return mod;
      }
      var namespace = {};
      if (mod != null) {
          for (var key in mod) {
              if (key === "default") {
                  continue;
              }
              if (!Object.prototype.hasOwnProperty.call(mod, key)) {
                  continue;
              }
              __createBinding(namespace, mod, key);
          }
      }
      __setModuleDefault(namespace, mod);
      return namespace;
  };
  25. Object.defineProperty(exports, "__esModule", { value: true });
  26. const dom = __importStar(require("./dom"));
  // Selector for list elements; used to find the lists under a node
  const SELECTOR_LIST = "ol, ul";
  // Selector passed to find() on a list item to locate the link of an entry
  const SELECTOR_LINK = "> a, p > a";
  // Selector for the headings that introduce a part
  const SELECTOR_PART = "h2, h3, h4";
  /**
  Find the list(s) directly under a node.
  When the node has ".olist" children, the first of them is searched instead.
  @param {cheerio.Node} $parent
  @return {cheerio.Node}
  */
  function findList($parent) {
      const $wrappers = $parent.children(".olist");
      const $scope = $wrappers.length > 0 ? $wrappers.first() : $parent;
      return $scope.children(SELECTOR_LIST);
  }
  /**
  Parse a list element and return its entries as articles, recursively.
  Entries that end up without a title are skipped.
  @param {cheerio.Node} $ul
  @param {cheerio.DOM} $
  @return {Array} articles, each with `title`, `articles` and, when a link with an href exists, `ref`
  */
  function parseList($ul, $) {
      const articles = [];
      $ul.children("li").each(function () {
          const article = {};
          const $li = $(this);
          // Text for the entry: its paragraph(s), else whatever dom.textNode reports for the item
          const $p = $li.children("p");
          article.title = ($p.text() || dom.textNode($li.get(0))).trim();
          // A link, when present, overrides the title and provides the ref
          const $a = $li.find(SELECTOR_LINK);
          if ($a.length > 0) {
              article.title = $a.first().text();
              const href = $a.attr("href");
              // An anchor may carry no href at all; leave `ref` unset instead of throwing
              if (typeof href === "string") {
                  // Normalise backslashes and drop leading slashes
                  article.ref = href.replace(/\\/g, "/").replace(/^\/+/, "");
              }
          }
          // Untitled entries are dropped, so skip them before descending into sub-lists
          if (!article.title) {
              return;
          }
          // Sub articles
          article.articles = parseList(findList($li), $);
          articles.push(article);
      });
      return articles;
  }
  /**
  Collect the parts found directly under a node, pairing each part heading
  with the list that follows it. A list with no preceding heading becomes a
  part with an empty title; a heading with no following list keeps `list: null`.
  @param {cheerio.Node} $parent
  @param {cheerio.DOM} $
  @return {Array<{title: String, list: cheerio.Node}>}
  */
  function findParts($parent, $) {
      // TODO asciidoc compatibility
      const $nodes = $parent.children(`${SELECTOR_LIST}, ${SELECTOR_PART}`);
      const parts = [];
      // Heading seen but not yet paired with a list
      let pending = null;
      $nodes.each((index, node) => {
          if (!isPartNode(node)) {
              // A list closes the pending heading, or stands alone as an untitled part
              const part = pending !== null ? pending : { title: "", list: null };
              part.list = node;
              parts.push(part);
              pending = null;
              return;
          }
          // A new heading: the previous one, if still pending, had no list
          if (pending !== null) {
              parts.push(pending);
          }
          pending = { title: getPartTitle(node, $), list: null };
      });
      // A trailing heading without a list is still a part
      if (pending !== null) {
          parts.push(pending);
      }
      return parts;
  }
  /**
  True if the element is a part heading, i.e. its tag name is exactly one of
  the entries of SELECTOR_PART.
  @param el element exposing its tag name as `name`
  @return {boolean}
  */
  function isPartNode(el) {
      // Compare against each selector entry; a substring search on the whole
      // selector string would also accept names such as "", "h" or "3"
      return SELECTOR_PART.split(",").some((tag) => tag.trim() === el.name);
  }
  /**
  Return the title of a part element: its text with surrounding whitespace removed
  @param el
  @param {cheerio.DOM} $
  @return {string}
  */
  function getPartTitle(el, $) {
      const $heading = $(el);
      const rawTitle = $heading.text();
      return rawTitle.trim();
  }
  /**
  Parse the HTML of a summary into a tree of parts, each holding its articles
  @param {string} html
  @return {Object} `{ parts: [{ title, articles }] }`
  */
  function parseSummary(html) {
      const $ = dom.parse(html);
      const $root = dom.cleanup(dom.root($), $);
      // Turn every detected part into its title plus the parsed content of its list
      const parts = findParts($root, $).map((part) => ({
          title: part.title,
          articles: parseList($(part.list), $)
      }));
      return { parts };
  }
  155. exports.default = parseSummary;