xml.js 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  /*
  Language: HTML, XML
  Website: https://www.w3.org/XML/
  Category: common, web
  Audit: 2020
  */
  /**
   * highlight.js language definition for HTML and XML.
   *
   * Builds and returns the mode tree describing doctype/meta declarations,
   * comments, CDATA sections, character/entity references, processing
   * instructions, `<style>`/`<script>` blocks (delegated to sub-languages)
   * and ordinary open/close tags with their attributes.
   *
   * @param hljs - the highlighter instance; this function reads `hljs.regex`,
   *   `hljs.inherit`, `hljs.COMMENT`, `hljs.APOS_STRING_MODE` and
   *   `hljs.QUOTE_STRING_MODE` from it.
   * @returns the language definition object (name, aliases, flags, modes).
   * @type LanguageFn
   */
  function xml(hljs) {
    const regex = hljs.regex;

    // The XML spec (https://www.w3.org/TR/xml/#NT-NameChar) enumerates the
    // exact code point ranges allowed in names. For performance and broader
    // Unicode support we deliberately approximate them with the Unicode
    // letter class \p{L} instead of spelling out every range.
    //
    // A tag name is: a letter or underscore, an optional `prefix:` part,
    // then any further letters, ASCII digits, underscores, periods, hyphens.
    const TAG_NAME_RE = regex.concat(
      /[\p{L}_]/u,
      regex.optional(/[\p{L}0-9_.-]*:/u),
      /[\p{L}0-9_.-]*/u
    );
    // Attribute names: same character set, colon allowed anywhere.
    const XML_IDENT_RE = /[\p{L}0-9._:-]+/u;

    // Named, decimal and hexadecimal character references.
    // Named references may contain digits after the first letter
    // (e.g. `&frac12;`, `&sup2;`), so digits are accepted there.
    const XML_ENTITIES = {
      className: 'symbol',
      begin: /&[a-z][a-z0-9]*;|&#[0-9]+;|&#x[a-f0-9]+;/
    };

    // Keywords inside `<!...>` declarations, each introduced by whitespace.
    const XML_META_KEYWORDS = {
      begin: /\s/,
      contains: [
        {
          className: 'keyword',
          // all ASCII digits are allowed after the first character
          begin: /#?[a-z_][a-z0-9_-]+/,
          illegal: /\n/
        }
      ]
    };

    // Same keyword mode, but for a parenthesised group `( ... )`.
    const XML_META_PAR_KEYWORDS = hljs.inherit(XML_META_KEYWORDS, {
      begin: /\(/,
      end: /\)/
    });

    // Quoted strings inside meta declarations, labelled as plain `string`.
    const APOS_META_STRING_MODE = hljs.inherit(hljs.APOS_STRING_MODE, { className: 'string' });
    const QUOTE_META_STRING_MODE = hljs.inherit(hljs.QUOTE_STRING_MODE, { className: 'string' });

    // Everything between a tag's name and its closing bracket:
    // attribute names and `= value` pairs. A stray `<` is illegal here.
    const TAG_INTERNALS = {
      endsWithParent: true,
      illegal: /</,
      relevance: 0,
      contains: [
        {
          className: 'attr',
          begin: XML_IDENT_RE,
          relevance: 0
        },
        {
          begin: /=\s*/,
          relevance: 0,
          contains: [
            {
              className: 'string',
              // the value terminates the enclosing `=` mode
              endsParent: true,
              variants: [
                // double-quoted value
                {
                  begin: /"/,
                  end: /"/,
                  contains: [ XML_ENTITIES ]
                },
                // single-quoted value
                {
                  begin: /'/,
                  end: /'/,
                  contains: [ XML_ENTITIES ]
                },
                // unquoted value
                { begin: /[^\s"'=<>`]+/ }
              ]
            }
          ]
        }
      ]
    };

    return {
      name: 'HTML, XML',
      aliases: [
        'html',
        'xhtml',
        'rss',
        'atom',
        'xjb',
        'xsd',
        'xsl',
        'plist',
        'wsf',
        'svg'
      ],
      case_insensitive: true,
      // the name patterns above rely on \p{L}, which needs Unicode-aware regexes
      unicodeRegex: true,
      contains: [
        // `<!DOCTYPE ...>` and similar declarations, including an optional
        // `[ ... ]` internal subset holding nested declarations
        {
          className: 'meta',
          begin: /<![a-z]/,
          end: />/,
          relevance: 10,
          contains: [
            XML_META_KEYWORDS,
            QUOTE_META_STRING_MODE,
            APOS_META_STRING_MODE,
            XML_META_PAR_KEYWORDS,
            {
              begin: /\[/,
              end: /\]/,
              contains: [
                {
                  className: 'meta',
                  begin: /<![a-z]/,
                  end: />/,
                  contains: [
                    XML_META_KEYWORDS,
                    XML_META_PAR_KEYWORDS,
                    QUOTE_META_STRING_MODE,
                    APOS_META_STRING_MODE
                  ]
                }
              ]
            }
          ]
        },
        // <!-- comment -->
        hljs.COMMENT(
          /<!--/,
          /-->/,
          { relevance: 10 }
        ),
        // <![CDATA[ ... ]]>
        {
          begin: /<!\[CDATA\[/,
          end: /\]\]>/,
          relevance: 10
        },
        XML_ENTITIES,
        // xml processing instructions
        {
          className: 'meta',
          end: /\?>/,
          variants: [
            {
              begin: /<\?xml/,
              relevance: 10,
              contains: [
                QUOTE_META_STRING_MODE
              ]
            },
            {
              begin: /<\?[a-z][a-z0-9]+/
            }
          ]
        },
        {
          className: 'tag',
          /*
          The lookahead pattern (?=...) ensures that 'begin' only matches
          '<style' as a single word, followed by a whitespace or an
          ending bracket.
          */
          begin: /<style(?=\s|>)/,
          end: />/,
          keywords: { name: 'style' },
          contains: [ TAG_INTERNALS ],
          // content up to (not including) `</style>` goes to a sub-language
          starts: {
            end: /<\/style>/,
            returnEnd: true,
            subLanguage: [
              'css',
              'xml'
            ]
          }
        },
        {
          className: 'tag',
          // See the comment in the <style tag about the lookahead pattern
          begin: /<script(?=\s|>)/,
          end: />/,
          keywords: { name: 'script' },
          contains: [ TAG_INTERNALS ],
          // content up to (not including) `</script>` goes to a sub-language
          starts: {
            end: /<\/script>/,
            returnEnd: true,
            subLanguage: [
              'javascript',
              'handlebars',
              'xml'
            ]
          }
        },
        // we need this for now for JSX (fragment tags `<>` and `</>`)
        {
          className: 'tag',
          begin: /<>|<\/>/
        },
        // open tag
        {
          className: 'tag',
          begin: regex.concat(
            /</,
            regex.lookahead(regex.concat(
              TAG_NAME_RE,
              // <tag/>
              // <tag>
              // <tag ...
              regex.either(/\/>/, />/, /\s/)
            ))
          ),
          end: /\/?>/,
          contains: [
            {
              className: 'name',
              begin: TAG_NAME_RE,
              relevance: 0,
              starts: TAG_INTERNALS
            }
          ]
        },
        // close tag
        {
          className: 'tag',
          begin: regex.concat(
            /<\//,
            regex.lookahead(regex.concat(
              // XML permits whitespace between the name and `>`
              // (ETag ::= '</' Name S? '>'), e.g. `</div >`
              TAG_NAME_RE, /\s*>/
            ))
          ),
          contains: [
            {
              className: 'name',
              begin: TAG_NAME_RE,
              relevance: 0
            },
            {
              begin: />/,
              relevance: 0,
              endsParent: true
            }
          ]
        }
      ]
    };
  }

  export { xml as default };