/*
Language: R
Description: R is a free software environment for statistical computing and graphics.
Author: Joe Cheng <joe@rstudio.org>
Contributors: Konrad Rudolph <konrad.rudolph@gmail.com>
Website: https://www.r-project.org
Category: common,scientific
*/
  /**
   * Language definition for R.
   *
   * Builds the mode tree (keywords, Roxygen and plain comments, strings,
   * numbers, operators, punctuation and backtick-quoted identifiers) out of the
   * helpers found on `hljs`.
   *
   * @type LanguageFn
   * @param hljs - highlighter API object; this function reads `hljs.regex`
   *   (`either`, `lookahead`), `hljs.COMMENT`, `hljs.HASH_COMMENT_MODE`,
   *   `hljs.BACKSLASH_ESCAPE` and `hljs.END_SAME_AS_BEGIN`.
   * @returns the language definition object (`name`, `keywords`, `contains`).
   */
  function r(hljs) {
    const regex = hljs.regex;
    // Identifiers in R cannot start with `_`, but they can start with `.` if it
    // is not immediately followed by a digit.
    // R also supports quoted identifiers, which are near-arbitrary sequences
    // delimited by backticks (`…`), which may contain escape sequences. These are
    // handled in a separate mode. See `test/markup/r/names.txt` for examples.
    // FIXME: Support Unicode identifiers.
    const IDENT_RE = /(?:(?:[a-zA-Z]|\.[._a-zA-Z])[._a-zA-Z0-9]*)|\.(?!\d)/;
    const NUMBER_TYPES_RE = regex.either(
      // Special case: only hexadecimal binary powers can contain fractions
      /0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/,
      // Hexadecimal numbers without fraction and optional binary power
      /0[xX][0-9a-fA-F]+(?:[pP][+-]?\d+)?[Li]?/,
      // Decimal numbers
      /(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/
    );
    // Alternatives are tried left to right, so `**` has to be listed before the
    // single-character class (which contains `*`); otherwise a standalone `**`
    // is consumed as two separate `*` operators and the `**` alternative is
    // never reached.
    const OPERATORS_RE = /[=!<>:]=|\|\||&&|:::?|<-|<<-|->>|->|\|>|\*\*|[-+*\/?!$&|:<=>@^~]/;
    const PUNCTUATION_RE = regex.either(
      /[()]/,
      /[{}]/,
      /\[\[/,
      /[[\]]/,
      /\\/,
      /,/
    );

    return {
      name: 'R',

      keywords: {
        // Keyword lookup uses R identifier syntax (dots are part of names).
        $pattern: IDENT_RE,
        keyword:
          'function if in break next repeat else for while',
        literal:
          'NULL NA TRUE FALSE Inf NaN NA_integer_|10 NA_real_|10 '
          + 'NA_character_|10 NA_complex_|10',
        built_in:
          // Builtin constants
          'LETTERS letters month.abb month.name pi T F '
          // Primitive functions
          // These are all the functions in `base` that are implemented as a
          // `.Primitive`, minus those functions that are also keywords.
          + 'abs acos acosh all any anyNA Arg as.call as.character '
          + 'as.complex as.double as.environment as.integer as.logical '
          + 'as.null.default as.numeric as.raw asin asinh atan atanh attr '
          + 'attributes baseenv browser c call ceiling class Conj cos cosh '
          + 'cospi cummax cummin cumprod cumsum digamma dim dimnames '
          + 'emptyenv exp expression floor forceAndCall gamma gc.time '
          + 'globalenv Im interactive invisible is.array is.atomic is.call '
          + 'is.character is.complex is.double is.environment is.expression '
          + 'is.finite is.function is.infinite is.integer is.language '
          + 'is.list is.logical is.matrix is.na is.name is.nan is.null '
          + 'is.numeric is.object is.pairlist is.raw is.recursive is.single '
          + 'is.symbol lazyLoadDBfetch length lgamma list log max min '
          + 'missing Mod names nargs nzchar oldClass on.exit pos.to.env '
          + 'proc.time prod quote range Re rep retracemem return round '
          + 'seq_along seq_len seq.int sign signif sin sinh sinpi sqrt '
          + 'standardGeneric substitute sum switch tan tanh tanpi tracemem '
          + 'trigamma trunc unclass untracemem UseMethod xtfrm',
      },

      contains: [
        // Roxygen comments
        hljs.COMMENT(
          /#'/,
          /$/,
          { contains: [
            {
              // Handle `@examples` separately to cause all subsequent code
              // until the next `@`-tag on its own line to be kept as-is,
              // preventing highlighting. This code is example R code, so nested
              // doctags shouldn’t be treated as such. See
              // `test/markup/r/roxygen.txt` for an example.
              scope: 'doctag',
              match: /@examples/,
              starts: {
                end: regex.lookahead(regex.either(
                  // end if another doc comment
                  /\n^#'\s*(?=@[a-zA-Z]+)/,
                  // or a line with no comment
                  /\n^(?!#')/
                )),
                endsParent: true
              }
            },
            {
              // Handle `@param` to highlight the parameter name following
              // after.
              scope: 'doctag',
              begin: '@param',
              end: /$/,
              contains: [
                {
                  scope: 'variable',
                  variants: [
                    { match: IDENT_RE },
                    { match: /`(?:\\.|[^`\\])+`/ }
                  ],
                  endsParent: true
                }
              ]
            },
            {
              // Any other Roxygen tag.
              scope: 'doctag',
              match: /@[a-zA-Z]+/
            },
            {
              // Backslash-prefixed words inside the doc comment.
              scope: 'keyword',
              match: /\\[a-zA-Z]+/
            }
          ] }
        ),

        hljs.HASH_COMMENT_MODE,

        {
          scope: 'string',
          contains: [ hljs.BACKSLASH_ESCAPE ],
          variants: [
            // Raw strings: r"(…)", r"{…}", r"[…]" and their single-quoted
            // forms, each with an optional run of dashes captured by the
            // begin pattern.
            hljs.END_SAME_AS_BEGIN({
              begin: /[rR]"(-*)\(/,
              end: /\)(-*)"/
            }),
            hljs.END_SAME_AS_BEGIN({
              begin: /[rR]"(-*)\{/,
              end: /\}(-*)"/
            }),
            hljs.END_SAME_AS_BEGIN({
              begin: /[rR]"(-*)\[/,
              end: /\](-*)"/
            }),
            hljs.END_SAME_AS_BEGIN({
              begin: /[rR]'(-*)\(/,
              end: /\)(-*)'/
            }),
            hljs.END_SAME_AS_BEGIN({
              begin: /[rR]'(-*)\{/,
              end: /\}(-*)'/
            }),
            hljs.END_SAME_AS_BEGIN({
              begin: /[rR]'(-*)\[/,
              end: /\](-*)'/
            }),
            // Ordinary double- and single-quoted strings.
            {
              begin: '"',
              end: '"',
              relevance: 0
            },
            {
              begin: "'",
              end: "'",
              relevance: 0
            }
          ],
        },

        // Matching numbers immediately following punctuation and operators is
        // tricky since we need to look at the character ahead of a number to
        // ensure the number is not part of an identifier, and we cannot use
        // negative look-behind assertions. So instead we explicitly handle all
        // possible combinations of (operator|punctuation), number.
        // TODO: replace with negative look-behind when available
        // { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/ },
        // { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+([pP][+-]?\d+)?[Li]?/ },
        // { begin: /(?<![a-zA-Z0-9._])(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[Li]?/ }
        {
          relevance: 0,
          variants: [
            {
              scope: {
                1: 'operator',
                2: 'number'
              },
              match: [
                OPERATORS_RE,
                NUMBER_TYPES_RE
              ]
            },
            {
              // `%…%` operator directly followed by a number.
              scope: {
                1: 'operator',
                2: 'number'
              },
              match: [
                /%[^%]*%/,
                NUMBER_TYPES_RE
              ]
            },
            {
              scope: {
                1: 'punctuation',
                2: 'number'
              },
              match: [
                PUNCTUATION_RE,
                NUMBER_TYPES_RE
              ]
            },
            {
              scope: { 2: 'number' },
              match: [
                /[^a-zA-Z0-9._]|^/, // not part of an identifier, or start of document
                NUMBER_TYPES_RE
              ]
            }
          ]
        },

        // Operators/punctuation when they're not directly followed by numbers
        {
          // Relevance boost for the most common assignment form.
          scope: { 3: 'operator' },
          match: [
            IDENT_RE,
            /\s+/,
            /<-/,
            /\s+/
          ]
        },

        {
          scope: 'operator',
          relevance: 0,
          variants: [
            { match: OPERATORS_RE },
            { match: /%[^%]*%/ }
          ]
        },

        {
          scope: 'punctuation',
          relevance: 0,
          match: PUNCTUATION_RE
        },

        {
          // Escaped identifier
          begin: '`',
          end: '`',
          contains: [ { begin: /\\./ } ]
        }
      ]
    };
  }

  export { r as default };