123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257 |
/*
Language: R
Description: R is a free software environment for statistical computing and graphics.
Author: Joe Cheng <joe@rstudio.org>
Contributors: Konrad Rudolph <konrad.rudolph@gmail.com>
Website: https://www.r-project.org
Category: common,scientific
*/
// Builds the highlight.js language definition for R.
//
// `hljs` is the highlight.js API object; this function reads `hljs.regex`
// (`either`, `lookahead`), `hljs.COMMENT`, `hljs.HASH_COMMENT_MODE`,
// `hljs.BACKSLASH_ESCAPE` and `hljs.END_SAME_AS_BEGIN` from it.
//
// Returns a mode object with `name`, `keywords` and `contains`. The order of
// the entries in `contains` is significant: the "operator/punctuation followed
// by a number" mode is listed before the plain operator and punctuation modes.
/** @type LanguageFn */
function r(hljs) {
  const regex = hljs.regex;

  // Identifiers in R cannot start with `_`, but they can start with `.` if it
  // is not immediately followed by a digit.
  // R also supports quoted identifiers, which are near-arbitrary sequences
  // delimited by backticks (`…`), which may contain escape sequences. These are
  // handled in a separate mode. See `test/markup/r/names.txt` for examples.
  // FIXME: Support Unicode identifiers.
  const IDENT_RE = /(?:(?:[a-zA-Z]|\.[._a-zA-Z])[._a-zA-Z0-9]*)|\.(?!\d)/;

  const NUMBER_TYPES_RE = regex.either(
    // Special case: only hexadecimal binary powers can contain fractions
    /0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/,
    // Hexadecimal numbers without fraction and optional binary power
    /0[xX][0-9a-fA-F]+(?:[pP][+-]?\d+)?[Li]?/,
    // Decimal numbers
    /(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/
  );

  // Alternation is ordered, so every multi-character operator has to be listed
  // before the single-character class. In particular `\*\*` must precede the
  // class (which contains `*`); otherwise a standalone `**` is consumed as two
  // separate `*` operators and the `\*\*` alternative can never win.
  const OPERATORS_RE = /[=!<>:]=|\|\||&&|:::?|<-|<<-|->>|->|\|>|\*\*|[-+*\/?!$&|:<=>@^~]/;

  // Custom infix operators such as `%in%` or `%>%`.
  const INFIX_OPERATOR_RE = /%[^%]*%/;

  const PUNCTUATION_RE = regex.either(
    /[()]/,
    /[{}]/,
    /\[\[/,
    /[[\]]/,
    /\\/,
    /,/
  );

  const KEYWORDS = {
    $pattern: IDENT_RE,
    keyword:
      'function if in break next repeat else for while',
    literal:
      'NULL NA TRUE FALSE Inf NaN NA_integer_|10 NA_real_|10 '
      + 'NA_character_|10 NA_complex_|10',
    built_in:
      // Builtin constants
      'LETTERS letters month.abb month.name pi T F '
      // Primitive functions
      // These are all the functions in `base` that are implemented as a
      // `.Primitive`, minus those functions that are also keywords.
      + 'abs acos acosh all any anyNA Arg as.call as.character '
      + 'as.complex as.double as.environment as.integer as.logical '
      + 'as.null.default as.numeric as.raw asin asinh atan atanh attr '
      + 'attributes baseenv browser c call ceiling class Conj cos cosh '
      + 'cospi cummax cummin cumprod cumsum digamma dim dimnames '
      + 'emptyenv exp expression floor forceAndCall gamma gc.time '
      + 'globalenv Im interactive invisible is.array is.atomic is.call '
      + 'is.character is.complex is.double is.environment is.expression '
      + 'is.finite is.function is.infinite is.integer is.language '
      + 'is.list is.logical is.matrix is.na is.name is.nan is.null '
      + 'is.numeric is.object is.pairlist is.raw is.recursive is.single '
      + 'is.symbol lazyLoadDBfetch length lgamma list log max min '
      + 'missing Mod names nargs nzchar oldClass on.exit pos.to.env '
      + 'proc.time prod quote range Re rep retracemem return round '
      + 'seq_along seq_len seq.int sign signif sin sinh sinpi sqrt '
      + 'standardGeneric substitute sum switch tan tanh tanpi tracemem '
      + 'trigamma trunc unclass untracemem UseMethod xtfrm',
  };

  // Roxygen comments (`#'` up to the end of the line).
  const ROXYGEN_COMMENT = hljs.COMMENT(
    /#'/,
    /$/,
    { contains: [
      {
        // Handle `@examples` separately to cause all subsequent code
        // until the next `@`-tag on its own line to be kept as-is,
        // preventing highlighting. This code is example R code, so nested
        // doctags shouldn't be treated as such. See
        // `test/markup/r/roxygen.txt` for an example.
        scope: 'doctag',
        match: /@examples/,
        starts: {
          end: regex.lookahead(regex.either(
            // end if another doc comment
            /\n^#'\s*(?=@[a-zA-Z]+)/,
            // or a line with no comment
            /\n^(?!#')/
          )),
          endsParent: true
        }
      },
      {
        // Handle `@param` to highlight the parameter name following
        // after.
        scope: 'doctag',
        begin: '@param',
        end: /$/,
        contains: [
          {
            scope: 'variable',
            variants: [
              { match: IDENT_RE },
              { match: /`(?:\\.|[^`\\])+`/ }
            ],
            endsParent: true
          }
        ]
      },
      {
        scope: 'doctag',
        match: /@[a-zA-Z]+/
      },
      {
        scope: 'keyword',
        match: /\\[a-zA-Z]+/
      }
    ] }
  );

  const STRING = {
    scope: 'string',
    variants: [
      // Raw strings: r"(...)", r"{...}", r"[...]" (and the `R` / single-quote
      // forms), optionally with dashes between the quote and the bracket.
      // Backslashes are literal inside raw strings, so these variants
      // deliberately carry no backslash-escape sub-mode: with one, input such
      // as r"(\)" would have its closing `)` swallowed as an escape and the
      // string would not terminate where it should.
      hljs.END_SAME_AS_BEGIN({
        begin: /[rR]"(-*)\(/,
        end: /\)(-*)"/
      }),
      hljs.END_SAME_AS_BEGIN({
        begin: /[rR]"(-*)\{/,
        end: /\}(-*)"/
      }),
      hljs.END_SAME_AS_BEGIN({
        begin: /[rR]"(-*)\[/,
        end: /\](-*)"/
      }),
      hljs.END_SAME_AS_BEGIN({
        begin: /[rR]'(-*)\(/,
        end: /\)(-*)'/
      }),
      hljs.END_SAME_AS_BEGIN({
        begin: /[rR]'(-*)\{/,
        end: /\}(-*)'/
      }),
      hljs.END_SAME_AS_BEGIN({
        begin: /[rR]'(-*)\[/,
        end: /\](-*)'/
      }),
      // Ordinary strings: here a backslash does escape the next character,
      // so an escaped quote must not end the string.
      {
        begin: '"',
        end: '"',
        contains: [ hljs.BACKSLASH_ESCAPE ],
        relevance: 0
      },
      {
        begin: "'",
        end: "'",
        contains: [ hljs.BACKSLASH_ESCAPE ],
        relevance: 0
      }
    ],
  };

  // Matching numbers immediately following punctuation and operators is
  // tricky since we need to look at the character ahead of a number to
  // ensure the number is not part of an identifier, and we cannot use
  // negative look-behind assertions. So instead we explicitly handle all
  // possible combinations of (operator|punctuation), number.
  // TODO: replace with negative look-behind when available
  // { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/ },
  // { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+([pP][+-]?\d+)?[Li]?/ },
  // { begin: /(?<![a-zA-Z0-9._])(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[Li]?/ }
  const PREFIXED_NUMBER = {
    relevance: 0,
    variants: [
      {
        scope: {
          1: 'operator',
          2: 'number'
        },
        match: [
          OPERATORS_RE,
          NUMBER_TYPES_RE
        ]
      },
      {
        scope: {
          1: 'operator',
          2: 'number'
        },
        match: [
          INFIX_OPERATOR_RE,
          NUMBER_TYPES_RE
        ]
      },
      {
        scope: {
          1: 'punctuation',
          2: 'number'
        },
        match: [
          PUNCTUATION_RE,
          NUMBER_TYPES_RE
        ]
      },
      {
        scope: { 2: 'number' },
        match: [
          /[^a-zA-Z0-9._]|^/, // not part of an identifier, or start of document
          NUMBER_TYPES_RE
        ]
      }
    ]
  };

  // Operators/punctuation when they're not directly followed by numbers.

  // Relevance boost for the most common assignment form: unlike the modes
  // below, this one does not set `relevance: 0`.
  const ASSIGNMENT = {
    scope: { 3: 'operator' },
    match: [
      IDENT_RE,
      /\s+/,
      /<-/,
      /\s+/
    ]
  };

  const OPERATOR = {
    scope: 'operator',
    relevance: 0,
    variants: [
      { match: OPERATORS_RE },
      { match: INFIX_OPERATOR_RE }
    ]
  };

  const PUNCTUATION = {
    scope: 'punctuation',
    relevance: 0,
    match: PUNCTUATION_RE
  };

  // Escaped (backtick-quoted) identifier. It has no scope of its own; the
  // inner mode consumes backslash escapes so that an escaped backtick does
  // not terminate the identifier.
  const QUOTED_IDENTIFIER = {
    begin: '`',
    end: '`',
    contains: [ { begin: /\\./ } ]
  };

  return {
    name: 'R',
    keywords: KEYWORDS,
    contains: [
      ROXYGEN_COMMENT,
      hljs.HASH_COMMENT_MODE,
      STRING,
      PREFIXED_NUMBER,
      ASSIGNMENT,
      OPERATOR,
      PUNCTUATION,
      QUOTED_IDENTIFIER
    ]
  };
}
- export { r as default };
|