123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436 |
- /*
- Language: Python
- Description: Python is an interpreted, object-oriented, high-level programming language with dynamic semantics.
- Website: https://www.python.org
- Category: common
- */
- function python(hljs) {
- const regex = hljs.regex;
- const IDENT_RE = /[\p{XID_Start}_]\p{XID_Continue}*/u;
- const RESERVED_WORDS = [
- 'and',
- 'as',
- 'assert',
- 'async',
- 'await',
- 'break',
- 'case',
- 'class',
- 'continue',
- 'def',
- 'del',
- 'elif',
- 'else',
- 'except',
- 'finally',
- 'for',
- 'from',
- 'global',
- 'if',
- 'import',
- 'in',
- 'is',
- 'lambda',
- 'match',
- 'nonlocal|10',
- 'not',
- 'or',
- 'pass',
- 'raise',
- 'return',
- 'try',
- 'while',
- 'with',
- 'yield'
- ];
- const BUILT_INS = [
- '__import__',
- 'abs',
- 'all',
- 'any',
- 'ascii',
- 'bin',
- 'bool',
- 'breakpoint',
- 'bytearray',
- 'bytes',
- 'callable',
- 'chr',
- 'classmethod',
- 'compile',
- 'complex',
- 'delattr',
- 'dict',
- 'dir',
- 'divmod',
- 'enumerate',
- 'eval',
- 'exec',
- 'filter',
- 'float',
- 'format',
- 'frozenset',
- 'getattr',
- 'globals',
- 'hasattr',
- 'hash',
- 'help',
- 'hex',
- 'id',
- 'input',
- 'int',
- 'isinstance',
- 'issubclass',
- 'iter',
- 'len',
- 'list',
- 'locals',
- 'map',
- 'max',
- 'memoryview',
- 'min',
- 'next',
- 'object',
- 'oct',
- 'open',
- 'ord',
- 'pow',
- 'print',
- 'property',
- 'range',
- 'repr',
- 'reversed',
- 'round',
- 'set',
- 'setattr',
- 'slice',
- 'sorted',
- 'staticmethod',
- 'str',
- 'sum',
- 'super',
- 'tuple',
- 'type',
- 'vars',
- 'zip'
- ];
- const LITERALS = [
- '__debug__',
- 'Ellipsis',
- 'False',
- 'None',
- 'NotImplemented',
- 'True'
- ];
- // https://docs.python.org/3/library/typing.html
- // TODO: Could these be supplemented by a CamelCase matcher in certain
- // contexts, leaving these remaining only for relevance hinting?
- const TYPES = [
- "Any",
- "Callable",
- "Coroutine",
- "Dict",
- "List",
- "Literal",
- "Generic",
- "Optional",
- "Sequence",
- "Set",
- "Tuple",
- "Type",
- "Union"
- ];
- const KEYWORDS = {
- $pattern: /[A-Za-z]\w+|__\w+__/,
- keyword: RESERVED_WORDS,
- built_in: BUILT_INS,
- literal: LITERALS,
- type: TYPES
- };
- const PROMPT = {
- className: 'meta',
- begin: /^(>>>|\.\.\.) /
- };
- const SUBST = {
- className: 'subst',
- begin: /\{/,
- end: /\}/,
- keywords: KEYWORDS,
- illegal: /#/
- };
- const LITERAL_BRACKET = {
- begin: /\{\{/,
- relevance: 0
- };
- const STRING = {
- className: 'string',
- contains: [ hljs.BACKSLASH_ESCAPE ],
- variants: [
- {
- begin: /([uU]|[bB]|[rR]|[bB][rR]|[rR][bB])?'''/,
- end: /'''/,
- contains: [
- hljs.BACKSLASH_ESCAPE,
- PROMPT
- ],
- relevance: 10
- },
- {
- begin: /([uU]|[bB]|[rR]|[bB][rR]|[rR][bB])?"""/,
- end: /"""/,
- contains: [
- hljs.BACKSLASH_ESCAPE,
- PROMPT
- ],
- relevance: 10
- },
- {
- begin: /([fF][rR]|[rR][fF]|[fF])'''/,
- end: /'''/,
- contains: [
- hljs.BACKSLASH_ESCAPE,
- PROMPT,
- LITERAL_BRACKET,
- SUBST
- ]
- },
- {
- begin: /([fF][rR]|[rR][fF]|[fF])"""/,
- end: /"""/,
- contains: [
- hljs.BACKSLASH_ESCAPE,
- PROMPT,
- LITERAL_BRACKET,
- SUBST
- ]
- },
- {
- begin: /([uU]|[rR])'/,
- end: /'/,
- relevance: 10
- },
- {
- begin: /([uU]|[rR])"/,
- end: /"/,
- relevance: 10
- },
- {
- begin: /([bB]|[bB][rR]|[rR][bB])'/,
- end: /'/
- },
- {
- begin: /([bB]|[bB][rR]|[rR][bB])"/,
- end: /"/
- },
- {
- begin: /([fF][rR]|[rR][fF]|[fF])'/,
- end: /'/,
- contains: [
- hljs.BACKSLASH_ESCAPE,
- LITERAL_BRACKET,
- SUBST
- ]
- },
- {
- begin: /([fF][rR]|[rR][fF]|[fF])"/,
- end: /"/,
- contains: [
- hljs.BACKSLASH_ESCAPE,
- LITERAL_BRACKET,
- SUBST
- ]
- },
- hljs.APOS_STRING_MODE,
- hljs.QUOTE_STRING_MODE
- ]
- };
- // https://docs.python.org/3.9/reference/lexical_analysis.html#numeric-literals
- const digitpart = '[0-9](_?[0-9])*';
- const pointfloat = `(\\b(${digitpart}))?\\.(${digitpart})|\\b(${digitpart})\\.`;
- // Whitespace after a number (or any lexical token) is needed only if its absence
- // would change the tokenization
- // https://docs.python.org/3.9/reference/lexical_analysis.html#whitespace-between-tokens
- // We deviate slightly, requiring a word boundary or a keyword
- // to avoid accidentally recognizing *prefixes* (e.g., `0` in `0x41` or `08` or `0__1`)
- const lookahead = `\\b|${RESERVED_WORDS.join('|')}`;
- const NUMBER = {
- className: 'number',
- relevance: 0,
- variants: [
- // exponentfloat, pointfloat
- // https://docs.python.org/3.9/reference/lexical_analysis.html#floating-point-literals
- // optionally imaginary
- // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
- // Note: no leading \b because floats can start with a decimal point
- // and we don't want to mishandle e.g. `fn(.5)`,
- // no trailing \b for pointfloat because it can end with a decimal point
- // and we don't want to mishandle e.g. `0..hex()`; this should be safe
- // because both MUST contain a decimal point and so cannot be confused with
- // the interior part of an identifier
- {
- begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?(?=${lookahead})`
- },
- {
- begin: `(${pointfloat})[jJ]?`
- },
- // decinteger, bininteger, octinteger, hexinteger
- // https://docs.python.org/3.9/reference/lexical_analysis.html#integer-literals
- // optionally "long" in Python 2
- // https://docs.python.org/2.7/reference/lexical_analysis.html#integer-and-long-integer-literals
- // decinteger is optionally imaginary
- // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
- {
- begin: `\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?(?=${lookahead})`
- },
- {
- begin: `\\b0[bB](_?[01])+[lL]?(?=${lookahead})`
- },
- {
- begin: `\\b0[oO](_?[0-7])+[lL]?(?=${lookahead})`
- },
- {
- begin: `\\b0[xX](_?[0-9a-fA-F])+[lL]?(?=${lookahead})`
- },
- // imagnumber (digitpart-based)
- // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
- {
- begin: `\\b(${digitpart})[jJ](?=${lookahead})`
- }
- ]
- };
- const COMMENT_TYPE = {
- className: "comment",
- begin: regex.lookahead(/# type:/),
- end: /$/,
- keywords: KEYWORDS,
- contains: [
- { // prevent keywords from coloring `type`
- begin: /# type:/
- },
- // comment within a datatype comment includes no keywords
- {
- begin: /#/,
- end: /\b\B/,
- endsWithParent: true
- }
- ]
- };
- const PARAMS = {
- className: 'params',
- variants: [
- // Exclude params in functions without params
- {
- className: "",
- begin: /\(\s*\)/,
- skip: true
- },
- {
- begin: /\(/,
- end: /\)/,
- excludeBegin: true,
- excludeEnd: true,
- keywords: KEYWORDS,
- contains: [
- 'self',
- PROMPT,
- NUMBER,
- STRING,
- hljs.HASH_COMMENT_MODE
- ]
- }
- ]
- };
- SUBST.contains = [
- STRING,
- NUMBER,
- PROMPT
- ];
- return {
- name: 'Python',
- aliases: [
- 'py',
- 'gyp',
- 'ipython'
- ],
- unicodeRegex: true,
- keywords: KEYWORDS,
- illegal: /(<\/|\?)|=>/,
- contains: [
- PROMPT,
- NUMBER,
- {
- // very common convention
- scope: 'variable.language',
- match: /\bself\b/
- },
- {
- // eat "if" prior to string so that it won't accidentally be
- // labeled as an f-string
- beginKeywords: "if",
- relevance: 0
- },
- { match: /\bor\b/, scope: "keyword" },
- STRING,
- COMMENT_TYPE,
- hljs.HASH_COMMENT_MODE,
- {
- match: [
- /\bdef/, /\s+/,
- IDENT_RE,
- ],
- scope: {
- 1: "keyword",
- 3: "title.function"
- },
- contains: [ PARAMS ]
- },
- {
- variants: [
- {
- match: [
- /\bclass/, /\s+/,
- IDENT_RE, /\s*/,
- /\(\s*/, IDENT_RE,/\s*\)/
- ],
- },
- {
- match: [
- /\bclass/, /\s+/,
- IDENT_RE
- ],
- }
- ],
- scope: {
- 1: "keyword",
- 3: "title.class",
- 6: "title.class.inherited",
- }
- },
- {
- className: 'meta',
- begin: /^[\t ]*@/,
- end: /(?=#)|$/,
- contains: [
- NUMBER,
- PARAMS,
- STRING
- ]
- }
- ]
- };
- }
- export { python as default };
|