python.js 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. /*
  2. Language: Python
  3. Description: Python is an interpreted, object-oriented, high-level programming language with dynamic semantics.
  4. Website: https://www.python.org
  5. Category: common
  6. */
  7. function python(hljs) {
  8. const regex = hljs.regex;
  9. const IDENT_RE = /[\p{XID_Start}_]\p{XID_Continue}*/u;
  10. const RESERVED_WORDS = [
  11. 'and',
  12. 'as',
  13. 'assert',
  14. 'async',
  15. 'await',
  16. 'break',
  17. 'case',
  18. 'class',
  19. 'continue',
  20. 'def',
  21. 'del',
  22. 'elif',
  23. 'else',
  24. 'except',
  25. 'finally',
  26. 'for',
  27. 'from',
  28. 'global',
  29. 'if',
  30. 'import',
  31. 'in',
  32. 'is',
  33. 'lambda',
  34. 'match',
  35. 'nonlocal|10',
  36. 'not',
  37. 'or',
  38. 'pass',
  39. 'raise',
  40. 'return',
  41. 'try',
  42. 'while',
  43. 'with',
  44. 'yield'
  45. ];
  46. const BUILT_INS = [
  47. '__import__',
  48. 'abs',
  49. 'all',
  50. 'any',
  51. 'ascii',
  52. 'bin',
  53. 'bool',
  54. 'breakpoint',
  55. 'bytearray',
  56. 'bytes',
  57. 'callable',
  58. 'chr',
  59. 'classmethod',
  60. 'compile',
  61. 'complex',
  62. 'delattr',
  63. 'dict',
  64. 'dir',
  65. 'divmod',
  66. 'enumerate',
  67. 'eval',
  68. 'exec',
  69. 'filter',
  70. 'float',
  71. 'format',
  72. 'frozenset',
  73. 'getattr',
  74. 'globals',
  75. 'hasattr',
  76. 'hash',
  77. 'help',
  78. 'hex',
  79. 'id',
  80. 'input',
  81. 'int',
  82. 'isinstance',
  83. 'issubclass',
  84. 'iter',
  85. 'len',
  86. 'list',
  87. 'locals',
  88. 'map',
  89. 'max',
  90. 'memoryview',
  91. 'min',
  92. 'next',
  93. 'object',
  94. 'oct',
  95. 'open',
  96. 'ord',
  97. 'pow',
  98. 'print',
  99. 'property',
  100. 'range',
  101. 'repr',
  102. 'reversed',
  103. 'round',
  104. 'set',
  105. 'setattr',
  106. 'slice',
  107. 'sorted',
  108. 'staticmethod',
  109. 'str',
  110. 'sum',
  111. 'super',
  112. 'tuple',
  113. 'type',
  114. 'vars',
  115. 'zip'
  116. ];
  117. const LITERALS = [
  118. '__debug__',
  119. 'Ellipsis',
  120. 'False',
  121. 'None',
  122. 'NotImplemented',
  123. 'True'
  124. ];
  125. // https://docs.python.org/3/library/typing.html
  126. // TODO: Could these be supplemented by a CamelCase matcher in certain
  127. // contexts, leaving these remaining only for relevance hinting?
  128. const TYPES = [
  129. "Any",
  130. "Callable",
  131. "Coroutine",
  132. "Dict",
  133. "List",
  134. "Literal",
  135. "Generic",
  136. "Optional",
  137. "Sequence",
  138. "Set",
  139. "Tuple",
  140. "Type",
  141. "Union"
  142. ];
  143. const KEYWORDS = {
  144. $pattern: /[A-Za-z]\w+|__\w+__/,
  145. keyword: RESERVED_WORDS,
  146. built_in: BUILT_INS,
  147. literal: LITERALS,
  148. type: TYPES
  149. };
  150. const PROMPT = {
  151. className: 'meta',
  152. begin: /^(>>>|\.\.\.) /
  153. };
  154. const SUBST = {
  155. className: 'subst',
  156. begin: /\{/,
  157. end: /\}/,
  158. keywords: KEYWORDS,
  159. illegal: /#/
  160. };
  161. const LITERAL_BRACKET = {
  162. begin: /\{\{/,
  163. relevance: 0
  164. };
  165. const STRING = {
  166. className: 'string',
  167. contains: [ hljs.BACKSLASH_ESCAPE ],
  168. variants: [
  169. {
  170. begin: /([uU]|[bB]|[rR]|[bB][rR]|[rR][bB])?'''/,
  171. end: /'''/,
  172. contains: [
  173. hljs.BACKSLASH_ESCAPE,
  174. PROMPT
  175. ],
  176. relevance: 10
  177. },
  178. {
  179. begin: /([uU]|[bB]|[rR]|[bB][rR]|[rR][bB])?"""/,
  180. end: /"""/,
  181. contains: [
  182. hljs.BACKSLASH_ESCAPE,
  183. PROMPT
  184. ],
  185. relevance: 10
  186. },
  187. {
  188. begin: /([fF][rR]|[rR][fF]|[fF])'''/,
  189. end: /'''/,
  190. contains: [
  191. hljs.BACKSLASH_ESCAPE,
  192. PROMPT,
  193. LITERAL_BRACKET,
  194. SUBST
  195. ]
  196. },
  197. {
  198. begin: /([fF][rR]|[rR][fF]|[fF])"""/,
  199. end: /"""/,
  200. contains: [
  201. hljs.BACKSLASH_ESCAPE,
  202. PROMPT,
  203. LITERAL_BRACKET,
  204. SUBST
  205. ]
  206. },
  207. {
  208. begin: /([uU]|[rR])'/,
  209. end: /'/,
  210. relevance: 10
  211. },
  212. {
  213. begin: /([uU]|[rR])"/,
  214. end: /"/,
  215. relevance: 10
  216. },
  217. {
  218. begin: /([bB]|[bB][rR]|[rR][bB])'/,
  219. end: /'/
  220. },
  221. {
  222. begin: /([bB]|[bB][rR]|[rR][bB])"/,
  223. end: /"/
  224. },
  225. {
  226. begin: /([fF][rR]|[rR][fF]|[fF])'/,
  227. end: /'/,
  228. contains: [
  229. hljs.BACKSLASH_ESCAPE,
  230. LITERAL_BRACKET,
  231. SUBST
  232. ]
  233. },
  234. {
  235. begin: /([fF][rR]|[rR][fF]|[fF])"/,
  236. end: /"/,
  237. contains: [
  238. hljs.BACKSLASH_ESCAPE,
  239. LITERAL_BRACKET,
  240. SUBST
  241. ]
  242. },
  243. hljs.APOS_STRING_MODE,
  244. hljs.QUOTE_STRING_MODE
  245. ]
  246. };
  247. // https://docs.python.org/3.9/reference/lexical_analysis.html#numeric-literals
  248. const digitpart = '[0-9](_?[0-9])*';
  249. const pointfloat = `(\\b(${digitpart}))?\\.(${digitpart})|\\b(${digitpart})\\.`;
  250. // Whitespace after a number (or any lexical token) is needed only if its absence
  251. // would change the tokenization
  252. // https://docs.python.org/3.9/reference/lexical_analysis.html#whitespace-between-tokens
  253. // We deviate slightly, requiring a word boundary or a keyword
  254. // to avoid accidentally recognizing *prefixes* (e.g., `0` in `0x41` or `08` or `0__1`)
  255. const lookahead = `\\b|${RESERVED_WORDS.join('|')}`;
  256. const NUMBER = {
  257. className: 'number',
  258. relevance: 0,
  259. variants: [
  260. // exponentfloat, pointfloat
  261. // https://docs.python.org/3.9/reference/lexical_analysis.html#floating-point-literals
  262. // optionally imaginary
  263. // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
  264. // Note: no leading \b because floats can start with a decimal point
  265. // and we don't want to mishandle e.g. `fn(.5)`,
  266. // no trailing \b for pointfloat because it can end with a decimal point
  267. // and we don't want to mishandle e.g. `0..hex()`; this should be safe
  268. // because both MUST contain a decimal point and so cannot be confused with
  269. // the interior part of an identifier
  270. {
  271. begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?(?=${lookahead})`
  272. },
  273. {
  274. begin: `(${pointfloat})[jJ]?`
  275. },
  276. // decinteger, bininteger, octinteger, hexinteger
  277. // https://docs.python.org/3.9/reference/lexical_analysis.html#integer-literals
  278. // optionally "long" in Python 2
  279. // https://docs.python.org/2.7/reference/lexical_analysis.html#integer-and-long-integer-literals
  280. // decinteger is optionally imaginary
  281. // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
  282. {
  283. begin: `\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?(?=${lookahead})`
  284. },
  285. {
  286. begin: `\\b0[bB](_?[01])+[lL]?(?=${lookahead})`
  287. },
  288. {
  289. begin: `\\b0[oO](_?[0-7])+[lL]?(?=${lookahead})`
  290. },
  291. {
  292. begin: `\\b0[xX](_?[0-9a-fA-F])+[lL]?(?=${lookahead})`
  293. },
  294. // imagnumber (digitpart-based)
  295. // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
  296. {
  297. begin: `\\b(${digitpart})[jJ](?=${lookahead})`
  298. }
  299. ]
  300. };
  301. const COMMENT_TYPE = {
  302. className: "comment",
  303. begin: regex.lookahead(/# type:/),
  304. end: /$/,
  305. keywords: KEYWORDS,
  306. contains: [
  307. { // prevent keywords from coloring `type`
  308. begin: /# type:/
  309. },
  310. // comment within a datatype comment includes no keywords
  311. {
  312. begin: /#/,
  313. end: /\b\B/,
  314. endsWithParent: true
  315. }
  316. ]
  317. };
  318. const PARAMS = {
  319. className: 'params',
  320. variants: [
  321. // Exclude params in functions without params
  322. {
  323. className: "",
  324. begin: /\(\s*\)/,
  325. skip: true
  326. },
  327. {
  328. begin: /\(/,
  329. end: /\)/,
  330. excludeBegin: true,
  331. excludeEnd: true,
  332. keywords: KEYWORDS,
  333. contains: [
  334. 'self',
  335. PROMPT,
  336. NUMBER,
  337. STRING,
  338. hljs.HASH_COMMENT_MODE
  339. ]
  340. }
  341. ]
  342. };
  343. SUBST.contains = [
  344. STRING,
  345. NUMBER,
  346. PROMPT
  347. ];
  348. return {
  349. name: 'Python',
  350. aliases: [
  351. 'py',
  352. 'gyp',
  353. 'ipython'
  354. ],
  355. unicodeRegex: true,
  356. keywords: KEYWORDS,
  357. illegal: /(<\/|\?)|=>/,
  358. contains: [
  359. PROMPT,
  360. NUMBER,
  361. {
  362. // very common convention
  363. scope: 'variable.language',
  364. match: /\bself\b/
  365. },
  366. {
  367. // eat "if" prior to string so that it won't accidentally be
  368. // labeled as an f-string
  369. beginKeywords: "if",
  370. relevance: 0
  371. },
  372. { match: /\bor\b/, scope: "keyword" },
  373. STRING,
  374. COMMENT_TYPE,
  375. hljs.HASH_COMMENT_MODE,
  376. {
  377. match: [
  378. /\bdef/, /\s+/,
  379. IDENT_RE,
  380. ],
  381. scope: {
  382. 1: "keyword",
  383. 3: "title.function"
  384. },
  385. contains: [ PARAMS ]
  386. },
  387. {
  388. variants: [
  389. {
  390. match: [
  391. /\bclass/, /\s+/,
  392. IDENT_RE, /\s*/,
  393. /\(\s*/, IDENT_RE,/\s*\)/
  394. ],
  395. },
  396. {
  397. match: [
  398. /\bclass/, /\s+/,
  399. IDENT_RE
  400. ],
  401. }
  402. ],
  403. scope: {
  404. 1: "keyword",
  405. 3: "title.class",
  406. 6: "title.class.inherited",
  407. }
  408. },
  409. {
  410. className: 'meta',
  411. begin: /^[\t ]*@/,
  412. end: /(?=#)|$/,
  413. contains: [
  414. NUMBER,
  415. PARAMS,
  416. STRING
  417. ]
  418. }
  419. ]
  420. };
  421. }
  422. export { python as default };