Spaces:
Paused
Paused
| # CodeHilite Extension for Python-Markdown | |
| # ======================================== | |
| # Adds code/syntax highlighting to standard Python-Markdown code blocks. | |
| # See https://Python-Markdown.github.io/extensions/code_hilite | |
| # for documentation. | |
| # Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/). | |
| # All changes Copyright 2008-2014 The Python Markdown Project | |
| # License: [BSD](https://opensource.org/licenses/bsd-license.php) | |
| """ | |
| Adds code/syntax highlighting to standard Python-Markdown code blocks. | |
| See the [documentation](https://Python-Markdown.github.io/extensions/code_hilite) | |
| for details. | |
| """ | |
| from __future__ import annotations | |
| from . import Extension | |
| from ..treeprocessors import Treeprocessor | |
| from ..util import parseBoolValue | |
| from typing import TYPE_CHECKING, Callable, Any | |
| if TYPE_CHECKING: # pragma: no cover | |
| import xml.etree.ElementTree as etree | |
| try: # pragma: no cover | |
| from pygments import highlight | |
| from pygments.lexers import get_lexer_by_name, guess_lexer | |
| from pygments.formatters import get_formatter_by_name | |
| from pygments.util import ClassNotFound | |
| pygments = True | |
| except ImportError: # pragma: no cover | |
| pygments = False | |
def parse_hl_lines(expr: str) -> list[int]:
    """Parse the line-emphasis syntax into a list of line numbers.

    `expr` is a whitespace-separated string of integers, e.g. `'1 2'` to
    emphasize lines 1 and 2 of a code block.

    Returns the line numbers as a list of integers. An empty or missing
    `expr`, or one containing any non-integer token, yields an empty list.
    """
    if expr:
        try:
            return [int(token) for token in expr.split()]
        except ValueError:  # pragma: no cover
            # A single bad token invalidates the whole expression.
            pass
    return []
| # ------------------ The Main CodeHilite Class ---------------------- | |
class CodeHilite:
    """
    Determine language of source code, and pass it on to the Pygments highlighter.

    Usage:

    ```python
    code = CodeHilite(src=some_code, lang='python')
    html = code.hilite()
    ```

    Arguments:
        src: Source string to highlight.

    Keyword arguments:
        lang (str): String name of Pygments lexer to use for highlighting. Default: `None`.
        guess_lang (bool): Auto-detect which lexer to use.
            Ignored if `lang` is set to a valid value. Default: `True`.
        use_pygments (bool): Pass code to Pygments for code highlighting. If `False`, the code is
            instead wrapped for highlighting by a JavaScript library. Default: `True`.
        pygments_formatter (str): The name of a Pygments formatter or a formatter class used for
            highlighting the code blocks. Default: `html`.
        linenums (bool): An alias to Pygments `linenos` formatter option. Default: `None`.
        css_class (str): An alias to Pygments `cssclass` formatter option. Default: 'codehilite'.
        lang_prefix (str): Prefix prepended to the language. Default: "language-".

    Other Options:

    Any other options are accepted and passed on to the lexer and formatter. Therefore,
    valid options include any options which are accepted by the `html` formatter or
    whichever lexer the code's language uses. Note that most lexers do not have any
    options. However, a few have very useful options, such as PHP's `startinline` option.
    Any invalid options are ignored without error.

    * **Formatter options**: <https://pygments.org/docs/formatters/#HtmlFormatter>
    * **Lexer Options**: <https://pygments.org/docs/lexers/>

    Additionally, when Pygments is enabled and `pygments_formatter` is a callable, the
    code's language is passed to it as an extra option `lang_str`, whose value is
    `{lang_prefix}{lang}`. This option has no effect on Pygments' builtin formatters.

    Advanced Usage:

    ```python
    code = CodeHilite(
        src = some_code,
        lang = 'php',
        startinline = True,      # Lexer option. Snippet does not start with `<?php`.
        linenostart = 42,        # Formatter option. Snippet starts on line 42.
        hl_lines = [45, 49, 50], # Formatter option. Highlight lines 45, 49, and 50.
        linenos = 'inline'       # Formatter option. Avoid alignment problems.
    )
    html = code.hilite()
    ```

    """

    def __init__(self, src: str, **options):
        self.src = src
        self.lang: str | None = options.pop('lang', None)
        self.guess_lang: bool = options.pop('guess_lang', True)
        self.use_pygments: bool = options.pop('use_pygments', True)
        self.lang_prefix: str = options.pop('lang_prefix', 'language-')
        self.pygments_formatter: str | Callable = options.pop('pygments_formatter', 'html')

        # Map our aliases onto the Pygments option names, unless the caller
        # already supplied the Pygments name explicitly.
        if 'linenos' not in options:
            options['linenos'] = options.pop('linenums', None)
        if 'cssclass' not in options:
            options['cssclass'] = options.pop('css_class', 'codehilite')
        if 'wrapcode' not in options:
            # Override Pygments default
            options['wrapcode'] = True
        # Disallow use of `full` option
        options['full'] = False

        # Whatever remains is handed to both the lexer and the formatter.
        self.options = options

    def hilite(self, shebang: bool = True) -> str:
        """
        Pass code to the [Pygments](https://pygments.org/) highlighter with
        optional line numbers. The output should then be styled with CSS to
        your liking. No styles are applied by default - only styling hooks
        (i.e.: `<span class="k">`).

        When Pygments is unavailable or `use_pygments` is false, the source is
        HTML-escaped and wrapped in `<pre><code>` markup instead.

        Arguments:
            shebang: When true and no `lang` was given, inspect the first line
                of the source for a (mock) shebang naming the language.

        Returns:
            A string of html.

        """

        self.src = self.src.strip('\n')

        if self.lang is None and shebang:
            self._parseHeader()

        # Test the instance flag first so the optional-dependency flag is only
        # consulted when Pygments has actually been requested.
        if self.use_pygments and pygments:
            try:
                lexer = get_lexer_by_name(self.lang, **self.options)
            except ValueError:
                try:
                    if self.guess_lang:
                        lexer = guess_lexer(self.src, **self.options)
                    else:
                        lexer = get_lexer_by_name('text', **self.options)
                except ValueError:  # pragma: no cover
                    lexer = get_lexer_by_name('text', **self.options)
            if not self.lang:
                # Use the guessed lexer's language instead
                self.lang = lexer.aliases[0]
            lang_str = f'{self.lang_prefix}{self.lang}'
            if isinstance(self.pygments_formatter, str):
                try:
                    formatter = get_formatter_by_name(self.pygments_formatter, **self.options)
                except ClassNotFound:
                    # Unknown formatter name: fall back to the `html` formatter.
                    formatter = get_formatter_by_name('html', **self.options)
            else:
                formatter = self.pygments_formatter(lang_str=lang_str, **self.options)
            return highlight(self.src, lexer, formatter)

        # Just escape and build markup usable by JavaScript highlighting libraries.
        # `&` must be escaped first so the entities produced by the later
        # replacements are not escaped a second time.
        txt = self.src.replace('&', '&amp;')
        txt = txt.replace('<', '&lt;')
        txt = txt.replace('>', '&gt;')
        txt = txt.replace('"', '&quot;')

        classes = []
        if self.lang:
            classes.append(f'{self.lang_prefix}{self.lang}')
        if self.options['linenos']:
            classes.append('linenums')
        class_str = ''
        if classes:
            class_str = ' class="{}"'.format(' '.join(classes))
        return '<pre class="{}"><code{}>{}\n</code></pre>\n'.format(
            self.options['cssclass'],
            class_str,
            txt
        )

    def _parseHeader(self) -> None:
        """
        Determines language of a code block from shebang line and whether the
        said line should be removed or left in place. If the shebang line
        contains a path (even a single /) then it is assumed to be a real
        shebang line and left alone. However, if no path is given
        (i.e.: `#!python` or `:::python`) then it is assumed to be a mock shebang
        for language identification of a code fragment and removed from the
        code block prior to processing for code highlighting. When a mock
        shebang (i.e.: `#!python`) is found, line numbering is turned on. When
        colons are found in place of a shebang (i.e.: `:::python`), line
        numbering is left in the current state - off by default.

        Also parses optional list of highlight lines, like:

            :::python hl_lines="1 3"
        """

        import re

        # split text into lines
        lines = self.src.split("\n")
        # pull first line to examine
        fl = lines.pop(0)

        c = re.compile(r'''
            (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons
            (?P<path>(?:/\w+)*[/ ])?        # Zero or 1 path
            (?P<lang>[\w#.+-]*)             # The language
            \s*                             # Arbitrary whitespace
            # Optional highlight lines, single- or double-quote-delimited
            (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?
            ''', re.VERBOSE)
        # search first line for shebang
        m = c.search(fl)
        if m:
            # we have a match
            try:
                self.lang = m.group('lang').lower()
            except IndexError:  # pragma: no cover
                self.lang = None
            if m.group('path'):
                # path exists - restore first line
                lines.insert(0, fl)
            if self.options['linenos'] is None and m.group('shebang'):
                # Overridable and Shebang exists - use line numbers
                self.options['linenos'] = True

            self.options['hl_lines'] = parse_hl_lines(m.group('hl_lines'))
        else:
            # No match
            lines.insert(0, fl)

        self.src = "\n".join(lines).strip("\n")
| # ------------------ The Markdown Extension ------------------------------- | |
class HiliteTreeprocessor(Treeprocessor):
    """ Highlight source code in code blocks. """

    # Options forwarded to `CodeHilite` for every code block; assigned by
    # `CodeHiliteExtension.extendMarkdown`.
    config: dict[str, Any]

    def code_unescape(self, text: str) -> str:
        """Unescape code.

        Reverses the HTML escaping of `<`, `>` and `&` so the highlighter
        receives the literal source text.
        NOTE(review): presumably the escaping is applied when the code block
        is parsed into the tree - confirm against the block processors.
        """
        text = text.replace("&lt;", "<")
        text = text.replace("&gt;", ">")
        # Escaped '&' should be replaced at the end to avoid
        # conflicting with < and >.
        text = text.replace("&amp;", "&")
        return text

    def run(self, root: etree.Element) -> None:
        """ Find code blocks and store in `htmlStash`.

        Every `<pre>` element whose only child is a `<code>` element with text
        is highlighted; the resulting HTML is stored in the stash and the
        element is replaced by a `<p>` holding the stash placeholder.
        """
        blocks = root.iter('pre')
        for block in blocks:
            if len(block) == 1 and block[0].tag == 'code':
                # Copy so popping `pygments_style` does not affect later blocks.
                local_config = self.config.copy()
                text = block[0].text
                if text is None:
                    continue
                code = CodeHilite(
                    self.code_unescape(text),
                    tab_length=self.md.tab_length,
                    style=local_config.pop('pygments_style', 'default'),
                    **local_config
                )
                placeholder = self.md.htmlStash.store(code.hilite())
                # Clear code block in `etree` instance
                block.clear()
                # Change to `p` element which will later
                # be removed when inserting raw html
                block.tag = 'p'
                block.text = placeholder
class CodeHiliteExtension(Extension):
    """ Markdown extension that applies source code highlighting to code blocks. """

    def __init__(self, **kwargs):
        # Built-in options, each stored as `[default, description]`.
        self.config = {
            'linenums': [
                None,
                "Use lines numbers. True|table|inline=yes, False=no, None=auto. Default: `None`."
            ],
            'guess_lang': [
                True,
                "Automatic language detection - Default: `True`."
            ],
            'css_class': [
                "codehilite",
                "Set class name for wrapper <div> - Default: `codehilite`."
            ],
            'pygments_style': [
                'default',
                'Pygments HTML Formatter Style (Colorscheme). Default: `default`.'
            ],
            'noclasses': [
                False,
                'Use inline styles instead of CSS classes - Default `False`.'
            ],
            'use_pygments': [
                True,
                'Highlight code blocks with pygments. Disable if using a JavaScript library. Default: `True`.'
            ],
            'lang_prefix': [
                'language-',
                'Prefix prepended to the language when `use_pygments` is false. Default: `language-`.'
            ],
            'pygments_formatter': [
                'html',
                'Use a specific formatter for Pygments highlighting. Default: `html`.'
            ],
        }
        """ Default configuration options. """

        for name, setting in kwargs.items():
            if name in self.config:
                # Known option: let the base class store it.
                self.setConfig(name, setting)
                continue

            # Unknown keyword: record it by hand with an empty description.
            if isinstance(setting, str):
                try:
                    # Attempt to parse `str` as a boolean value
                    setting = parseBoolValue(setting, preserve_none=True)
                except ValueError:
                    pass  # Assume it's not a boolean value. Use as-is.
            self.config[name] = [setting, '']

    def extendMarkdown(self, md):
        """ Register a `HiliteTreeprocessor` and this extension with the Markdown instance. """
        processor = HiliteTreeprocessor(md)
        processor.config = self.getConfigs()
        md.treeprocessors.register(processor, 'hilite', 30)

        md.registerExtension(self)
def makeExtension(**kwargs):  # pragma: no cover
    """ Build a `CodeHiliteExtension` configured with the given keyword options. """
    extension = CodeHiliteExtension(**kwargs)
    return extension