# bazarr/libs/markdown/preprocessors.py

"""
Python Markdown

A Python implementation of John Gruber's Markdown.

Documentation: https://python-markdown.github.io/
GitHub: https://github.com/Python-Markdown/markdown/
PyPI: https://pypi.org/project/Markdown/

Started by Manfred Stienstra (http://www.dwerg.net/).
Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
Currently maintained by Waylan Limberg (https://github.com/waylan),
Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).

Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
Copyright 2004 Manfred Stienstra (the original version)

License: BSD (see LICENSE.md for details).

PRE-PROCESSORS
=============================================================================

Preprocessors work on source text before we start doing anything too
complicated.
"""

from . import util
from .htmlparser import HTMLExtractor
import re


def build_preprocessors(md, **kwargs):
    """
    Assemble the registry of preprocessors that Markdown uses by default.

    A `NormalizeWhitespace` instance is registered as 'normalize_whitespace'
    with priority 30, followed by an `HtmlBlockPreprocessor` instance
    registered as 'html_block' with priority 20. Both are constructed with
    `md`. Any extra keyword arguments are accepted but not used here.

    Returns the populated `util.Registry`.
    """
    registry = util.Registry()
    # (class, registered name, priority) -- kept in registration order.
    defaults = (
        (NormalizeWhitespace, 'normalize_whitespace', 30),
        (HtmlBlockPreprocessor, 'html_block', 20),
    )
    for processor_cls, name, priority in defaults:
        registry.register(processor_cls(md), name, priority)
    return registry


class Preprocessor(util.Processor):
    """
    Base class for processors that run once the text has been split into
    lines.

    A preprocessor provides a "run" method which receives the document as a
    list of lines, changes it where needed, and hands back either that same
    list or a newly built one.

    Every preprocessor must extend markdown.Preprocessor.

    """
    def run(self, lines):
        """
        Placeholder meant to be overridden by each subclass of Preprocessor.

        `lines` is the document as a list of strings split by newlines; an
        override returns the (possibly modified) list of lines. This base
        implementation does nothing and returns None.

        """
        return None  # pragma: no cover


class NormalizeWhitespace(Preprocessor):
    """ Bring the document's whitespace into one consistent form for parsing. """

    def run(self, lines):
        """
        Return `lines` re-split after whitespace normalization.

        The lines are joined with newlines, every `util.STX` and `util.ETX`
        occurrence is removed, line endings are unified to "\\n", two
        trailing newlines are appended, tabs are expanded to
        `self.md.tab_length` columns, and lines consisting only of spaces
        (when preceded by a newline) are emptied.
        """
        text = '\n'.join(lines)
        for marker in (util.STX, util.ETX):
            text = text.replace(marker, "")
        # "\r\n" has to be rewritten before a lone "\r", otherwise each
        # Windows line ending would turn into two newlines.
        for line_ending in ("\r\n", "\r"):
            text = text.replace(line_ending, "\n")
        text += "\n\n"
        text = text.expandtabs(self.md.tab_length)
        # The lookbehind only matches after a newline, so a spaces-only
        # first line is left untouched.
        text = re.sub(r'(?<=\n) +\n', '\n', text)
        return text.split('\n')


class HtmlBlockPreprocessor(Preprocessor):
    """Pull html blocks out of the text and stash them so they can be restored later."""

    def run(self, lines):
        """
        Feed the joined document through an `HTMLExtractor` and return the
        extractor's `cleandoc` pieces, concatenated and split back into
        lines.
        """
        document = '\n'.join(lines)
        extractor = HTMLExtractor(self.md)
        extractor.feed(document)
        extractor.close()
        # `cleandoc` is a sequence of string fragments; they are joined
        # with no separator before being split on newlines.
        cleaned = ''.join(extractor.cleandoc)
        return cleaned.split('\n')
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`"""`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`Python Markdown`

			`A Python implementation of John Gruber's Markdown.`

			`Documentation: https://python-markdown.github.io/`
			`GitHub: https://github.com/Python-Markdown/markdown/`
			`PyPI: https://pypi.org/project/Markdown/`

			`Started by Manfred Stienstra (http://www.dwerg.net/).`
			`Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).`
			`Currently maintained by Waylan Limberg (https://github.com/waylan),`
			`Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).`

			`Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)`
			`Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)`
			`Copyright 2004 Manfred Stienstra (the original version)`

			`License: BSD (see LICENSE.md for details).`

Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`PRE-PROCESSORS`
			`=============================================================================`

			`Preprocessors work on source text before we start doing anything too`
			`complicated.`
			`"""`

			`from . import util`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`from .htmlparser import HTMLExtractor`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`import re`


Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`def build_preprocessors(md, **kwargs):`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`""" Build the default set of preprocessors used by Markdown. """`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`preprocessors = util.Registry()`
			`preprocessors.register(NormalizeWhitespace(md), 'normalize_whitespace', 30)`
			`preprocessors.register(HtmlBlockPreprocessor(md), 'html_block', 20)`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`return preprocessors`


			`class Preprocessor(util.Processor):`
			`"""`
			`Preprocessors are run after the text is broken into lines.`

			`Each preprocessor implements a "run" method that takes a pointer to a`
			`list of lines of the document, modifies it as necessary and returns`
			`either the same pointer or a pointer to a new list.`

			`Preprocessors must extend markdown.Preprocessor.`

			`"""`
			`def run(self, lines):`
			`"""`
			Each subclass of Preprocessor should override the `run` method, which
			`takes the document as a list of strings split by newlines and returns`
			`the (possibly modified) list of lines.`

			`"""`
			`pass # pragma: no cover`


			`class NormalizeWhitespace(Preprocessor):`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`""" Normalize whitespace for consistent parsing. """`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00
			`def run(self, lines):`
			`source = '\n'.join(lines)`
			`source = source.replace(util.STX, "").replace(util.ETX, "")`
			`source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`source = source.expandtabs(self.md.tab_length)`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`source = re.sub(r'(?<=\n) +\n', '\n', source)`
			`return source.split('\n')`


			`class HtmlBlockPreprocessor(Preprocessor):`
			`"""Remove html blocks from the text and store them for later retrieval."""`

			`def run(self, lines):`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`source = '\n'.join(lines)`
			`parser = HTMLExtractor(self.md)`
			`parser.feed(source)`
			`parser.close()`
			`return ''.join(parser.cleandoc).split('\n')`