bazarr/libs/markdown/util.py

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
import sys


"""
Python 3 Stuff
=============================================================================
"""
# True only when the running interpreter's major version is exactly 3.
PY3 = sys.version_info[0] == 3

# Version-neutral aliases used throughout this module:
#   string_type -- type to test with isinstance() for "is this a string"
#   text_type   -- the text (unicode) string type
#   int2str     -- converts an integer code point into a one-character string
if not PY3:  # pragma: no cover
    string_type = basestring   # noqa
    text_type = unicode        # noqa
    int2str = unichr           # noqa
else:  # pragma: no cover
    string_type = text_type = str
    int2str = chr


"""
Constants you might want to modify
-----------------------------------------------------------------------------
"""


# Regex alternatives for the tag names treated as block level.  They are
# OR-ed together below into one pattern that is anchored at both ends and
# matched case-insensitively.
_BLOCK_LEVEL_TAGS = (
    "p", "div", "h[1-6]", "blockquote", "pre", "table", "dl", "ol", "ul",
    "script", "noscript", "form", "fieldset", "iframe", "math",
    "hr", "hr/", "style", "li", "dt", "dd", "thead", "tbody",
    "tr", "th", "td", "section", "footer", "header", "group", "figure",
    "figcaption", "aside", "article", "canvas", "output",
    "progress", "video", "nav", "main",
)
BLOCK_LEVEL_ELEMENTS = re.compile(
    "^(" + "|".join(_BLOCK_LEVEL_TAGS) + ")$",
    re.IGNORECASE
)
# Placeholders
#
# Every placeholder is wrapped in a pair of control characters.
STX = '\u0002'  # Use STX ("Start of text") for start-of-placeholder
ETX = '\u0003'  # Use ETX ("End of text") for end-of-placeholder

# Ampersand stand-in: a fixed token with no numeric slot.
AMP_SUBSTITUTE = "".join((STX, "amp", ETX))

# Inline placeholders: "%s" is the slot for the id; the regex captures a
# decimal id in group 1.
INLINE_PLACEHOLDER_PREFIX = STX + "klzzwxh:"
INLINE_PLACEHOLDER = "".join((INLINE_PLACEHOLDER_PREFIX, "%s", ETX))
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')

# Raw HTML placeholders, built the same way with their own marker text.
HTML_PLACEHOLDER = "".join((STX, "wzxhzdk:%s", ETX))
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')

# Tag placeholders (template only; no compiled regex is defined for these).
TAG_PLACEHOLDER = "".join((STX, "hzzhzkh:%s", ETX))


"""
Constants you probably do not need to change
-----------------------------------------------------------------------------
"""

# Pairs of (first, last) characters delimiting Unicode ranges.
RTL_BIDI_RANGES = (
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u0590', '\u07FF'),
    # Tifinagh
    ('\u2D30', '\u2D7F'),
)

# Extensions should use "markdown.util.etree" instead of "etree" (or do `from
# markdown.util import etree`).  Do not import ElementTree directly yourself.

# Bind the module-level name `etree` to an ElementTree implementation: try the
# C module first and fall back to the pure-Python module.
# NOTE(review): xml.etree.cElementTree is documented as removed from the
# standard library in Python 3.9; on such interpreters the first import raises
# ImportError and the fallback branch is always taken -- confirm against the
# interpreter versions this copy must support.
try:  # pragma: no cover
    # Is the C implementation of ElementTree available?
    import xml.etree.cElementTree as etree
    from xml.etree.ElementTree import Comment
    # Serializers (including ours) test with non-c Comment
    etree.test_comment = Comment
    # VERSION is compared as a plain string (lexicographic order), not as a
    # parsed version number.
    if etree.VERSION < "1.0.5":
        # Raising here hands control to the handler below, which falls back
        # to the Python implementation.
        raise RuntimeError("cElementTree version 1.0.5 or higher is required.")
except (ImportError, RuntimeError):  # pragma: no cover
    # Use the Python implementation of ElementTree?
    # Reached when an import above fails or the version check raises.  The
    # `test_comment` attribute is only assigned on the cElementTree module in
    # the try branch; it is not assigned here.
    import xml.etree.ElementTree as etree
    if etree.VERSION < "1.1":
        # No further fallback exists: this error propagates to the importer.
        raise RuntimeError("ElementTree version 1.1 or higher is required")


"""
AUXILIARY GLOBAL FUNCTIONS
=============================================================================
"""


def isBlockLevel(tag):
    """Report whether `tag` names a block level HTML element.

    Returns the result of matching `tag` against BLOCK_LEVEL_ELEMENTS (a
    match object, or None when the name is not block level).  Returns False
    when `tag` is not a string at all, as some ElementTree tags are not.
    """
    if not isinstance(tag, string_type):
        # Non-string tags can never be block level names.
        return False
    return BLOCK_LEVEL_ELEMENTS.match(tag)


def parseBoolValue(value, fail_on_errors=True, preserve_none=False):
    """Interpret `value` as a boolean setting.

    Non-string values are converted with bool(), except that None is passed
    through unchanged when `preserve_none` is true.

    String values are compared case-insensitively:

    * 'true', 'yes', 'y', 'on', '1' give True
    * 'false', 'no', 'n', 'off', '0', 'none' give False
    * 'none' gives None instead of False when `preserve_none` is true

    Any other string raises ValueError, or returns None when
    `fail_on_errors` is false.
    """
    if not isinstance(value, string_type):
        keep_none = preserve_none and value is None
        return value if keep_none else bool(value)

    lowered = value.lower()
    # The 'none' check must come first: 'none' is also in the false list.
    if preserve_none and lowered == 'none':
        return None
    if lowered in ('true', 'yes', 'y', 'on', '1'):
        return True
    if lowered in ('false', 'no', 'n', 'off', '0', 'none'):
        return False
    if fail_on_errors:
        raise ValueError('Cannot parse bool value: %r' % value)
    return None


"""
MISC AUXILIARY CLASSES
=============================================================================
"""


class AtomicString(text_type):
    """Marker text type: a string that must not be processed any further.

    Adds no behaviour of its own; it only differs from `text_type` by class.
    """


class Processor(object):
    """Base class that optionally remembers a markdown instance."""

    def __init__(self, markdown_instance=None):
        """Store `markdown_instance` as `self.markdown` when it is truthy.

        When it is omitted (or otherwise falsy) the `markdown` attribute is
        not created at all.
        """
        if not markdown_instance:
            return
        self.markdown = markdown_instance


class HtmlStash(object):
    """
    Holding area for HTML that is pulled out of the document early on and
    swapped for place-holders, so it can be put back later.
    """

    def __init__(self):
        """ Start with empty stashes and both counters at zero. """
        # Raw HTML segments, stored as (html, safe) tuples, and their count.
        self.rawHtmlBlocks = []
        self.html_counter = 0
        # Tag records (dictionaries, in order of appearance) and their count.
        self.tag_data = []
        self.tag_counter = 0

    def store(self, html, safe=False):
        """
        Stash an HTML segment and hand back the place-holder that should
        be put into the document in its place.

        Keyword arguments:

        * html: an html segment
        * safe: label an html segment as safe for safemode

        Returns : a placeholder string

        """
        self.rawHtmlBlocks.append((html, safe))
        # The place-holder is built from the counter value before the bump.
        token = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return token

    def reset(self):
        """
        Forget all stashed raw HTML segments and restart their counter.
        The tag records and tag counter are left as they are.
        """
        self.rawHtmlBlocks = []
        self.html_counter = 0

    def get_placeholder(self, key):
        """Return the raw HTML place-holder string for `key`."""
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag, attrs, left_index, right_index):
        """Record a tag's details and return the place-holder for it."""
        record = {
            'tag': tag,
            'attrs': attrs,
            'left_index': left_index,
            'right_index': right_index,
        }
        self.tag_data.append(record)
        # The counter value used here equals the record's index in tag_data.
        token = TAG_PLACEHOLDER % str(self.tag_counter)
        self.tag_counter += 1
        return token
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`# -- coding: utf-8 --`
			`from __future__ import unicode_literals`
			`import re`
			`import sys`


			`"""`
			`Python 3 Stuff`
			`=============================================================================`
			`"""`
			`PY3 = sys.version_info[0] == 3`

			`if PY3: # pragma: no cover`
			`string_type = str`
			`text_type = str`
			`int2str = chr`
			`else: # pragma: no cover`
			`string_type = basestring # noqa`
			`text_type = unicode # noqa`
			`int2str = unichr # noqa`


			`"""`
			`Constants you might want to modify`
			`-----------------------------------------------------------------------------`
			`"""`


			`BLOCK_LEVEL_ELEMENTS = re.compile(`
			`r"^(p\|div\|h[1-6]\|blockquote\|pre\|table\|dl\|ol\|ul"`
			`r"\|script\|noscript\|form\|fieldset\|iframe\|math"`
			`r"\|hr\|hr/\|style\|li\|dt\|dd\|thead\|tbody"`
			`r"\|tr\|th\|td\|section\|footer\|header\|group\|figure"`
			`r"\|figcaption\|aside\|article\|canvas\|output"`
			`r"\|progress\|video\|nav\|main)$",`
			`re.IGNORECASE`
			`)`
			`# Placeholders`
			`STX = '\u0002' # Use STX ("Start of text") for start-of-placeholder`
			`ETX = '\u0003' # Use ETX ("End of text") for end-of-placeholder`
			`INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"`
			`INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX`
			`INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')`
			`AMP_SUBSTITUTE = STX+"amp"+ETX`
			`HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX`
			`HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')`
			`TAG_PLACEHOLDER = STX + "hzzhzkh:%s" + ETX`


			`"""`
			`Constants you probably do not need to change`
			`-----------------------------------------------------------------------------`
			`"""`

			`RTL_BIDI_RANGES = (`
			`('\u0590', '\u07FF'),`
			`# Hebrew (0590-05FF), Arabic (0600-06FF),`
			`# Syriac (0700-074F), Arabic supplement (0750-077F),`
			`# Thaana (0780-07BF), Nko (07C0-07FF).`
			`('\u2D30', '\u2D7F') # Tifinagh`
			`)`

			# Extensions should use "markdown.util.etree" instead of "etree" (or do `from
			# markdown.util import etree`). Do not import it by yourself.

			`try: # pragma: no cover`
			`# Is the C implementation of ElementTree available?`
			`import xml.etree.cElementTree as etree`
			`from xml.etree.ElementTree import Comment`
			`# Serializers (including ours) test with non-c Comment`
			`etree.test_comment = Comment`
			`if etree.VERSION < "1.0.5":`
			`raise RuntimeError("cElementTree version 1.0.5 or higher is required.")`
			`except (ImportError, RuntimeError): # pragma: no cover`
			`# Use the Python implementation of ElementTree?`
			`import xml.etree.ElementTree as etree`
			`if etree.VERSION < "1.1":`
			`raise RuntimeError("ElementTree version 1.1 or higher is required")`


			`"""`
			`AUXILIARY GLOBAL FUNCTIONS`
			`=============================================================================`
			`"""`


			`def isBlockLevel(tag):`
			`"""Check if the tag is a block level HTML tag."""`
			`if isinstance(tag, string_type):`
			`return BLOCK_LEVEL_ELEMENTS.match(tag)`
			`# Some ElementTree tags are not strings, so return False.`
			`return False`


			`def parseBoolValue(value, fail_on_errors=True, preserve_none=False):`
			`"""Parses a string representing bool value. If parsing was successful,`
			`returns True or False. If preserve_none=True, returns True, False,`
			`or None. If parsing was not successful, raises ValueError, or, if`
			`fail_on_errors=False, returns None."""`
			`if not isinstance(value, string_type):`
			`if preserve_none and value is None:`
			`return value`
			`return bool(value)`
			`elif preserve_none and value.lower() == 'none':`
			`return None`
			`elif value.lower() in ('true', 'yes', 'y', 'on', '1'):`
			`return True`
			`elif value.lower() in ('false', 'no', 'n', 'off', '0', 'none'):`
			`return False`
			`elif fail_on_errors:`
			`raise ValueError('Cannot parse bool value: %r' % value)`


			`"""`
			`MISC AUXILIARY CLASSES`
			`=============================================================================`
			`"""`


			`class AtomicString(text_type):`
			`"""A string which should not be further processed."""`
			`pass`


			`class Processor(object):`
			`def __init__(self, markdown_instance=None):`
			`if markdown_instance:`
			`self.markdown = markdown_instance`


			`class HtmlStash(object):`
			`"""`
			`This class is used for stashing HTML objects that we extract`
			`in the beginning and replace with place-holders.`
			`"""`

			`def __init__(self):`
			`""" Create a HtmlStash. """`
			`self.html_counter = 0 # for counting inline html segments`
			`self.rawHtmlBlocks = []`
			`self.tag_counter = 0`
			`self.tag_data = [] # list of dictionaries in the order tags appear`

			`def store(self, html, safe=False):`
			`"""`
			`Saves an HTML segment for later reinsertion. Returns a`
			`placeholder string that needs to be inserted into the`
			`document.`

			`Keyword arguments:`

			`* html: an html segment`
			`* safe: label an html segment as safe for safemode`

			`Returns : a placeholder string`

			`"""`
			`self.rawHtmlBlocks.append((html, safe))`
			`placeholder = self.get_placeholder(self.html_counter)`
			`self.html_counter += 1`
			`return placeholder`

			`def reset(self):`
			`self.html_counter = 0`
			`self.rawHtmlBlocks = []`

			`def get_placeholder(self, key):`
			`return HTML_PLACEHOLDER % key`

			`def store_tag(self, tag, attrs, left_index, right_index):`
			`"""Store tag data and return a placeholder."""`
			`self.tag_data.append({'tag': tag, 'attrs': attrs,`
			`'left_index': left_index,`
			`'right_index': right_index})`
			`placeholder = TAG_PLACEHOLDER % str(self.tag_counter)`
			`self.tag_counter += 1 # equal to the tag's index in self.tag_data`
			`return placeholder`