bazarr/libs/apprise/conversion.py

# -*- coding: utf-8 -*-
# BSD 2-Clause License
#
# Apprise - Push Notification Library.
# Copyright (c) 2024, Chris Caron <lead2gold@gmail.com>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import re
from markdown import markdown
from .common import NotifyFormat
from .URLBase import URLBase

from html.parser import HTMLParser


def convert_between(from_format, to_format, content):
    """
    Translate content from one supported notification format to another.

    A converter is looked up by the (from_format, to_format) pair.  When no
    converter is registered for that pair the content is handed back
    untouched; otherwise the result of the converter is returned.  Nothing
    here catches exceptions, so an error raised inside a converter reaches
    the caller.
    """

    # Dispatch table keyed on the (source, target) format pair
    dispatch = {
        (NotifyFormat.MARKDOWN, NotifyFormat.HTML): markdown_to_html,
        (NotifyFormat.TEXT, NotifyFormat.HTML): text_to_html,
        (NotifyFormat.HTML, NotifyFormat.TEXT): html_to_text,
        # For now; HTML -> Markdown reuses the plain text converter
        (NotifyFormat.HTML, NotifyFormat.MARKDOWN): html_to_text,
    }

    pair = (from_format, to_format)
    if pair not in dispatch:
        # Unsupported (or identity) conversion; nothing to do
        return content

    return dispatch[pair](content)


def markdown_to_html(content):
    """
    Render markdown content as HTML.

    The markdown library is invoked with its nl2br and tables extensions
    enabled.
    """
    enabled_extensions = [
        'markdown.extensions.nl2br',
        'markdown.extensions.tables',
    ]
    return markdown(content, extensions=enabled_extensions)


def text_to_html(content):
    """
    Render plain text content as HTML.

    All of the work is delegated to URLBase.escape_html() with
    convert_new_lines switched on.
    """

    # NOTE(review): presumably escape_html() escapes HTML-special characters
    # and rewrites line breaks as markup - confirm against URLBase.
    escaped = URLBase.escape_html(content, convert_new_lines=True)
    return escaped


def html_to_text(content):
    """
    Render HTML content as plain text.

    The content is pushed through an HTMLConverter instance; the text it
    exposes through its `converted` attribute after close() is returned.
    """

    converter = HTMLConverter()

    # Parse everything in one go, then finalize so `converted` is populated
    converter.feed(content)
    converter.close()

    plain_text = converter.converted
    return plain_text


class HTMLConverter(HTMLParser, object):
    """An HTML to plain text converter tuned for email messages.

    Typical use is feed() followed by close(); the plain text result is then
    available through the `converted` attribute.
    """

    # The following tags must start on a new line
    BLOCK_TAGS = ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
                  'div', 'td', 'th', 'code', 'pre', 'label', 'li',)

    # The following tags ignore any text found directly inside of them
    IGNORE_TAGS = (
        'form', 'input', 'textarea', 'select', 'ul', 'ol', 'style', 'link',
        'meta', 'title', 'html', 'head', 'script')

    # Condense Whitespace
    WS_TRIM = re.compile(r'[\s]+', re.DOTALL | re.MULTILINE)

    # Sentinel value for block tag boundaries, which may be consolidated into a
    # single line break.
    BLOCK_END = {}

    def __init__(self, **kwargs):
        """
        Prepare an empty converter; keyword arguments are forwarded to
        HTMLParser unchanged.
        """
        super().__init__(**kwargs)

        # Should we store the text content or not?
        self._do_store = True

        # Initialize internal result list; holds text fragments (str)
        # interleaved with BLOCK_END sentinels
        self._result = []

        # Initialize public result field (not populated until close() is
        # called)
        self.converted = ""

    def close(self):
        """
        Finish parsing and populate `converted` with the final plain text.
        """
        # Let the base parser flush whatever it is still buffering first.
        # Without this, trailing text that HTMLParser holds back while it
        # waits for more input (e.g. text ending in an unterminated '&...'
        # sequence such as "AT&T") would never reach handle_data() and
        # would be lost.
        super().close()

        string = ''.join(self._finalize(self._result))
        self.converted = string.strip()

    def _finalize(self, result):
        """
        Combines and strips consecutive strings, then converts consecutive
        block ends into singleton newlines.  Block ends that are not
        preceded by any string (including leading ones) produce nothing.

        [ {be} " Hello " {be} {be} " World!" ] yields "Hello" followed by a
        newline, then "World!"
        """

        # None means the last visited item was a block end (or that nothing
        # has been visited yet).
        accum = None

        for item in result:
            # Identity check; only the sentinel itself marks a block end
            if item is self.BLOCK_END:
                # Multiple consecutive block ends; do nothing.
                if accum is None:
                    continue

                # First block end; yield the current string, plus a newline.
                yield accum.strip() + '\n'
                accum = None

            # Multiple consecutive strings; combine them.
            elif accum is not None:
                accum += item

            # First consecutive string; store it.
            else:
                accum = item

        # Yield the last string if we have not already done so.
        if accum is not None:
            yield accum.strip()

    def handle_data(self, data, *args, **kwargs):
        """
        Store our data if it is not on the ignore list
        """

        # Only keep text when the most recent tag did not disable storing
        if self._do_store:

            # Tidy our whitespace (each run collapses to a single space)
            content = self.WS_TRIM.sub(' ', data)
            self._result.append(content)

    def handle_starttag(self, tag, attrs):
        """
        Process our starting HTML Tag
        """
        # Toggle initial states; text following an ignored tag is dropped
        # until the next start or end tag is seen
        self._do_store = tag not in self.IGNORE_TAGS

        if tag in self.BLOCK_TAGS:
            self._result.append(self.BLOCK_END)

        if tag == 'li':
            self._result.append('- ')

        elif tag == 'br':
            self._result.append('\n')

        elif tag == 'hr':
            # Trim trailing spaces off the preceding text fragment.  The
            # previous entry may instead be the BLOCK_END sentinel (e.g.
            # "<p>text</p><hr>"), which is not a string and must be left
            # alone.
            if self._result and isinstance(self._result[-1], str):
                self._result[-1] = self._result[-1].rstrip(' ')

            self._result.append('\n---\n')

        elif tag == 'blockquote':
            self._result.append(' >')

    def handle_endtag(self, tag):
        """
        Edge case handling of open/close tags
        """
        # Any closing tag re-enables text storage
        self._do_store = True

        if tag in self.BLOCK_TAGS:
            self._result.append(self.BLOCK_END)
Updated apprise to the latest version. #1834 2022-05-08 13:50:49 +00:00			`# -- coding: utf-8 --`
Updated to apprise v1.6.0 Changelog: https://github.com/caronc/apprise/releases Highlights: * v1.6.0 * Notifiarr * v1.5.0 * Pushy * PushDeer * PushMe * RSyslog * v1.4.5 * WhatsApp * Burst SMS 2023-11-28 21:12:04 +00:00			`# BSD 2-Clause License`
Updated apprise to the latest version. #1834 2022-05-08 13:50:49 +00:00			`#`
Updated apprise module to improve notification system. #2163 2023-06-07 18:05:42 +00:00			`# Apprise - Push Notification Library.`
Updated multiple Python modules (now in libs and custom_libs directories) and React libraries 2024-03-03 17:15:23 +00:00			`# Copyright (c) 2024, Chris Caron <lead2gold@gmail.com>`
Updated apprise to the latest version. #1834 2022-05-08 13:50:49 +00:00			`#`
Updated apprise module to improve notification system. #2163 2023-06-07 18:05:42 +00:00			`# Redistribution and use in source and binary forms, with or without`
			`# modification, are permitted provided that the following conditions are met:`
Updated apprise to the latest version. #1834 2022-05-08 13:50:49 +00:00			`#`
Updated apprise module to improve notification system. #2163 2023-06-07 18:05:42 +00:00			`# 1. Redistributions of source code must retain the above copyright notice,`
			`# this list of conditions and the following disclaimer.`
Updated apprise to the latest version. #1834 2022-05-08 13:50:49 +00:00			`#`
Updated apprise module to improve notification system. #2163 2023-06-07 18:05:42 +00:00			`# 2. Redistributions in binary form must reproduce the above copyright notice,`
			`# this list of conditions and the following disclaimer in the documentation`
			`# and/or other materials provided with the distribution.`
Updated apprise to the latest version. #1834 2022-05-08 13:50:49 +00:00			`#`
Updated apprise module to improve notification system. #2163 2023-06-07 18:05:42 +00:00			`# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"`
			`# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE`
			`# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE`
			`# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE`
			`# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR`
			`# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF`
			`# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS`
			`# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN`
			`# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)`
			`# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE`
			`# POSSIBILITY OF SUCH DAMAGE.`
Updated apprise to the latest version. #1834 2022-05-08 13:50:49 +00:00
			`import re`
			`from markdown import markdown`
			`from .common import NotifyFormat`
Reverted to apprise 1.7.6 to fix an issue with the upgrade process first. 1.8.0 will get back in nightly shortly. #2497 2024-05-24 17:19:37 +00:00			`from .URLBase import URLBase`
Updated apprise to the latest version. #1834 2022-05-08 13:50:49 +00:00
Upgraded Apprise to 1.1.0 version. 2022-10-11 01:19:24 +00:00			`from html.parser import HTMLParser`
Updated apprise to the latest version. #1834 2022-05-08 13:50:49 +00:00

			`def convert_between(from_format, to_format, content):`
			`"""`
			`Converts between different suported formats. If no conversion exists,`
			`or the selected one fails, the original text will be returned.`

			`This function returns the content translated (if required)`
			`"""`

			`converters = {`
			`(NotifyFormat.MARKDOWN, NotifyFormat.HTML): markdown_to_html,`
			`(NotifyFormat.TEXT, NotifyFormat.HTML): text_to_html,`
			`(NotifyFormat.HTML, NotifyFormat.TEXT): html_to_text,`
			`# For now; use same converter for Markdown support`
			`(NotifyFormat.HTML, NotifyFormat.MARKDOWN): html_to_text,`
			`}`

			`convert = converters.get((from_format, to_format))`
			`return convert(content) if convert else content`


			`def markdown_to_html(content):`
			`"""`
			`Converts specified content from markdown to HTML.`
			`"""`
Updated apprise to version 1.7.6 2024-04-15 19:00:10 +00:00			`return markdown(content, extensions=[`
			`'markdown.extensions.nl2br', 'markdown.extensions.tables'])`
Updated apprise to the latest version. #1834 2022-05-08 13:50:49 +00:00

			`def text_to_html(content):`
			`"""`
			`Converts specified content from plain text to HTML.`
			`"""`

Upgraded Apprise to 1.1.0 version. 2022-10-11 01:19:24 +00:00			`# First eliminate any carriage returns`
			`return URLBase.escape_html(content, convert_new_lines=True)`
Updated apprise to the latest version. #1834 2022-05-08 13:50:49 +00:00

			`def html_to_text(content):`
			`"""`
			`Converts a content from HTML to plain text.`
			`"""`

			`parser = HTMLConverter()`
			`parser.feed(content)`
			`parser.close()`
			`return parser.converted`


			`class HTMLConverter(HTMLParser, object):`
			`"""An HTML to plain text converter tuned for email messages."""`

			`# The following tags must start on a new line`
			`BLOCK_TAGS = ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',`
			`'div', 'td', 'th', 'code', 'pre', 'label', 'li',)`

			`# the folowing tags ignore any internal text`
Upgraded Apprise to 1.1.0 version. 2022-10-11 01:19:24 +00:00			`IGNORE_TAGS = (`
			`'form', 'input', 'textarea', 'select', 'ul', 'ol', 'style', 'link',`
			`'meta', 'title', 'html', 'head', 'script')`
Updated apprise to the latest version. #1834 2022-05-08 13:50:49 +00:00
			`# Condense Whitespace`
			`WS_TRIM = re.compile(r'[\s]+', re.DOTALL \| re.MULTILINE)`

			`# Sentinel value for block tag boundaries, which may be consolidated into a`
			`# single line break.`
			`BLOCK_END = {}`

			`def __init__(self, **kwargs):`
Updated apprise module to improve notification system. #2163 2023-06-07 18:05:42 +00:00			`super().__init__(**kwargs)`
Updated apprise to the latest version. #1834 2022-05-08 13:50:49 +00:00
			`# Shoudl we store the text content or not?`
			`self._do_store = True`

			`# Initialize internal result list`
			`self._result = []`

			`# Initialize public result field (not populated until close() is`
			`# called)`
			`self.converted = ""`

			`def close(self):`
			`string = ''.join(self._finalize(self._result))`
			`self.converted = string.strip()`

			`def _finalize(self, result):`
			`"""`
			`Combines and strips consecutive strings, then converts consecutive`
			`block ends into singleton newlines.`

			`[ {be} " Hello " {be} {be} " World!" ] -> "\nHello\nWorld!"`
			`"""`

			`# None means the last visited item was a block end.`
			`accum = None`

			`for item in result:`
			`if item == self.BLOCK_END:`
			`# Multiple consecutive block ends; do nothing.`
			`if accum is None:`
			`continue`

			`# First block end; yield the current string, plus a newline.`
			`yield accum.strip() + '\n'`
			`accum = None`

			`# Multiple consecutive strings; combine them.`
			`elif accum is not None:`
			`accum += item`

			`# First consecutive string; store it.`
			`else:`
			`accum = item`

			`# Yield the last string if we have not already done so.`
			`if accum is not None:`
			`yield accum.strip()`

			`def handle_data(self, data, args, *kwargs):`
			`"""`
			`Store our data if it is not on the ignore list`
			`"""`

			`# initialize our previous flag`
			`if self._do_store:`

			`# Tidy our whitespace`
			`content = self.WS_TRIM.sub(' ', data)`
			`self._result.append(content)`

			`def handle_starttag(self, tag, attrs):`
			`"""`
			`Process our starting HTML Tag`
			`"""`
			`# Toggle initial states`
			`self._do_store = tag not in self.IGNORE_TAGS`

			`if tag in self.BLOCK_TAGS:`
			`self._result.append(self.BLOCK_END)`

			`if tag == 'li':`
			`self._result.append('- ')`

			`elif tag == 'br':`
			`self._result.append('\n')`

			`elif tag == 'hr':`
			`if self._result:`
			`self._result[-1] = self._result[-1].rstrip(' ')`

			`self._result.append('\n---\n')`

			`elif tag == 'blockquote':`
			`self._result.append(' >')`

			`def handle_endtag(self, tag):`
			`"""`
			`Edge case handling of open/close tags`
			`"""`
			`self._do_store = True`

			`if tag in self.BLOCK_TAGS:`
			`self._result.append(self.BLOCK_END)`