bazarr/libs/guessit/rules/common/formatters.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Formatters
"""
from rebulk.formatters import formatters
from rebulk.remodule import re
from . import seps

# Separators listed here are not replaced with a space by cleanup().
_excluded_clean_chars = ',:;-/\\'
# Every other separator, in the order it appears in seps.
clean_chars = ''.join(char for char in seps if char not in _excluded_clean_chars)


def _potential_before(i, input_string):
    """
    Check if the character at position i can be a potential single char separator considering what's before it.

    :param i:
    :type i: int
    :param input_string:
    :type input_string: str
    :return:
    :rtype: bool
    """
    return i - 2 >= 0 and input_string[i] == input_string[i - 2] and input_string[i - 1] not in seps


def _potential_after(i, input_string):
    """
    Check if the character at position i can be a potential single char separator considering what's after it.

    :param i:
    :type i: int
    :param input_string:
    :type input_string: str
    :return:
    :rtype: bool
    """
    return i + 2 >= len(input_string) or \
           input_string[i + 2] == input_string[i] and input_string[i + 1] not in seps


def cleanup(input_string):
    """
    Replace separators with spaces, strip separators from both ends and collapse runs of spaces.

    Characters listed in ``_excluded_clean_chars`` are not replaced with a space (they can still be
    stripped from the ends of the string when they are separators).

    Separators placed between single characters are restored (Marvel's Agents of S.H.I.E.L.D.),
    see https://github.com/guessit-io/guessit/issues/278

    :param input_string:
    :type input_string: str
    :return: the cleaned string
    :rtype: str
    """
    clean_string = input_string
    for char in clean_chars:
        clean_string = clean_string.replace(char, ' ')

    # Each replacement above swaps one character for one space, so a position in clean_string
    # designates the same position in input_string.
    # A set keeps the "two positions away" lookups below constant-time.
    candidates = {
        i for i, letter in enumerate(clean_string)
        if letter in seps and _potential_before(i, input_string) and _potential_after(i, input_string)
    }

    # A candidate is restored only when another candidate sits two positions before or after it.
    restored = [i for i in candidates if i - 2 in candidates or i + 2 in candidates]

    dots = set()
    if restored:
        clean_list = list(clean_string)
        for i in restored:
            dots.add(input_string[i])
            clean_list[i] = input_string[i]
        clean_string = ''.join(clean_list)

    # Restored separator characters must not be stripped from the ends of the result.
    clean_string = strip(clean_string, ''.join([c for c in seps if c not in dots]))

    clean_string = re.sub(' +', ' ', clean_string)
    return clean_string


def strip(input_string, chars=seps):
    """
    Remove leading and trailing characters contained in chars from input_string.

    :param input_string: value to strip
    :type input_string: str
    :param chars: characters to remove from both ends, seps by default
    :type chars: str
    :return: the stripped value
    :rtype: str
    """
    stripped = input_string.strip(chars)
    return stripped


def raw_cleanup(raw):
    """
    Normalize a raw value so that it can be used in raw comparisons.

    The value is lower-cased, then passed to the callable built by ``formatters(cleanup, strip)``.

    :param raw: raw value to normalize
    :type raw: str
    :return: result of the cleanup/strip formatter applied to the lower-cased value
    :rtype:
    """
    raw_formatter = formatters(cleanup, strip)
    lowered = raw.lower()
    return raw_formatter(lowered)


def reorder_title(title, articles=('the',), separators=(',', ', ')):
    """
    Move a trailing article back to the front of the title ("Simpsons, The" -> "The Simpsons").

    The title is matched case-insensitively against each separator + article combination; the first
    combination found at the end of the title wins. The article keeps the case it has in the title.

    :param title: title to reorder
    :type title: str
    :param articles: lower-case articles to look for at the end of the title
    :type articles: tuple[str]
    :param separators: separators that may precede the trailing article
    :type separators: tuple[str]
    :return: the reordered title, or the title unchanged when no article is found at its end
    :rtype: str
    """
    lowered = title.lower()
    for article in articles:
        for separator in separators:
            suffix = separator + article
            suffix_len = len(suffix)
            if lowered[-suffix_len:] != suffix:
                continue
            # Offset (from the end of the title) where the article itself starts, past the separator.
            article_start = len(separator) - suffix_len
            moved_article = title[article_start:]
            remainder = title[:-suffix_len]
            return moved_article + ' ' + remainder
    return title