#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Formatters
"""
from rebulk.formatters import formatters
from rebulk.remodule import re

from . import seps

# Separators that cleanup() must NOT turn into spaces.
_excluded_clean_chars = ',:;-/\\'
# Every other separator: cleanup() replaces each of these with a single space.
clean_chars = ''.join(sep for sep in seps if sep not in _excluded_clean_chars)


def _potential_before(i, input_string):
    """
    Check if the character at position i can be a potential single char
    separator considering what's before it.

    This holds when the character two positions earlier is the same character
    and the character in between is not a separator (pattern ``<sep>X<sep>``
    ending at i).

    :param i: index of the candidate separator in input_string
    :type i: int
    :param input_string: original, unmodified string
    :type input_string: str
    :return: True if what precedes position i matches the pattern
    :rtype: bool
    """
    return i - 2 >= 0 and input_string[i] == input_string[i - 2] and input_string[i - 1] not in seps


def _potential_after(i, input_string):
    """
    Check if the character at position i can be a potential single char
    separator considering what's after it.

    This holds when fewer than two characters follow position i, or when the
    character two positions later is the same character and the character in
    between is not a separator (pattern ``<sep>X<sep>`` starting at i).

    :param i: index of the candidate separator in input_string
    :type i: int
    :param input_string: original, unmodified string
    :type input_string: str
    :return: True if what follows position i matches the pattern
    :rtype: bool
    """
    # Parentheses only make the original and/or precedence explicit.
    return i + 2 >= len(input_string) or (
        input_string[i + 2] == input_string[i] and input_string[i + 1] not in seps
    )


def cleanup(input_string):
    """
    Replace separators with spaces and strip separators from both ends of
    input_string. Characters listed in ``_excluded_clean_chars`` are not
    replaced by spaces.

    It also keeps separators that sit between single characters, as in
    "Marvel's Agents of S.H.I.E.L.D.": a separator is restored when it passes
    both ``_potential_before`` and ``_potential_after`` and another such
    separator lies exactly two positions before or after it.

    :param input_string: string to clean
    :type input_string: str
    :return: cleaned string, with runs of spaces collapsed to a single space
    :rtype: str
    """
    clean_string = input_string
    for char in clean_chars:
        clean_string = clean_string.replace(char, ' ')

    # Restore input separators if they separate single characters.
    # Useful for Marvel's Agents of S.H.I.E.L.D.
    # https://github.com/guessit-io/guessit/issues/278
    # Each replacement above is one char for one char, so indices in
    # clean_string and input_string line up.
    # A set keeps the "two positions away" membership tests below O(1).
    potential_indices = {
        i for i, letter in enumerate(clean_string)
        if letter in seps and _potential_before(i, input_string) and _potential_after(i, input_string)
    }

    # Separator characters that were put back; they must survive the strip below.
    restored = set()
    if potential_indices:
        clean_list = list(clean_string)
        for index in potential_indices:
            # An isolated candidate is not restored; it needs a neighbour
            # candidate two positions before or after it.
            if index - 2 in potential_indices or index + 2 in potential_indices:
                restored.add(input_string[index])
                clean_list[index] = input_string[index]
        clean_string = ''.join(clean_list)

    clean_string = strip(clean_string, ''.join([c for c in seps if c not in restored]))

    clean_string = re.sub(' +', ' ', clean_string)
    return clean_string


def strip(input_string, chars=seps):
    """
    Strip separators from both ends of input_string.

    :param input_string: string to strip
    :type input_string: str
    :param chars: characters to remove from both ends (defaults to ``seps``)
    :type chars: str
    :return: input_string without leading and trailing ``chars``
    :rtype: str
    """
    return input_string.strip(chars)


def raw_cleanup(raw):
    """
    Cleanup a raw value to perform raw comparison.

    The value is lowercased, then passed to ``formatters(cleanup, strip)``.
    NOTE(review): presumably rebulk's ``formatters`` chains the given
    functions in order - confirm against the rebulk documentation.

    :param raw: raw value to normalize
    :type raw: str
    :return: normalized value
    :rtype: str
    """
    return formatters(cleanup, strip)(raw.lower())


def reorder_title(title, articles=('the',), separators=(',', ', ')):
    """
    Reorder the title by moving a trailing article to the front.

    With the default arguments, "Title, The" becomes "The Title". Matching is
    done on the lowercased title; the returned text keeps the original case.
    The first matching (article, separator) pair wins.

    :param title: title to reorder
    :type title: str
    :param articles: lowercase articles to look for at the end of the title
    :type articles: tuple[str]
    :param separators: separators that may precede the trailing article
    :type separators: tuple[str]
    :return: reordered title, or title unchanged when no suffix matches
    :rtype: str
    """
    ltitle = title.lower()
    for article in articles:
        for separator in separators:
            suffix = separator + article
            if ltitle[-len(suffix):] == suffix:
                # title[-len(article):] is the article in its original case;
                # title[:-len(suffix)] is everything before the separator.
                return title[-len(article):] + ' ' + title[:-len(suffix)]
    return title