bazarr/libs/pycountry/__init__.py

# vim:fileencoding=utf-8
"""pycountry"""

import os.path
import unicodedata
import pycountry.db


try:
    import pkg_resources
    resource_filename = pkg_resources.resource_filename
    __version__ = pkg_resources.get_distribution("pycountry").version
except (ImportError, pkg_resources.DistributionNotFound):
    __version__ = 'n/a'

    def resource_filename(package_or_requirement, resource_name):
        return os.path.join(os.path.dirname(__file__), resource_name)


LOCALES_DIR = resource_filename('pycountry', 'locales')
DATABASE_DIR = resource_filename('pycountry', 'databases')


def remove_accents(input_str):
    # Borrowed from https://stackoverflow.com/a/517974/1509718
    nfkd_form = unicodedata.normalize('NFKD', input_str)
    return u"".join([c for c in nfkd_form if not unicodedata.combining(c)])


class ExistingCountries(pycountry.db.Database):
    """Provides access to an ISO 3166 database (Countries)."""

    data_class_name = 'Country'
    root_key = '3166-1'

    def search_fuzzy(self, query):
        query = remove_accents(query.strip().lower())

        # A country-code to points mapping for later sorting countries
        # based on the query's matching incidence.
        results = {}

        def add_result(country, points):
            results.setdefault(country.alpha_2, 0)
            results[country.alpha_2] += points

        # Prio 1: exact matches on country names
        try:
            add_result(self.lookup(query), 50)
        except LookupError:
            pass

        # Prio 2: exact matches on subdivision names
        for candidate in subdivisions:
            for v in candidate._fields.values():
                if v is None:
                    continue
                v = remove_accents(v.lower())
                # Some names include alternative versions which we want to
                # match exactly.
                for v in v.split(';'):
                    if v == query:
                        add_result(candidate.country, 49)
                        break

        # Prio 3: partial matches on country names
        for candidate in self:
            # Higher priority for a match on the common name
            for v in [candidate._fields.get('name'),
                      candidate._fields.get('official_name'),
                      candidate._fields.get('comment')]:
                if v is None:
                    continue
                v = remove_accents(v.lower())
                if query in v:
                    # This prefers countries with a match early in their name
                    # and also balances against countries with a number of
                    # partial matches and their name containing 'new' in the
                    # middle
                    add_result(candidate, max([5, 30-(2*v.find(query))]))
                    break

        # Prio 4: partial matches on subdivision names
        for candidate in subdivisions:
            v = candidate._fields.get('name')
            if v is None:
                continue
            v = remove_accents(v.lower())
            if query in v:
                add_result(candidate.country, max([1, 5-v.find(query)]))

        if not results:
            raise LookupError(query)

        results = [
            self.get(alpha_2=x[0])
            # sort by points first, by alpha2 code second, and to ensure stable
            # results the negative value allows us to sort reversely on the
            # points but ascending on the country code.
            for x in sorted(results.items(),
                            key=lambda x: (-x[1], x[0]))]
        return results


class HistoricCountries(ExistingCountries):
    """Provides access to an ISO 3166-3 database
    (Countries that have been removed from the standard)."""

    data_class_name = 'Country'
    root_key = '3166-3'


class Scripts(pycountry.db.Database):
    """Provides access to an ISO 15924 database (Scripts)."""

    data_class_name = 'Script'
    root_key = '15924'


class Currencies(pycountry.db.Database):
    """Provides access to an ISO 4217 database (Currencies)."""

    data_class_name = 'Currency'
    root_key = '4217'


class Languages(pycountry.db.Database):
    """Provides access to an ISO 639-1/2T/3 database (Languages)."""

    no_index = ['status', 'scope', 'type', 'inverted_name', 'common_name']
    data_class_name = 'Language'
    root_key = '639-3'


class LanguageFamilies(pycountry.db.Database):
    """Provides access to an ISO 639-5 database
       (Language Families and Groups)."""

    data_class_name = 'LanguageFamily'
    root_key = '639-5'


class Subdivision(pycountry.db.Data):

    def __init__(self, **kw):
        if 'parent' in kw:
            kw['parent_code'] = kw['parent']
        else:
            kw['parent_code'] = None
        super(Subdivision, self).__init__(**kw)
        self.country_code = self.code.split('-')[0]
        if self.parent_code is not None:
            self.parent_code = '%s-%s' % (self.country_code, self.parent_code)

    @property
    def country(self):
        return countries.get(alpha_2=self.country_code)

    @property
    def parent(self):
        if not self.parent_code:
            return None
        return subdivisions.get(code=self.parent_code)


class Subdivisions(pycountry.db.Database):

    # Note: subdivisions can be hierarchical to other subdivisions. The
    # parent_code attribute is related to other subdivisons, *not*
    # the country!

    data_class_base = Subdivision
    data_class_name = 'Subdivision'
    no_index = ['name', 'parent_code', 'parent', 'type']
    root_key = '3166-2'

    def _load(self, *args, **kw):
        super(Subdivisions, self)._load(*args, **kw)

        # Add index for the country code.
        self.indices['country_code'] = {}
        for subdivision in self:
            divs = self.indices['country_code'].setdefault(
                subdivision.country_code.lower(), set())
            divs.add(subdivision)

    def get(self, **kw):
        default = kw.setdefault('default', None)
        subdivisions = super(Subdivisions, self).get(**kw)
        if subdivisions is default and 'country_code' in kw:
            # This handles the case where we know about a country but there
            # are no subdivisions: we return an empty list in this case
            # (sticking to the expected type here) instead of None.
            if countries.get(alpha_2=kw['country_code']) is not None:
                return []
        return subdivisions


countries = ExistingCountries(os.path.join(DATABASE_DIR, 'iso3166-1.json'))
subdivisions = Subdivisions(os.path.join(DATABASE_DIR, 'iso3166-2.json'))
historic_countries = HistoricCountries(
    os.path.join(DATABASE_DIR, 'iso3166-3.json'))

currencies = Currencies(os.path.join(DATABASE_DIR, 'iso4217.json'))

languages = Languages(os.path.join(DATABASE_DIR, 'iso639-3.json'))
language_families = LanguageFamilies(
    os.path.join(DATABASE_DIR, 'iso639-5.json'))

scripts = Scripts(os.path.join(DATABASE_DIR, 'iso15924.json'))