bazarr/libs/pycountry/__init__.py

# vim:fileencoding=utf-8
"""pycountry"""

import os.path
import unicodedata

import pycountry.db

# Resolve package resources through setuptools' ``pkg_resources`` when it is
# importable; otherwise fall back to paths relative to this file.
try:
    import pkg_resources
except ImportError:
    # Without pkg_resources the installed distribution cannot be queried.
    # Bind the same placeholder used for DistributionNotFound below, so that
    # ``__version__`` exists on every code path.
    __version__ = "n/a"

    def resource_filename(package_or_requirement, resource_name):
        """Return the path of *resource_name* next to this module.

        Fallback for ``pkg_resources.resource_filename``.

        :param package_or_requirement: accepted for signature
            compatibility only; it is ignored.
        :param resource_name: path of the resource relative to the
            directory containing this file.
        :returns: the joined filesystem path.
        """
        return os.path.join(os.path.dirname(__file__), resource_name)

else:
    resource_filename = pkg_resources.resource_filename
    try:
        __version__ = pkg_resources.get_distribution("pycountry").version
    except pkg_resources.DistributionNotFound:
        # The package is importable but not installed as a distribution.
        __version__ = "n/a"


# Filesystem locations of the package's "locales" and "databases" resources,
# resolved through whichever ``resource_filename`` was selected above.
LOCALES_DIR = resource_filename("pycountry", "locales")
# DATABASE_DIR is the directory the module-level database objects at the
# bottom of this file read their JSON files from.
DATABASE_DIR = resource_filename("pycountry", "databases")


def remove_accents(input_str):
    """Return *input_str* with all combining characters removed.

    The text is first decomposed with NFKD normalization, which separates
    base characters from their combining marks; every character for which
    ``unicodedata.combining`` reports a non-zero class is then dropped.
    """
    # Approach borrowed from https://stackoverflow.com/a/517974/1509718
    decomposed = unicodedata.normalize("NFKD", input_str)
    base_chars = (ch for ch in decomposed if unicodedata.combining(ch) == 0)
    return "".join(base_chars)


class ExistingCountries(pycountry.db.Database):
    """Provides access to an ISO 3166 database (Countries).

    In addition to what the base class offers, this class provides
    ``search_fuzzy`` for ranked, accent-insensitive matching of a query
    against country and subdivision names.
    """

    # Presumably consumed by pycountry.db.Database to name the record class
    # and to select the top-level key of the JSON file -- confirm in
    # pycountry.db.
    data_class_name = "Country"
    root_key = "3166-1"

    def search_fuzzy(self, query):
        """Return countries matching *query*, best match first.

        The query is stripped, lower-cased and has its accents removed
        before matching; the values it is compared against are lower-cased
        and accent-stripped as well. Every match adds points to the country
        it belongs to, and a country hit by several rules accumulates all
        of them.

        Scoring rules, in the order they are applied:

        1. ``self.lookup(query)`` succeeds: 50 points.
        2. A ``;``-separated part of any subdivision field equals the
           query: 49 points for the subdivision's country.
        3. The query is a substring of a country's name, official name or
           comment (the first of these that matches): 30 minus twice the
           match offset, but at least 5 points.
        4. The query is a substring of a subdivision's name: 5 minus the
           match offset, but at least 1 point.

        :param query: text to search for.
        :returns: list of ``self.get(alpha_2=...)`` results ordered by
            descending points, ties broken by ascending alpha-2 code.
        :raises LookupError: if no rule produced a match.
        """
        query = remove_accents(query.strip().lower())

        # A country-code to points mapping for later sorting countries
        # based on the query's matching incidence.
        results = {}

        def add_result(country, points):
            # Accumulate points under the country's alpha-2 code.
            results.setdefault(country.alpha_2, 0)
            results[country.alpha_2] += points

        # Prio 1: exact matches on country names
        try:
            add_result(self.lookup(query), 50)
        except LookupError:
            # No exact match; the remaining rules may still find something.
            pass

        # Prio 2: exact matches on subdivision names
        # ``subdivisions`` is the module-level Subdivisions instance defined
        # at the bottom of this file.
        # NOTE(review): candidate.country is resolved through the
        # module-level ``countries`` database (see Subdivision.country), not
        # through ``self``; it looks like add_result would fail with
        # AttributeError if that lookup returned None -- confirm.
        for candidate in subdivisions:
            for v in candidate._fields.values():
                if v is None:
                    continue
                v = remove_accents(v.lower())
                # Some names include alternative versions which we want to
                # match exactly.
                for v in v.split(";"):
                    if v == query:
                        add_result(candidate.country, 49)
                        # Only leaves the loop over the ";"-separated parts;
                        # the remaining fields of this subdivision are still
                        # checked and may add another 49 points.
                        break

        # Prio 3: partial matches on country names
        for candidate in self:
            # Higher priority for a match on the common name
            for v in [
                candidate._fields.get("name"),
                candidate._fields.get("official_name"),
                candidate._fields.get("comment"),
            ]:
                if v is None:
                    continue
                v = remove_accents(v.lower())
                if query in v:
                    # This prefers countries with a match early in their name
                    # and also balances against countries with a number of
                    # partial matches and their name containing 'new' in the
                    # middle
                    add_result(candidate, max([5, 30 - (2 * v.find(query))]))
                    # Score each country at most once in this rule.
                    break

        # Prio 4: partial matches on subdivision names
        for candidate in subdivisions:
            v = candidate._fields.get("name")
            if v is None:
                continue
            v = remove_accents(v.lower())
            if query in v:
                # Earlier matches in the name score higher, minimum 1 point.
                add_result(candidate.country, max([1, 5 - v.find(query)]))

        if not results:
            raise LookupError(query)

        # NOTE(review): the codes collected from subdivisions come from the
        # module-level ``countries`` database, yet they are resolved with
        # ``self.get`` here; for a different instance (e.g. a subclass bound
        # to another file) some entries might not resolve -- confirm.
        results = [
            self.get(alpha_2=x[0])
            # sort by points first, by alpha2 code second, and to ensure stable
            # results the negative value allows us to sort reversely on the
            # points but ascending on the country code.
            for x in sorted(results.items(), key=lambda x: (-x[1], x[0]))
        ]
        return results


class HistoricCountries(ExistingCountries):
    """Provides access to an ISO 3166-3 database.

    ISO 3166-3 lists the countries that have been removed from the
    standard. Everything except the root key is shared with
    ``ExistingCountries``.
    """

    root_key = "3166-3"
    data_class_name = "Country"


class Scripts(pycountry.db.Database):
    """Provides access to an ISO 15924 database (Scripts)."""

    root_key = "15924"
    data_class_name = "Script"


class Currencies(pycountry.db.Database):
    """Provides access to an ISO 4217 database (Currencies)."""

    root_key = "4217"
    data_class_name = "Currency"


class Languages(pycountry.db.Database):
    """Provides access to an ISO 639-1/2T/3 database (Languages)."""

    root_key = "639-3"
    data_class_name = "Language"
    # Field names listed for the base class under ``no_index``; presumably
    # these are skipped when lookup indices are built -- confirm in
    # pycountry.db.
    no_index = ["status", "scope", "type", "inverted_name", "common_name"]


class LanguageFamilies(pycountry.db.Database):
    """Provides access to an ISO 639-5 database.

    ISO 639-5 covers language families and groups.
    """

    root_key = "639-5"
    data_class_name = "LanguageFamily"


class Subdivision(pycountry.db.Data):
    """A single subdivision record.

    On construction the record gains two derived attributes:

    * ``country_code`` -- the part of ``code`` before the first ``-``.
    * ``parent_code`` -- ``None`` when no ``parent`` field was supplied,
      otherwise the supplied parent value prefixed with the country code.
    """

    def __init__(self, **kw):
        # Mirror the raw "parent" field into "parent_code"; a missing field
        # becomes None.
        kw["parent_code"] = kw.get("parent")
        super(Subdivision, self).__init__(**kw)

        country_part, _, _ = self.code.partition("-")
        self.country_code = country_part

        if self.parent_code is None:
            return
        # Qualify the parent value with the country code.
        self.parent_code = "{}-{}".format(self.country_code, self.parent_code)

    @property
    def country(self):
        """The entry of the module-level ``countries`` database whose
        alpha-2 code equals ``country_code``."""
        return countries.get(alpha_2=self.country_code)

    @property
    def parent(self):
        """The parent subdivision looked up by ``parent_code`` in the
        module-level ``subdivisions`` database, or ``None`` when
        ``parent_code`` is empty."""
        if self.parent_code:
            return subdivisions.get(code=self.parent_code)
        return None


class Subdivisions(pycountry.db.Database):
    """Provides access to an ISO 3166-2 database (Subdivisions)."""

    # Note: subdivisions can be hierarchical to other subdivisions. The
    # parent_code attribute is related to other subdivisions, *not*
    # the country!

    root_key = "3166-2"
    data_class_base = Subdivision
    data_class_name = "Subdivision"
    no_index = ["name", "parent_code", "parent", "type"]

    def _load(self, *args, **kw):
        """Load the database, then build an index keyed by the lower-cased
        country code whose values are sets of subdivisions."""
        super(Subdivisions, self)._load(*args, **kw)

        # Add index for the country code.
        by_country = self.indices["country_code"] = {}
        for entry in self:
            key = entry.country_code.lower()
            if key not in by_country:
                by_country[key] = set()
            by_country[key].add(entry)

    def get(self, **kw):
        """Look up subdivisions, returning ``[]`` for a known country that
        has none.

        Behaves like the base class ``get`` (with ``default`` falling back
        to ``None``), except when the lookup yields the default, a
        ``country_code`` keyword was given and that code resolves in the
        module-level ``countries`` database: an empty list is returned
        then, sticking to the expected collection type.
        """
        fallback = kw.setdefault("default", None)
        found = super(Subdivisions, self).get(**kw)

        if found is not fallback or "country_code" not in kw:
            return found
        if countries.get(alpha_2=kw["country_code"]) is None:
            # Unknown country: keep returning the default.
            return found
        # Known country without subdivisions.
        return []


def _database_path(filename):
    """Return the path of *filename* inside ``DATABASE_DIR``."""
    return os.path.join(DATABASE_DIR, filename)


# Module-level database instances, one per JSON file in DATABASE_DIR.
# ``countries`` and ``subdivisions`` are also referenced by name from the
# classes defined above.
countries = ExistingCountries(_database_path("iso3166-1.json"))
subdivisions = Subdivisions(_database_path("iso3166-2.json"))
historic_countries = HistoricCountries(_database_path("iso3166-3.json"))

currencies = Currencies(_database_path("iso4217.json"))

languages = Languages(_database_path("iso639-3.json"))
language_families = LanguageFamilies(_database_path("iso639-5.json"))

scripts = Scripts(_database_path("iso15924.json"))
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`# vim:fileencoding=utf-8`
			`"""pycountry"""`

			`import os.path`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`import unicodedata`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`import pycountry.db`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`try:`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`import pkg_resources`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`resource_filename = pkg_resources.resource_filename`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`except ImportError:`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`def resource_filename(package_or_requirement, resource_name):`
			`return os.path.join(os.path.dirname(__file__), resource_name)`

Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`else:`
			`try:`
			`__version__ = pkg_resources.get_distribution("pycountry").version`
			`except pkg_resources.DistributionNotFound:`
			`__version__ = "n/a"`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00
			`LOCALES_DIR = resource_filename("pycountry", "locales")`
			`DATABASE_DIR = resource_filename("pycountry", "databases")`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00

Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`def remove_accents(input_str):`
			`# Borrowed from https://stackoverflow.com/a/517974/1509718`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`nfkd_form = unicodedata.normalize("NFKD", input_str)`
			`return "".join([c for c in nfkd_form if not unicodedata.combining(c)])`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00

Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`class ExistingCountries(pycountry.db.Database):`
			`"""Provides access to an ISO 3166 database (Countries)."""`

Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`data_class_name = "Country"`
			`root_key = "3166-1"`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`def search_fuzzy(self, query):`
			`query = remove_accents(query.strip().lower())`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`# A country-code to points mapping for later sorting countries`
			`# based on the query's matching incidence.`
			`results = {}`

			`def add_result(country, points):`
			`results.setdefault(country.alpha_2, 0)`
			`results[country.alpha_2] += points`

			`# Prio 1: exact matches on country names`
			`try:`
			`add_result(self.lookup(query), 50)`
			`except LookupError:`
			`pass`

			`# Prio 2: exact matches on subdivision names`
			`for candidate in subdivisions:`
			`for v in candidate._fields.values():`
			`if v is None:`
			`continue`
			`v = remove_accents(v.lower())`
			`# Some names include alternative versions which we want to`
			`# match exactly.`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`for v in v.split(";"):`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`if v == query:`
			`add_result(candidate.country, 49)`
			`break`

			`# Prio 3: partial matches on country names`
			`for candidate in self:`
			`# Higher priority for a match on the common name`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`for v in [`
			`candidate._fields.get("name"),`
			`candidate._fields.get("official_name"),`
			`candidate._fields.get("comment"),`
			`]:`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`if v is None:`
			`continue`
			`v = remove_accents(v.lower())`
			`if query in v:`
			`# This prefers countries with a match early in their name`
			`# and also balances against countries with a number of`
			`# partial matches and their name containing 'new' in the`
			`# middle`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`add_result(candidate, max([5, 30 - (2 * v.find(query))]))`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`break`

			`# Prio 4: partial matches on subdivision names`
			`for candidate in subdivisions:`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`v = candidate._fields.get("name")`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`if v is None:`
			`continue`
			`v = remove_accents(v.lower())`
			`if query in v:`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`add_result(candidate.country, max([1, 5 - v.find(query)]))`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00
			`if not results:`
			`raise LookupError(query)`

			`results = [`
			`self.get(alpha_2=x[0])`
			`# sort by points first, by alpha2 code second, and to ensure stable`
			`# results the negative value allows us to sort reversely on the`
			`# points but ascending on the country code.`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`for x in sorted(results.items(), key=lambda x: (-x[1], x[0]))`
			`]`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`return results`


			`class HistoricCountries(ExistingCountries):`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`"""Provides access to an ISO 3166-3 database`
			`(Countries that have been removed from the standard)."""`

Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`data_class_name = "Country"`
			`root_key = "3166-3"`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00

			`class Scripts(pycountry.db.Database):`
			`"""Provides access to an ISO 15924 database (Scripts)."""`

Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`data_class_name = "Script"`
			`root_key = "15924"`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00

			`class Currencies(pycountry.db.Database):`
			`"""Provides access to an ISO 4217 database (Currencies)."""`

Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`data_class_name = "Currency"`
			`root_key = "4217"`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00

			`class Languages(pycountry.db.Database):`
			`"""Provides access to an ISO 639-1/2T/3 database (Languages)."""`

Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`no_index = ["status", "scope", "type", "inverted_name", "common_name"]`
			`data_class_name = "Language"`
			`root_key = "639-3"`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00

Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`class LanguageFamilies(pycountry.db.Database):`
			`"""Provides access to an ISO 639-5 database`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`(Language Families and Groups)."""`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`data_class_name = "LanguageFamily"`
			`root_key = "639-5"`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00

Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`class Subdivision(pycountry.db.Data):`
			`def __init__(self, **kw):`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`if "parent" in kw:`
			`kw["parent_code"] = kw["parent"]`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`else:`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`kw["parent_code"] = None`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`super(Subdivision, self).__init__(**kw)`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`self.country_code = self.code.split("-")[0]`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`if self.parent_code is not None:`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`self.parent_code = "%s-%s" % (self.country_code, self.parent_code)`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00
			`@property`
			`def country(self):`
			`return countries.get(alpha_2=self.country_code)`

			`@property`
			`def parent(self):`
			`if not self.parent_code:`
			`return None`
			`return subdivisions.get(code=self.parent_code)`


			`class Subdivisions(pycountry.db.Database):`

			`# Note: subdivisions can be hierarchical to other subdivisions. The`
			`# parent_code attribute is related to other subdivisons, not`
			`# the country!`

			`data_class_base = Subdivision`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`data_class_name = "Subdivision"`
			`no_index = ["name", "parent_code", "parent", "type"]`
			`root_key = "3166-2"`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00
			`def _load(self, args, *kw):`
			`super(Subdivisions, self)._load(args, *kw)`

			`# Add index for the country code.`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`self.indices["country_code"] = {}`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`for subdivision in self:`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`divs = self.indices["country_code"].setdefault(`
			`subdivision.country_code.lower(), set()`
			`)`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`divs.add(subdivision)`

			`def get(self, **kw):`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`default = kw.setdefault("default", None)`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`subdivisions = super(Subdivisions, self).get(**kw)`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`if subdivisions is default and "country_code" in kw:`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`# This handles the case where we know about a country but there`
			`# are no subdivisions: we return an empty list in this case`
			`# (sticking to the expected type here) instead of None.`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`if countries.get(alpha_2=kw["country_code"]) is not None:`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`return []`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`return subdivisions`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00

Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`countries = ExistingCountries(os.path.join(DATABASE_DIR, "iso3166-1.json"))`
			`subdivisions = Subdivisions(os.path.join(DATABASE_DIR, "iso3166-2.json"))`
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`historic_countries = HistoricCountries(`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`os.path.join(DATABASE_DIR, "iso3166-3.json")`
			`)`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`currencies = Currencies(os.path.join(DATABASE_DIR, "iso4217.json"))`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`languages = Languages(os.path.join(DATABASE_DIR, "iso639-3.json"))`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`language_families = LanguageFamilies(`
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`os.path.join(DATABASE_DIR, "iso639-5.json")`
			`)`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00
Updated vendored dependencies. 2022-11-07 18:06:49 +00:00			`scripts = Scripts(os.path.join(DATABASE_DIR, "iso15924.json"))`