2018-09-17 00:27:00 +00:00
|
|
|
|
#!/usr/bin/env python
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
"""
|
|
|
|
|
country property
|
|
|
|
|
"""
|
|
|
|
|
# pylint: disable=no-member
|
|
|
|
|
import babelfish
|
|
|
|
|
|
|
|
|
|
from rebulk import Rebulk
|
2018-10-06 17:18:55 +00:00
|
|
|
|
from ..common.words import COMMON_WORDS, iter_words
|
2018-09-17 00:27:00 +00:00
|
|
|
|
|
|
|
|
|
|
2018-10-06 17:18:55 +00:00
|
|
|
|
def country():
    """
    Build the Rebulk object holding the country detection rule.

    A single functional pattern backed by ``find_countries`` is registered
    under the ``country`` name.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def _solve_conflict(match, other):
        # Prefer language and any other property over country if not US or GB:
        # ``other`` is returned only when it is a language and this country is
        # US or GB; in every other case ``match`` is returned.
        # NOTE(review): how the returned match is treated is defined by rebulk's
        # conflict_solver contract - confirm against the rebulk documentation.
        if other.name == 'language' and match.value in [babelfish.Country('US'),
                                                         babelfish.Country('GB')]:
            return other
        return match

    builder = Rebulk().defaults(name='country')
    builder.functional(find_countries,
                       conflict_solver=_solve_conflict,
                       properties={'country': [None]})
    return builder
|
|
|
|
|
|
|
|
|
|
|
2018-10-06 17:18:55 +00:00
|
|
|
|
# Extra country synonyms, keyed by alpha2 code. GuessitCountryConverter
# lower-cases every synonym before using it as a lookup key.
COUNTRIES_SYN = {
    'ES': ['españa'],
    'GB': ['UK'],
    'BR': ['brazilian', 'bra'],
    'CA': ['québec', 'quebec', 'qc'],
    # FIXME: this one is a bit of a stretch, not sure how to do it properly, though...
    'MX': ['Latinoamérica', 'latin america'],
}
|
|
|
|
|
|
|
|
|
|
|
2018-09-17 00:27:00 +00:00
|
|
|
|
class GuessitCountryConverter(babelfish.CountryReverseConverter):
    """
    Country converter that checks guessit-specific synonyms before falling
    back to babelfish's own alpha2 and name lookups.
    """

    def __init__(self):
        # Lower-cased synonym -> alpha2 code, flattened from COUNTRIES_SYN.
        self.guessit_exceptions = {synonym.lower(): code
                                   for code, synonyms in COUNTRIES_SYN.items()
                                   for synonym in synonyms}

    @property
    def codes(self):
        """
        Union of the 'name' converter codes, the values of
        ``babelfish.COUNTRIES`` and the guessit synonym keys.
        """
        name_codes = babelfish.country_converters['name'].codes
        country_values = frozenset(babelfish.COUNTRIES.values())
        exception_keys = frozenset(self.guessit_exceptions.keys())
        return name_codes | country_values | exception_keys

    def convert(self, alpha2):
        """
        Convert an alpha2 code to its string form; 'GB' is rendered as 'UK'.

        :param alpha2: alpha2 country code
        :return: 'UK' for 'GB', otherwise ``str(babelfish.Country(alpha2))``
        """
        return 'UK' if alpha2 == 'GB' else str(babelfish.Country(alpha2))

    def reverse(self, name):  # pylint:disable=arguments-differ
        """
        Resolve a word to an alpha2 code.

        Lookup order: guessit synonyms, then the upper-cased word as an
        alpha2 code, then babelfish's name converter.

        :param name: word to resolve
        :return: alpha2 country code
        :raises babelfish.CountryReverseError: when no lookup succeeds
        """
        # exceptions come first, as they need to override a potential match
        # with any of the other guessers
        lowered = name.lower()
        if lowered in self.guessit_exceptions:
            return self.guessit_exceptions[lowered]

        # Next, treat the word itself as an alpha2 code.
        try:
            by_code = babelfish.Country(name.upper())
        except ValueError:
            pass
        else:
            return by_code.alpha2

        # Finally, try the word as a country name.
        try:
            return babelfish.Country.fromname(name).alpha2
        except babelfish.CountryReverseError:
            pass

        raise babelfish.CountryReverseError(name)
|
|
|
|
|
|
|
|
|
|
|
2018-10-06 17:18:55 +00:00
|
|
|
|
# Register the converter under the 'guessit' key at import time.
# NOTE(review): presumably this is what makes babelfish.Country.fromguessit
# (used by find_countries) available - confirm against babelfish's converter docs.
babelfish.country_converters['guessit'] = GuessitCountryConverter()
|
|
|
|
|
|
2018-09-17 00:27:00 +00:00
|
|
|
|
|
2018-10-06 17:18:55 +00:00
|
|
|
|
def is_allowed_country(country_object, context=None):
    """
    Check if country is allowed.

    :param country_object: object exposing ``name`` and ``alpha2`` strings
    :param context: optional mapping that may hold an ``allowed_countries`` entry
    :return: True when no restriction is configured, or when the lower-cased
        name or alpha2 code is found in ``allowed_countries``
    :rtype: bool
    """
    allowed = context.get('allowed_countries') if context else None
    if not allowed:
        # Missing or empty restriction: every country is accepted.
        return True
    if country_object.name.lower() in allowed:
        return True
    return country_object.alpha2.lower() in allowed
|
2018-09-17 00:27:00 +00:00
|
|
|
|
|
|
|
|
|
|
2018-10-06 17:18:55 +00:00
|
|
|
|
def find_countries(string, context=None):
    """
    Find countries in given string.

    :param string: input string to scan for country words
    :param context: optional options mapping, forwarded to ``is_allowed_country``
    :return: list of ``(start, end, {'value': country})`` tuples whose offsets
        index into ``string`` as it was passed in
    :rtype: list
    """
    # Words are searched in a stripped copy of the input, so the spans they
    # report are shifted left by the amount of leading whitespace removed.
    # Add that amount back so the spans line up with the original string.
    offset = len(string) - len(string.lstrip())

    ret = []
    for word_match in iter_words(string.strip().lower()):
        word = word_match.value
        if word.lower() in COMMON_WORDS:
            continue
        # Only the conversion can raise babelfish.Error; keep the try minimal.
        try:
            country_object = babelfish.Country.fromguessit(word)
        except babelfish.Error:
            continue
        if is_allowed_country(country_object, context):
            ret.append((word_match.span[0] + offset,
                        word_match.span[1] + offset,
                        {'value': country_object}))
    return ret
|