mylar/lib/natsort/compat/locale.py

144 lines
4.8 KiB
Python

# -*- coding: utf-8 -*-
from __future__ import (
print_function,
division,
unicode_literals,
absolute_import
)
# Std. lib imports.
import sys
# Local imports.
from natsort.compat.py23 import (
PY_VERSION,
cmp_to_key,
py23_unichr,
)
# This string should be sorted after any other byte string because
# it contains the max unicode character repeated 20 times.
# You would need some odd data to come after that.
null_string = ''
null_string_max = py23_unichr(sys.maxunicode) * 20
# Make the strxfrm function from strcoll on Python2
# It can be buggy (especially on BSD-based systems),
# so prefer icu if available.
try:
import icu
from locale import getlocale
null_string_locale = b''
# This string should in theory be sorted after any other byte
# string because it contains the max byte char repeated many times.
# You would need some odd data to come after that.
null_string_locale_max = b'x7f' * 50
def dumb_sort():
return False
# If using icu, get the locale from the current global locale,
def get_icu_locale():
try:
return icu.Locale('.'.join(getlocale()))
except TypeError: # pragma: no cover
return icu.Locale()
def get_strxfrm():
return icu.Collator.createInstance(get_icu_locale()).getSortKey
def get_thousands_sep():
sep = icu.DecimalFormatSymbols.kGroupingSeparatorSymbol
return icu.DecimalFormatSymbols(get_icu_locale()).getSymbol(sep)
def get_decimal_point():
sep = icu.DecimalFormatSymbols.kDecimalSeparatorSymbol
return icu.DecimalFormatSymbols(get_icu_locale()).getSymbol(sep)
except ImportError:
import locale
if PY_VERSION < 3:
from locale import strcoll
sentinel = object()
def custom_strcoll(a, b, last=sentinel):
"""strcoll that can handle a sentinel that is always last."""
if a is last:
return 0 if a is b else 1
elif b is last: # a cannot also be sentinel b/c above logic
return -1
else: # neither are sentinel
return strcoll(a, b)
strxfrm = cmp_to_key(custom_strcoll)
null_string_locale = strxfrm('')
null_string_locale_max = strxfrm(sentinel)
else:
from locale import strxfrm
null_string_locale = ''
# This string should be sorted after any other byte string because
# it contains the max unicode character repeated 20 times.
# You would need some odd data to come after that.
null_string_locale_max = py23_unichr(sys.maxunicode) * 20
# On some systems, locale is broken and does not sort in the expected
# order. We will try to detect this and compensate.
def dumb_sort():
return strxfrm('A') < strxfrm('a')
def get_strxfrm():
return strxfrm
def get_thousands_sep():
sep = locale.localeconv()['thousands_sep']
# If this locale library is broken, some of the thousands separator
# characters are incorrectly blank. Here is a lookup table of the
# corrections I am aware of.
if dumb_sort():
try:
loc = '.'.join(locale.getlocale())
except TypeError: # No locale loaded, default to ','
return ','
return {'de_DE.ISO8859-15': '.',
'es_ES.ISO8859-1': '.',
'de_AT.ISO8859-1': '.',
'de_at': '\xa0',
'nl_NL.UTF-8': '.',
'es_es': '.',
'fr_CH.ISO8859-15': '\xa0',
'fr_CA.ISO8859-1': '\xa0',
'de_CH.ISO8859-1': '.',
'fr_FR.ISO8859-15': '\xa0',
'nl_NL.ISO8859-1': '.',
'ca_ES.UTF-8': '.',
'nl_NL.ISO8859-15': '.',
'de_ch': "'",
'ca_es': '.',
'de_AT.ISO8859-15': '.',
'ca_ES.ISO8859-1': '.',
'de_AT.UTF-8': '.',
'es_ES.UTF-8': '.',
'fr_fr': '\xa0',
'es_ES.ISO8859-15': '.',
'de_DE.ISO8859-1': '.',
'nl_nl': '.',
'fr_ch': '\xa0',
'fr_ca': '\xa0',
'de_DE.UTF-8': '.',
'ca_ES.ISO8859-15': '.',
'de_CH.ISO8859-15': '.',
'fr_FR.ISO8859-1': '\xa0',
'fr_CH.ISO8859-1': '\xa0',
'de_de': '.',
'fr_FR.UTF-8': '\xa0',
'fr_CA.ISO8859-15': '\xa0',
}.get(loc, sep)
else:
return sep
def get_decimal_point():
return locale.localeconv()['decimal_point']