# -*- coding: utf-8 -*- from __future__ import ( print_function, division, unicode_literals, absolute_import ) # Std. lib imports. import sys # Local imports. from natsort.compat.py23 import ( PY_VERSION, cmp_to_key, py23_unichr, ) # This string should be sorted after any other byte string because # it contains the max unicode character repeated 20 times. # You would need some odd data to come after that. null_string = '' null_string_max = py23_unichr(sys.maxunicode) * 20 # Make the strxfrm function from strcoll on Python2 # It can be buggy (especially on BSD-based systems), # so prefer icu if available. try: import icu from locale import getlocale null_string_locale = b'' # This string should in theory be sorted after any other byte # string because it contains the max byte char repeated many times. # You would need some odd data to come after that. null_string_locale_max = b'x7f' * 50 def dumb_sort(): return False # If using icu, get the locale from the current global locale, def get_icu_locale(): try: return icu.Locale('.'.join(getlocale())) except TypeError: # pragma: no cover return icu.Locale() def get_strxfrm(): return icu.Collator.createInstance(get_icu_locale()).getSortKey def get_thousands_sep(): sep = icu.DecimalFormatSymbols.kGroupingSeparatorSymbol return icu.DecimalFormatSymbols(get_icu_locale()).getSymbol(sep) def get_decimal_point(): sep = icu.DecimalFormatSymbols.kDecimalSeparatorSymbol return icu.DecimalFormatSymbols(get_icu_locale()).getSymbol(sep) except ImportError: import locale if PY_VERSION < 3: from locale import strcoll sentinel = object() def custom_strcoll(a, b, last=sentinel): """strcoll that can handle a sentinel that is always last.""" if a is last: return 0 if a is b else 1 elif b is last: # a cannot also be sentinel b/c above logic return -1 else: # neither are sentinel return strcoll(a, b) strxfrm = cmp_to_key(custom_strcoll) null_string_locale = strxfrm('') null_string_locale_max = strxfrm(sentinel) else: from locale import strxfrm null_string_locale = '' # This string should be sorted after any other byte string because # it contains the max unicode character repeated 20 times. # You would need some odd data to come after that. null_string_locale_max = py23_unichr(sys.maxunicode) * 20 # On some systems, locale is broken and does not sort in the expected # order. We will try to detect this and compensate. def dumb_sort(): return strxfrm('A') < strxfrm('a') def get_strxfrm(): return strxfrm def get_thousands_sep(): sep = locale.localeconv()['thousands_sep'] # If this locale library is broken, some of the thousands separator # characters are incorrectly blank. Here is a lookup table of the # corrections I am aware of. if dumb_sort(): try: loc = '.'.join(locale.getlocale()) except TypeError: # No locale loaded, default to ',' return ',' return {'de_DE.ISO8859-15': '.', 'es_ES.ISO8859-1': '.', 'de_AT.ISO8859-1': '.', 'de_at': '\xa0', 'nl_NL.UTF-8': '.', 'es_es': '.', 'fr_CH.ISO8859-15': '\xa0', 'fr_CA.ISO8859-1': '\xa0', 'de_CH.ISO8859-1': '.', 'fr_FR.ISO8859-15': '\xa0', 'nl_NL.ISO8859-1': '.', 'ca_ES.UTF-8': '.', 'nl_NL.ISO8859-15': '.', 'de_ch': "'", 'ca_es': '.', 'de_AT.ISO8859-15': '.', 'ca_ES.ISO8859-1': '.', 'de_AT.UTF-8': '.', 'es_ES.UTF-8': '.', 'fr_fr': '\xa0', 'es_ES.ISO8859-15': '.', 'de_DE.ISO8859-1': '.', 'nl_nl': '.', 'fr_ch': '\xa0', 'fr_ca': '\xa0', 'de_DE.UTF-8': '.', 'ca_ES.ISO8859-15': '.', 'de_CH.ISO8859-15': '.', 'fr_FR.ISO8859-1': '\xa0', 'fr_CH.ISO8859-1': '\xa0', 'de_de': '.', 'fr_FR.UTF-8': '\xa0', 'fr_CA.ISO8859-15': '\xa0', }.get(loc, sep) else: return sep def get_decimal_point(): return locale.localeconv()['decimal_point']