mirror of https://github.com/evilhero/mylar
144 lines
4.8 KiB
Python
144 lines
4.8 KiB
Python
# -*- coding: utf-8 -*-
|
|
from __future__ import (
|
|
print_function,
|
|
division,
|
|
unicode_literals,
|
|
absolute_import
|
|
)
|
|
|
|
# Std. lib imports.
|
|
import sys
|
|
|
|
# Local imports.
|
|
from natsort.compat.py23 import (
|
|
PY_VERSION,
|
|
cmp_to_key,
|
|
py23_unichr,
|
|
)
|
|
|
|
# Module-level string placeholders.
# ``null_string`` is simply the empty string.  ``null_string_max`` is the
# highest code point (``sys.maxunicode``) repeated 20 times, so it is
# expected to sort after any other string; only very unusual data would
# come after it.
null_string = ''
null_string_max = 20 * py23_unichr(sys.maxunicode)
|
|
|
|
# Make the strxfrm function from strcoll on Python2
|
|
# It can be buggy (especially on BSD-based systems),
|
|
# so prefer icu if available.
|
|
try:
|
|
import icu
|
|
from locale import getlocale
|
|
|
|
# Empty byte-string placeholder used alongside the "max" placeholder below.
null_string_locale = b''

# This byte string is meant to sort after any other byte string because it
# is the byte 0x7f repeated 50 times.  It must be spelled with the ``\x``
# escape: without the backslash the literal is the three ASCII characters
# "x7f" repeated, which starts with 0x78 and sorts before e.g. b'z'.
# NOTE(review): keys containing bytes above 0x7f would still compare
# greater than this value -- confirm whether 0xff is wanted instead.
null_string_locale_max = b'\x7f' * 50
|
|
|
|
def dumb_sort():
    """Report whether the collation needs compensating for; here, never.

    This variant is the one defined when ``icu`` imports successfully,
    and it unconditionally returns ``False``.
    """
    needs_compensation = False
    return needs_compensation
|
|
|
|
# When icu is in use, derive its locale from the current global locale.
def get_icu_locale():
    """Return an ``icu.Locale`` built from ``locale.getlocale()``.

    The pieces returned by ``getlocale()`` are joined with ``'.'`` and
    passed to ``icu.Locale``.  If that raises ``TypeError`` (for example
    because ``getlocale()`` returned ``None`` entries, which cannot be
    joined), a default-constructed ``icu.Locale()`` is returned instead.
    """
    try:
        locale_parts = getlocale()
        locale_name = '.'.join(locale_parts)
        return icu.Locale(locale_name)
    except TypeError:  # pragma: no cover
        return icu.Locale()
|
|
|
|
def get_strxfrm():
    """Return the ``getSortKey`` bound method of a fresh ICU collator.

    The collator is created with ``icu.Collator.createInstance`` for the
    locale returned by ``get_icu_locale()``.
    """
    active_locale = get_icu_locale()
    collator = icu.Collator.createInstance(active_locale)
    return collator.getSortKey
|
|
|
|
def get_thousands_sep():
    """Return the grouping (thousands) separator symbol from ICU.

    The symbol is looked up in ``icu.DecimalFormatSymbols`` for the locale
    returned by ``get_icu_locale()``.
    """
    symbol_id = icu.DecimalFormatSymbols.kGroupingSeparatorSymbol
    symbols = icu.DecimalFormatSymbols(get_icu_locale())
    return symbols.getSymbol(symbol_id)
|
|
|
|
def get_decimal_point():
    """Return the decimal separator symbol from ICU.

    The symbol is looked up in ``icu.DecimalFormatSymbols`` for the locale
    returned by ``get_icu_locale()``.
    """
    symbol_id = icu.DecimalFormatSymbols.kDecimalSeparatorSymbol
    symbols = icu.DecimalFormatSymbols(get_icu_locale())
    return symbols.getSymbol(symbol_id)
|
|
|
|
except ImportError:
|
|
import locale
|
|
if PY_VERSION < 3:
|
|
from locale import strcoll
|
|
# Unique marker object; compared by identity only.
sentinel = object()


def custom_strcoll(a, b, last=sentinel):
    """Compare *a* and *b* with ``strcoll``, ordering *last* after all else.

    Returns 0 when both arguments are the *last* marker, 1 when only *a*
    is, -1 when only *b* is, and ``strcoll(a, b)`` when neither is.
    """
    a_is_last = a is last
    b_is_last = b is last
    if a_is_last:
        # Two markers tie; a lone marker on the left sorts after b.
        return 0 if b_is_last else 1
    if b_is_last:
        # a is an ordinary value here, so it sorts before the marker.
        return -1
    return strcoll(a, b)
|
|
|
|
# Turn the sentinel-aware comparison into a key function, then build the
# two placeholder keys from it: one for the empty string and one for the
# sentinel (which custom_strcoll orders after everything else).
strxfrm = cmp_to_key(custom_strcoll)
null_string_locale, null_string_locale_max = strxfrm(''), strxfrm(sentinel)
|
|
else:
|
|
from locale import strxfrm
|
|
# Placeholder strings for this branch.
# ``null_string_locale`` is the empty string.  ``null_string_locale_max``
# is the highest code point (``sys.maxunicode``) repeated 20 times, so it
# is expected to sort after any other string; only very unusual data
# would come after it.
null_string_locale = ''
null_string_locale_max = 20 * py23_unichr(sys.maxunicode)
|
|
|
|
# Some systems ship a broken locale implementation that does not sort in
# the expected order.  This check is used to detect that case so callers
# can compensate.
def dumb_sort():
    """Return True when ``strxfrm('A')`` compares less than ``strxfrm('a')``."""
    upper_key = strxfrm('A')
    lower_key = strxfrm('a')
    return upper_key < lower_key
|
|
|
|
def get_strxfrm():
    """Return the module-level ``strxfrm`` callable unchanged."""
    transform = strxfrm
    return transform
|
|
|
|
def get_thousands_sep():
    """Return the thousands separator for the current locale.

    The value reported by ``locale.localeconv()`` is returned directly
    unless ``dumb_sort()`` indicates a broken locale library.  In that
    case the separator may be incorrectly blank, so a table of known
    corrections keyed by locale name is consulted, falling back to the
    reported value for names not in the table.  If the locale name cannot
    be assembled (``TypeError`` from joining ``locale.getlocale()``, i.e.
    no locale loaded), ``','`` is returned.
    """
    reported = locale.localeconv()['thousands_sep']
    if not dumb_sort():
        return reported

    try:
        locale_name = '.'.join(locale.getlocale())
    except TypeError:  # No locale loaded, default to ','
        return ','

    # Known corrections, grouped by language for readability.
    corrections = {
        # German
        'de_DE.ISO8859-1': '.',
        'de_DE.ISO8859-15': '.',
        'de_DE.UTF-8': '.',
        'de_de': '.',
        'de_AT.ISO8859-1': '.',
        'de_AT.ISO8859-15': '.',
        'de_AT.UTF-8': '.',
        'de_at': '\xa0',
        'de_CH.ISO8859-1': '.',
        'de_CH.ISO8859-15': '.',
        'de_ch': "'",
        # Spanish
        'es_ES.ISO8859-1': '.',
        'es_ES.ISO8859-15': '.',
        'es_ES.UTF-8': '.',
        'es_es': '.',
        # Dutch
        'nl_NL.ISO8859-1': '.',
        'nl_NL.ISO8859-15': '.',
        'nl_NL.UTF-8': '.',
        'nl_nl': '.',
        # Catalan
        'ca_ES.ISO8859-1': '.',
        'ca_ES.ISO8859-15': '.',
        'ca_ES.UTF-8': '.',
        'ca_es': '.',
        # French
        'fr_FR.ISO8859-1': '\xa0',
        'fr_FR.ISO8859-15': '\xa0',
        'fr_FR.UTF-8': '\xa0',
        'fr_fr': '\xa0',
        'fr_CH.ISO8859-1': '\xa0',
        'fr_CH.ISO8859-15': '\xa0',
        'fr_ch': '\xa0',
        'fr_CA.ISO8859-1': '\xa0',
        'fr_CA.ISO8859-15': '\xa0',
        'fr_ca': '\xa0',
    }
    return corrections.get(locale_name, reported)
|
|
|
|
def get_decimal_point():
    """Return the ``'decimal_point'`` entry of ``locale.localeconv()``."""
    conventions = locale.localeconv()
    return conventions['decimal_point']
|