# -*- coding: utf-8 -*- """ Natsort can sort strings with numbers in a natural order. It provides the natsorted function to sort strings with arbitrary numbers. You can mix types with natsorted. This can get around the new 'unorderable types' issue with Python 3. Natsort will recursively descend into lists of lists so you can sort by the sublist contents. See the README or the natsort homepage for more details. """ from __future__ import ( print_function, division, unicode_literals, absolute_import ) # Std lib. imports. from operator import itemgetter from functools import partial from warnings import warn # Local imports. import sys import natsort.compat.locale from natsort.ns_enum import ns from natsort.compat.py23 import ( u_format, py23_str, py23_cmp) from natsort.utils import ( _natsort_key, _args_to_enum, _do_decoding, _regex_chooser, _parse_string_factory, _parse_path_factory, _parse_number_factory, _parse_bytes_factory, _input_string_transform_factory, _string_component_transform_factory, _final_data_transform_factory, ) # Make sure the doctest works for either python2 or python3 __doc__ = u_format(__doc__) @u_format def decoder(encoding): """ Return a function that can be used to decode bytes to unicode. Parameters ---------- encoding: str The codec to use for decoding. This must be a valid unicode codec. Returns ------- decode_function: A function that takes a single argument and attempts to decode it using the supplied codec. Any `UnicodeErrors` are raised. If the argument was not of `bytes` type, it is simply returned as-is. See Also -------- as_ascii as_utf8 Examples -------- >>> f = decoder('utf8') >>> f(b'bytes') == 'bytes' True >>> f(12345) == 12345 True >>> # On Python 3, without decoder this would return [b'a10', b'a2'] >>> natsorted([b'a10', b'a2'], key=decoder('utf8')) == [b'a2', b'a10'] True >>> # On Python 3, without decoder this would raise a TypeError. >>> natsorted([b'a10', 'a2'], key=decoder('utf8')) == ['a2', b'a10'] True """ return partial(_do_decoding, encoding=encoding) @u_format def as_ascii(s): """ Function to decode an input with the ASCII codec, or return as-is. Parameters ---------- s: Any object. Returns ------- output: If the input was of type `bytes`, the return value is a `str` decoded with the ASCII codec. Otherwise, the return value is identically the input. See Also -------- decoder """ return _do_decoding(s, 'ascii') @u_format def as_utf8(s): """ Function to decode an input with the UTF-8 codec, or return as-is. Parameters ---------- s: Any object. Returns ------- output: If the input was of type `bytes`, the return value is a `str` decoded with the UTF-8 codec. Otherwise, the return value is identically the input. See Also -------- decoder """ return _do_decoding(s, 'utf-8') def natsort_key(val, key=None, alg=0, **_kwargs): """Undocumented, kept for backwards-compatibility.""" msg = "natsort_key is deprecated as of 3.4.0, please use natsort_keygen" warn(msg, DeprecationWarning) return natsort_keygen(key, alg, **_kwargs)(val) @u_format def natsort_keygen(key=None, alg=0, **_kwargs): """\ Generate a key to sort strings and numbers naturally. Generate a key to sort strings and numbers naturally, not lexicographically. This key is designed for use as the `key` argument to functions such as the `sorted` builtin. The user may customize the generated function with the arguments to `natsort_keygen`, including an optional `key` function. Parameters ---------- key : callable, optional A key used to manipulate the input value before parsing for numbers. It is **not** applied recursively. It should accept a single argument and return a single value. alg : ns enum, optional This option is used to control which algorithm `natsort` uses when sorting. For details into these options, please see the :class:`ns` class documentation. The default is `ns.INT`. Returns ------- out : function A function that parses input for natural sorting that is suitable for passing as the `key` argument to functions such as `sorted`. See Also -------- natsorted Examples -------- `natsort_keygen` is a convenient way to create a custom key to sort lists in-place (for example).:: >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] >>> a.sort(key=natsort_keygen(alg=ns.REAL)) >>> a [{u}'num-3', {u}'num2', {u}'num5.10', {u}'num5.3'] """ # Transform old arguments to the ns enum. try: alg = _args_to_enum(**_kwargs) | alg except TypeError: msg = "natsort_keygen: 'alg' argument must be from the enum 'ns'" raise ValueError(msg+', got {0}'.format(py23_str(alg))) # Add the _DUMB option if the locale library is broken. if alg & ns.LOCALEALPHA and natsort.compat.locale.dumb_sort(): alg |= ns._DUMB # Set some variables that will be passed to the factory functions if alg & ns.NUMAFTER: if alg & ns.LOCALEALPHA: sep = natsort.compat.locale.null_string_locale_max else: sep = natsort.compat.locale.null_string_max pre_sep = natsort.compat.locale.null_string_max else: if alg & ns.LOCALEALPHA: sep = natsort.compat.locale.null_string_locale else: sep = natsort.compat.locale.null_string pre_sep = natsort.compat.locale.null_string regex = _regex_chooser[alg & ns._NUMERIC_ONLY] # Create the functions that will be used to split strings. input_transform = _input_string_transform_factory(alg) component_transform = _string_component_transform_factory(alg) final_transform = _final_data_transform_factory(alg, sep, pre_sep) # Create the high-level parsing functions for strings, bytes, and numbers. string_func = _parse_string_factory( alg, sep, regex.split, input_transform, component_transform, final_transform ) if alg & ns.PATH: string_func = _parse_path_factory(string_func) bytes_func = _parse_bytes_factory(alg) num_func = _parse_number_factory(alg, sep, pre_sep) # Return the natsort key with the parsing path pre-chosen. return partial( _natsort_key, key=key, string_func=string_func, bytes_func=bytes_func, num_func=num_func ) @u_format def natsorted(seq, key=None, reverse=False, alg=0, **_kwargs): """\ Sorts an iterable naturally. Sorts an iterable naturally (alphabetically and numerically), not lexicographically. Returns a list containing a sorted copy of the iterable. Parameters ---------- seq : iterable The iterable to sort. key : callable, optional A key used to determine how to sort each element of the iterable. It is **not** applied recursively. It should accept a single argument and return a single value. reverse : {{True, False}}, optional Return the list in reversed sorted order. The default is `False`. alg : ns enum, optional This option is used to control which algorithm `natsort` uses when sorting. For details into these options, please see the :class:`ns` class documentation. The default is `ns.INT`. Returns ------- out: list The sorted sequence. See Also -------- natsort_keygen : Generates the key that makes natural sorting possible. realsorted : A wrapper for ``natsorted(seq, alg=ns.REAL)``. humansorted : A wrapper for ``natsorted(seq, alg=ns.LOCALE)``. index_natsorted : Returns the sorted indexes from `natsorted`. Examples -------- Use `natsorted` just like the builtin `sorted`:: >>> a = ['num3', 'num5', 'num2'] >>> natsorted(a) [{u}'num2', {u}'num3', {u}'num5'] """ natsort_key = natsort_keygen(key, alg, **_kwargs) return sorted(seq, reverse=reverse, key=natsort_key) @u_format def versorted(seq, key=None, reverse=False, alg=0, **_kwargs): """\ Identical to :func:`natsorted`. This function exists for backwards compatibility with `natsort` version < 4.0.0. Future development should use :func:`natsorted`. See Also -------- natsorted """ return natsorted(seq, key, reverse, alg, **_kwargs) @u_format def humansorted(seq, key=None, reverse=False, alg=0): """\ Convenience function to properly sort non-numeric characters. Convenience function to properly sort non-numeric characters in a locale-aware fashion (a.k.a "human sorting"). This is a wrapper around ``natsorted(seq, alg=ns.LOCALE)``. Parameters ---------- seq : iterable The sequence to sort. key : callable, optional A key used to determine how to sort each element of the sequence. It is **not** applied recursively. It should accept a single argument and return a single value. reverse : {{True, False}}, optional Return the list in reversed sorted order. The default is `False`. alg : ns enum, optional This option is used to control which algorithm `natsort` uses when sorting. For details into these options, please see the :class:`ns` class documentation. The default is `ns.LOCALE`. Returns ------- out : list The sorted sequence. See Also -------- index_humansorted : Returns the sorted indexes from `humansorted`. Notes ----- Please read :ref:`locale_issues` before using `humansorted`. Examples -------- Use `humansorted` just like the builtin `sorted`:: >>> a = ['Apple', 'Banana', 'apple', 'banana'] >>> natsorted(a) [{u}'Apple', {u}'Banana', {u}'apple', {u}'banana'] >>> humansorted(a) [{u}'apple', {u}'Apple', {u}'banana', {u}'Banana'] """ return natsorted(seq, key, reverse, alg | ns.LOCALE) @u_format def realsorted(seq, key=None, reverse=False, alg=0): """\ Convenience function to properly sort signed floats. Convenience function to properly sort signed floats within strings (i.e. "a-5.7"). This is a wrapper around ``natsorted(seq, alg=ns.REAL)``. The behavior of :func:`realsorted` for `natsort` version >= 4.0.0 was the default behavior of :func:`natsorted` for `natsort` version < 4.0.0. Parameters ---------- seq : iterable The sequence to sort. key : callable, optional A key used to determine how to sort each element of the sequence. It is **not** applied recursively. It should accept a single argument and return a single value. reverse : {{True, False}}, optional Return the list in reversed sorted order. The default is `False`. alg : ns enum, optional This option is used to control which algorithm `natsort` uses when sorting. For details into these options, please see the :class:`ns` class documentation. The default is `ns.REAL`. Returns ------- out : list The sorted sequence. See Also -------- index_realsorted : Returns the sorted indexes from `realsorted`. Examples -------- Use `realsorted` just like the builtin `sorted`:: >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] >>> natsorted(a) [{u}'num2', {u}'num5.3', {u}'num5.10', {u}'num-3'] >>> realsorted(a) [{u}'num-3', {u}'num2', {u}'num5.10', {u}'num5.3'] """ return natsorted(seq, key, reverse, alg | ns.REAL) @u_format def index_natsorted(seq, key=None, reverse=False, alg=0, **_kwargs): """\ Return the list of the indexes used to sort the input sequence. Sorts a sequence naturally, but returns a list of sorted the indexes and not the sorted list. This list of indexes can be used to sort multiple lists by the sorted order of the given sequence. Parameters ---------- seq : iterable The sequence to sort. key : callable, optional A key used to determine how to sort each element of the sequence. It is **not** applied recursively. It should accept a single argument and return a single value. reverse : {{True, False}}, optional Return the list in reversed sorted order. The default is `False`. alg : ns enum, optional This option is used to control which algorithm `natsort` uses when sorting. For details into these options, please see the :class:`ns` class documentation. The default is `ns.INT`. Returns ------- out : tuple The ordered indexes of the sequence. See Also -------- natsorted order_by_index Examples -------- Use index_natsorted if you want to sort multiple lists by the sorted order of one list:: >>> a = ['num3', 'num5', 'num2'] >>> b = ['foo', 'bar', 'baz'] >>> index = index_natsorted(a) >>> index [2, 0, 1] >>> # Sort both lists by the sort order of a >>> order_by_index(a, index) [{u}'num2', {u}'num3', {u}'num5'] >>> order_by_index(b, index) [{u}'baz', {u}'foo', {u}'bar'] """ if key is None: newkey = itemgetter(1) else: def newkey(x): return key(itemgetter(1)(x)) # Pair the index and sequence together, then sort by element index_seq_pair = [[x, y] for x, y in enumerate(seq)] index_seq_pair.sort(reverse=reverse, key=natsort_keygen(newkey, alg, **_kwargs)) return [x for x, _ in index_seq_pair] @u_format def index_versorted(seq, key=None, reverse=False, alg=0, **_kwargs): """\ Identical to :func:`index_natsorted`. This function exists for backwards compatibility with ``index_natsort`` version < 4.0.0. Future development should use :func:`index_natsorted`. Please see the :func:`index_natsorted` documentation for use. See Also -------- index_natsorted """ return index_natsorted(seq, key, reverse, alg, **_kwargs) @u_format def index_humansorted(seq, key=None, reverse=False, alg=0): """\ Return the list of the indexes used to sort the input sequence in a locale-aware manner. Sorts a sequence in a locale-aware manner, but returns a list of sorted the indexes and not the sorted list. This list of indexes can be used to sort multiple lists by the sorted order of the given sequence. This is a wrapper around ``index_natsorted(seq, alg=ns.LOCALE)``. Parameters ---------- seq: iterable The sequence to sort. key: callable, optional A key used to determine how to sort each element of the sequence. It is **not** applied recursively. It should accept a single argument and return a single value. reverse : {{True, False}}, optional Return the list in reversed sorted order. The default is `False`. alg : ns enum, optional This option is used to control which algorithm `natsort` uses when sorting. For details into these options, please see the :class:`ns` class documentation. The default is `ns.LOCALE`. Returns ------- out : tuple The ordered indexes of the sequence. See Also -------- humansorted order_by_index Notes ----- Please read :ref:`locale_issues` before using `humansorted`. Examples -------- Use `index_humansorted` just like the builtin `sorted`:: >>> a = ['Apple', 'Banana', 'apple', 'banana'] >>> index_humansorted(a) [2, 0, 3, 1] """ return index_natsorted(seq, key, reverse, alg | ns.LOCALE) @u_format def index_realsorted(seq, key=None, reverse=False, alg=0): """\ Return the list of the indexes used to sort the input sequence in a locale-aware manner. Sorts a sequence in a locale-aware manner, but returns a list of sorted the indexes and not the sorted list. This list of indexes can be used to sort multiple lists by the sorted order of the given sequence. This is a wrapper around ``index_natsorted(seq, alg=ns.REAL)``. The behavior of :func:`index_realsorted` in `natsort` version >= 4.0.0 was the default behavior of :func:`index_natsorted` for `natsort` version < 4.0.0. Parameters ---------- seq: iterable The sequence to sort. key: callable, optional A key used to determine how to sort each element of the sequence. It is **not** applied recursively. It should accept a single argument and return a single value. reverse : {{True, False}}, optional Return the list in reversed sorted order. The default is `False`. alg : ns enum, optional This option is used to control which algorithm `natsort` uses when sorting. For details into these options, please see the :class:`ns` class documentation. The default is `ns.REAL`. Returns ------- out : tuple The ordered indexes of the sequence. See Also -------- realsorted order_by_index Examples -------- Use `index_realsorted` just like the builtin `sorted`:: >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] >>> index_realsorted(a) [1, 3, 0, 2] """ return index_natsorted(seq, key, reverse, alg | ns.REAL) @u_format def order_by_index(seq, index, iter=False): """\ Order a given sequence by an index sequence. The output of `index_natsorted` is a sequence of integers (index) that correspond to how its input sequence **would** be sorted. The idea is that this index can be used to reorder multiple sequences by the sorted order of the first sequence. This function is a convenient wrapper to apply this ordering to a sequence. Parameters ---------- seq : sequence The sequence to order. index : iterable The iterable that indicates how to order `seq`. It should be the same length as `seq` and consist of integers only. iter : {{True, False}}, optional If `True`, the ordered sequence is returned as a iterator; otherwise it is returned as a list. The default is `False`. Returns ------- out : {{list, iterator}} The sequence ordered by `index`, as a `list` or as an iterator (depending on the value of `iter`). See Also -------- index_natsorted index_humansorted index_realsorted Examples -------- `order_by_index` is a convenience function that helps you apply the result of `index_natsorted`:: >>> a = ['num3', 'num5', 'num2'] >>> b = ['foo', 'bar', 'baz'] >>> index = index_natsorted(a) >>> index [2, 0, 1] >>> # Sort both lists by the sort order of a >>> order_by_index(a, index) [{u}'num2', {u}'num3', {u}'num5'] >>> order_by_index(b, index) [{u}'baz', {u}'foo', {u}'bar'] """ return (seq[i] for i in index) if iter else [seq[i] for i in index] if float(sys.version[:3]) < 3: # pylint: disable=unused-variable class natcmp(object): """ Compare two objects using a key and an algorithm. Parameters ---------- x : object First object to compare. y : object Second object to compare. alg : ns enum, optional This option is used to control which algorithm `natsort` uses when sorting. For details into these options, please see the :class:`ns` class documentation. The default is `ns.INT`. Returns ------- out: int 0 if x and y are equal, 1 if x > y, -1 if y > x. See Also -------- natsort_keygen : Generates a key that makes natural sorting possible. Examples -------- Use `natcmp` just like the builtin `cmp`:: >>> one = 1 >>> two = 2 >>> natcmp(one, two) -1 """ cached_keys = {} def __new__(cls, x, y, alg=0, *args, **kwargs): try: alg = _args_to_enum(**kwargs) | alg except TypeError: msg = ("natsort_keygen: 'alg' argument must be " "from the enum 'ns'") raise ValueError(msg + ', got {0}'.format(py23_str(alg))) # Add the _DUMB option if the locale library is broken. if alg & ns.LOCALEALPHA and natsort.compat.locale.dumb_sort(): alg |= ns._DUMB if alg not in cls.cached_keys: cls.cached_keys[alg] = natsort_keygen(alg=alg) return py23_cmp(cls.cached_keys[alg](x), cls.cached_keys[alg](y))