bazarr/libs/git/objects/fun.py

"""Module with functions which are supposed to be as fast as possible"""
from stat import S_ISDIR
from git.compat import (
    byte_ord,
    safe_decode,
    defenc,
    xrange,
    text_type,
    bchr
)

__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive',
           'traverse_tree_recursive')


def tree_to_stream(entries, write):
    """Serialize tree entries and hand each serialized record to ``write``.

    Every entry is emitted as ``<octal mode> <name>\\0<binsha>`` in a single
    ``write`` call, in the order the entries are supplied.

    :param entries: **sorted** iterable of ``(binsha, mode, name)`` tuples.
        A ``name`` that is a ``text_type`` instance is encoded with ``defenc``
        before being written; any other name is written unchanged.
    :param write: callable accepting one bytes argument"""
    zero_ord = ord('0')
    octal_mask = 7          # lowest three bits, i.e. one octal digit

    for binsha, mode, name in entries:
        # render the mode as exactly six octal digits, most significant first
        digits = []
        for slot in xrange(6):
            shift = (5 - slot) * 3
            digits.append(bchr(((mode >> shift) & octal_mask) + zero_ord))
        # END for each octal digit
        mode_bytes = b''.join(digits)

        # git omits the leading digit when it is zero (at most one is removed)
        if byte_ord(mode_bytes[0]) == zero_ord:
            mode_bytes = mode_bytes[1:]
        # END drop leading zero

        # the record is assembled from bytes, so a text name has to be
        # encoded first or it could not be joined with the binary sha
        if isinstance(name, text_type):
            name = name.encode(defenc)
        # END encode text names

        record = b''.join((mode_bytes, b' ', name, b'\0', binsha))
        write(record)
    # END for each entry


def tree_entries_from_data(data):
    """Parse the binary form of a tree into a list of entry tuples.

    The data is read as a sequence of records, each laid out as
    ``<octal mode digits> <name>\\0<20 bytes of sha>``.

    :param data: block of tree data (as bytes)
    :return: list of ``(binsha, mode, name)`` tuples in stream order, where
        ``mode`` is an int and ``name`` is the result of ``safe_decode``"""
    zero_ord = ord('0')
    space_ord = ord(' ')
    end = len(data)
    entries = []
    pos = 0
    while pos < end:
        # accumulate the octal mode digit by digit until the separating space;
        # a leading zero may or may not be present, both parse the same way
        mode = 0
        while True:
            current = byte_ord(data[pos])
            if current == space_ord:
                break
            mode = (mode << 3) + (current - zero_ord)
            pos += 1
        # END while reading mode

        # step over the space
        pos += 1

        # the name runs up to, but not including, the next NULL byte
        name_start = pos
        while byte_ord(data[pos]) != 0:
            pos += 1
        # END while scanning name
        name = safe_decode(data[name_start:pos])

        # step over the NULL; the following 20 bytes are the binary sha
        sha_start = pos + 1
        pos = sha_start + 20
        entries.append((data[sha_start:pos], mode, name))
    # END while data remains
    return entries


def _find_by_name(tree_data, name, is_dir, start_at):
    """return data entry matching the given name and tree mode
    or None.
    Before the item is returned, the respective data item is set
    None in the tree_data list to mark it done"""
    try:
        item = tree_data[start_at]
        if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
            tree_data[start_at] = None
            return item
    except IndexError:
        pass
    # END exception handling
    for index, item in enumerate(tree_data):
        if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
            tree_data[index] = None
            return item
        # END if item matches
    # END for each item
    return None


def _to_full_path(item, path_prefix):
    """Rebuild entry with given path prefix"""
    if not item:
        return item
    return (item[0], item[1], path_prefix + item[2])


def traverse_trees_recursive(odb, tree_shas, path_prefix):
    """Walk several trees in parallel and pair up entries that share a name.

    :return: list with one item per non-tree entry found in any of the trees.
        Each item is a tuple of n elements (n == len(tree_shas)); element k is
        either None, if tree k has no matching entry, or a tuple where
        * [0] == 20 byte sha
        * [1] == mode as int
        * [2] == entry name with the accumulated path prefix prepended
        Entries are matched by name and by whether their mode is a directory.
        Matching directories are descended into instead of being reported.
    :param odb: object whose ``stream(sha)`` result provides ``read()``
        returning the tree's data
    :param tree_shas: sized sequence of binary shas pointing to trees, all on
        the same level. A sha may be None, in which case that tree is treated
        as having no entries.
    :param path_prefix: a prefix to be added to the returned paths on this level,
        set it '' for the first iteration
    :note: The ordering of the returned items will be partially lost"""
    tree_count = len(tree_shas)

    # one mutable entry list per input sha
    per_tree = [
        [] if tree_sha is None else tree_entries_from_data(odb.stream(tree_sha).read())
        for tree_sha in tree_shas
    ]

    result = []

    # Take every entry that is still unclaimed, claim its counterparts in all
    # other trees, then either recurse (directories) or report (anything else).
    # Claimed slots are set to None so they are never visited twice.
    for tree_index, current in enumerate(per_tree):
        for item_index, item in enumerate(current):
            if not item:
                continue
            # END skip claimed slots

            row = [None] * tree_count
            row[tree_index] = item
            _, mode, name = item
            is_dir = S_ISDIR(mode)

            # visit every other tree exactly once, wrapping around the end
            for step in range(1, tree_count):
                other = (tree_index + step) % tree_count
                row[other] = _find_by_name(per_tree[other], name, is_dir, item_index)
            # END for each other tree

            if is_dir:
                # a missing entry or an empty sha becomes a None sub-tree
                child_shas = [(entry[0] or None) if entry else None for entry in row]
                result.extend(
                    traverse_trees_recursive(odb, child_shas, path_prefix + name + '/'))
            else:
                result.append(tuple(_to_full_path(entry, path_prefix) for entry in row))
            # END handle directory or leaf

            # this entry is done
            current[item_index] = None
        # END for each entry

        # the whole tree has been processed, release its entries
        del current[:]
    # END for each tree
    return result


def traverse_tree_recursive(odb, tree_sha, path_prefix):
    """Flatten the tree at ``tree_sha`` into a list of its non-tree entries.

    Entries whose mode is a directory are descended into and not reported
    themselves.

    :return: list of entries, each a tuple of
        * [0] 20 byte sha
        * [1] mode as int
        * [2] entry name with ``path_prefix`` and all parent names prepended
    :param odb: object whose ``stream(sha)`` result provides ``read()``
        returning the tree's data
    :param tree_sha: binary sha of the tree to read
    :param path_prefix: prefix to prepend to the front of all returned paths"""
    collected = []

    for sha, mode, name in tree_entries_from_data(odb.stream(tree_sha).read()):
        full_path = path_prefix + name
        if not S_ISDIR(mode):
            collected.append((sha, mode, full_path))
            continue
        # END handle leaf entry

        # sub-tree: gather its entries below "<full_path>/"
        collected += traverse_tree_recursive(odb, sha, full_path + '/')
    # END for each entry

    return collected
Include dependencies and remove requirements.txt 2018-09-17 00:27:00 +00:00			`"""Module with functions which are supposed to be as fast as possible"""`
			`from stat import S_ISDIR`
			`from git.compat import (`
			`byte_ord,`
			`safe_decode,`
			`defenc,`
			`xrange,`
			`text_type,`
			`bchr`
			`)`

			`__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive',`
			`'traverse_tree_recursive')`


			`def tree_to_stream(entries, write):`
			`"""Write the give list of entries into a stream using its write method`
			`:param entries: sorted list of tuples with (binsha, mode, name)`
			`:param write: write method which takes a data string"""`
			`ord_zero = ord('0')`
			`bit_mask = 7 # 3 bits set`

			`for binsha, mode, name in entries:`
			`mode_str = b''`
			`for i in xrange(6):`
			`mode_str = bchr(((mode >> (i * 3)) & bit_mask) + ord_zero) + mode_str`
			`# END for each 8 octal value`

			`# git slices away the first octal if its zero`
			`if byte_ord(mode_str[0]) == ord_zero:`
			`mode_str = mode_str[1:]`
			`# END save a byte`

			`# here it comes: if the name is actually unicode, the replacement below`
			`# will not work as the binsha is not part of the ascii unicode encoding -`
			`# hence we must convert to an utf8 string for it to work properly.`
			`# According to my tests, this is exactly what git does, that is it just`
			`# takes the input literally, which appears to be utf8 on linux.`
			`if isinstance(name, text_type):`
			`name = name.encode(defenc)`
			`write(b''.join((mode_str, b' ', name, b'\0', binsha)))`
			`# END for each item`


			`def tree_entries_from_data(data):`
			`"""Reads the binary representation of a tree and returns tuples of Tree items`
			`:param data: data block with tree data (as bytes)`
			`:return: list(tuple(binsha, mode, tree_relative_path), ...)"""`
			`ord_zero = ord('0')`
			`space_ord = ord(' ')`
			`len_data = len(data)`
			`i = 0`
			`out = []`
			`while i < len_data:`
			`mode = 0`

			`# read mode`
			`# Some git versions truncate the leading 0, some don't`
			`# The type will be extracted from the mode later`
			`while byte_ord(data[i]) != space_ord:`
			`# move existing mode integer up one level being 3 bits`
			`# and add the actual ordinal value of the character`
			`mode = (mode << 3) + (byte_ord(data[i]) - ord_zero)`
			`i += 1`
			`# END while reading mode`

			`# byte is space now, skip it`
			`i += 1`

			`# parse name, it is NULL separated`

			`ns = i`
			`while byte_ord(data[i]) != 0:`
			`i += 1`
			`# END while not reached NULL`

			`# default encoding for strings in git is utf8`
			`# Only use the respective unicode object if the byte stream was encoded`
			`name = data[ns:i]`
			`name = safe_decode(name)`

			`# byte is NULL, get next 20`
			`i += 1`
			`sha = data[i:i + 20]`
			`i = i + 20`
			`out.append((sha, mode, name))`
			`# END for each byte in data stream`
			`return out`


			`def _find_by_name(tree_data, name, is_dir, start_at):`
			`"""return data entry matching the given name and tree mode`
			`or None.`
			`Before the item is returned, the respective data item is set`
			`None in the tree_data list to mark it done"""`
			`try:`
			`item = tree_data[start_at]`
			`if item and item[2] == name and S_ISDIR(item[1]) == is_dir:`
			`tree_data[start_at] = None`
			`return item`
			`except IndexError:`
			`pass`
			`# END exception handling`
			`for index, item in enumerate(tree_data):`
			`if item and item[2] == name and S_ISDIR(item[1]) == is_dir:`
			`tree_data[index] = None`
			`return item`
			`# END if item matches`
			`# END for each item`
			`return None`


			`def _to_full_path(item, path_prefix):`
			`"""Rebuild entry with given path prefix"""`
			`if not item:`
			`return item`
			`return (item[0], item[1], path_prefix + item[2])`


			`def traverse_trees_recursive(odb, tree_shas, path_prefix):`
			`"""`
			`:return: list with entries according to the given binary tree-shas.`
			`The result is encoded in a list`
			`of n tuple\|None per blob/commit, (n == len(tree_shas)), where`
			`* [0] == 20 byte sha`
			`* [1] == mode as int`
			`* [2] == path relative to working tree root`
			`The entry tuple is None if the respective blob/commit did not`
			`exist in the given tree.`
			`:param tree_shas: iterable of shas pointing to trees. All trees must`
			`be on the same level. A tree-sha may be None in which case None`
			`:param path_prefix: a prefix to be added to the returned paths on this level,`
			`set it '' for the first iteration`
			`:note: The ordering of the returned items will be partially lost"""`
			`trees_data = []`
			`nt = len(tree_shas)`
			`for tree_sha in tree_shas:`
			`if tree_sha is None:`
			`data = []`
			`else:`
			`data = tree_entries_from_data(odb.stream(tree_sha).read())`
			`# END handle muted trees`
			`trees_data.append(data)`
			`# END for each sha to get data for`

			`out = []`
			`out_append = out.append`

			`# find all matching entries and recursively process them together if the match`
			`# is a tree. If the match is a non-tree item, put it into the result.`
			`# Processed items will be set None`
			`for ti, tree_data in enumerate(trees_data):`
			`for ii, item in enumerate(tree_data):`
			`if not item:`
			`continue`
			`# END skip already done items`
			`entries = [None for _ in range(nt)]`
			`entries[ti] = item`
			`sha, mode, name = item # its faster to unpack @UnusedVariable`
			`is_dir = S_ISDIR(mode) # type mode bits`

			`# find this item in all other tree data items`
			`# wrap around, but stop one before our current index, hence`
			`# ti+nt, not ti+1+nt`
			`for tio in range(ti + 1, ti + nt):`
			`tio = tio % nt`
			`entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii)`
			`# END for each other item data`

			`# if we are a directory, enter recursion`
			`if is_dir:`
			`out.extend(traverse_trees_recursive(`
			`odb, [((ei and ei[0]) or None) for ei in entries], path_prefix + name + '/'))`
			`else:`
			`out_append(tuple(_to_full_path(e, path_prefix) for e in entries))`
			`# END handle recursion`

			`# finally mark it done`
			`tree_data[ii] = None`
			`# END for each item`

			`# we are done with one tree, set all its data empty`
			`del(tree_data[:])`
			`# END for each tree_data chunk`
			`return out`


			`def traverse_tree_recursive(odb, tree_sha, path_prefix):`
			`"""`
			`:return: list of entries of the tree pointed to by the binary tree_sha. An entry`
			`has the following format:`
			`* [0] 20 byte sha`
			`* [1] mode as int`
			`* [2] path relative to the repository`
			`:param path_prefix: prefix to prepend to the front of all returned paths"""`
			`entries = []`
			`data = tree_entries_from_data(odb.stream(tree_sha).read())`

			`# unpacking/packing is faster than accessing individual items`
			`for sha, mode, name in data:`
			`if S_ISDIR(mode):`
			`entries.extend(traverse_tree_recursive(odb, sha, path_prefix + name + '/'))`
			`else:`
			`entries.append((sha, mode, path_prefix + name))`
			`# END for each item`

			`return entries`