bazarr/libs/gitdb/db/loose.py

# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from gitdb.db.base import (
    FileDBBase,
    ObjectDBR,
    ObjectDBW
)


from gitdb.exc import (
    BadObject,
    AmbiguousObjectName
)

from gitdb.stream import (
    DecompressMemMapReader,
    FDCompressedSha1Writer,
    FDStream,
    Sha1Writer
)

from gitdb.base import (
    OStream,
    OInfo
)

from gitdb.util import (
    file_contents_ro_filepath,
    ENOENT,
    hex_to_bin,
    bin_to_hex,
    exists,
    chmod,
    isdir,
    isfile,
    remove,
    mkdir,
    rename,
    dirname,
    basename,
    join
)

from gitdb.fun import (
    chunk_size,
    loose_object_header_info,
    write_object,
    stream_copy
)

from gitdb.utils.compat import MAXSIZE
from gitdb.utils.encoding import force_bytes

import tempfile
import os


__all__ = ('LooseObjectDB', )


class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):

    """A database which operates on loose object files

    Every object is kept in a file of its own below the database root, at the
    relative path ``<first two hex characters>/<remaining hex characters>`` of
    its sha (see :meth:`object_path`)."""

    # CONFIGURATION
    # chunks in which data will be copied between streams
    stream_chunk_size = chunk_size

    # Permission bits applied to freshly stored object files.
    # On windows we need to keep it writable, otherwise it cannot be removed
    # either
    new_objects_mode = int("444", 8)
    if os.name == 'nt':
        new_objects_mode = int("644", 8)

    def __init__(self, root_path):
        """Initialize the database.

        :param root_path: passed unchanged to the base class initializer"""
        super(LooseObjectDB, self).__init__(root_path)
        # cache: hexsha -> path of the object file that was found on disk
        self._hexsha_to_file = dict()
        # Additional Flags - set to 0 after the first failure
        # Depending on the root, this might work for some mounts, for others not, which
        # is why it is per instance
        self._fd_open_flags = getattr(os, 'O_NOATIME', 0)

    #{ Interface
    def object_path(self, hexsha):
        """
        :return: path at which the object with the given hexsha would be stored,
            relative to the database root
        :param hexsha: hexadecimal sha; its first two characters name the
            directory, the remainder names the file"""
        return join(hexsha[:2], hexsha[2:])

    def readable_db_object_path(self, hexsha):
        """
        :return: readable object path to the object identified by hexsha
        :param hexsha: hexadecimal sha of the object to look up
        :raise BadObject: If the object file does not exist"""
        try:
            return self._hexsha_to_file[hexsha]
        except KeyError:
            pass
        # END ignore cache misses

        # try filesystem, and remember hits so the next lookup skips the check
        path = self.db_path(self.object_path(hexsha))
        if exists(path):
            self._hexsha_to_file[hexsha] = path
            return path
        # END handle cache
        raise BadObject(hexsha)

    def partial_to_complete_sha_hex(self, partial_hexsha):
        """:return: 20 byte binary sha1 string which matches the given name uniquely
        :param partial_hexsha: hexadecimal partial name (bytes or ascii string)
        :raise AmbiguousObjectName: if more than one object starts with the prefix
        :raise BadObject: if no object starts with the prefix"""
        # the prefix is the same for every object, convert it only once
        prefix = force_bytes(partial_hexsha)
        candidate = None
        for binsha in self.sha_iter():
            if bin_to_hex(binsha).startswith(prefix):
                # it can't ever find the same object twice, so a second hit
                # is a different object
                if candidate is not None:
                    raise AmbiguousObjectName(partial_hexsha)
                candidate = binsha
        # END for each object
        if candidate is None:
            raise BadObject(partial_hexsha)
        return candidate

    #} END interface

    def _map_loose_object(self, sha):
        """
        :return: memory map of that file to allow random read access
        :param sha: binary sha of the object to open
        :raise BadObject: if object could not be located"""
        db_path = self.db_path(self.object_path(bin_to_hex(sha)))
        try:
            return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
        except OSError as e:
            if e.errno == ENOENT:
                raise BadObject(sha)
            # END handle missing file

            # Any other error may be caused by our additional open flags:
            # try again without noatime
            try:
                contents = file_contents_ro_filepath(db_path)
            except OSError:
                raise BadObject(sha)
            # END handle retry failure

            # It only worked without our flag, don't try it again. This has to
            # happen before returning, or every later read repeats the failing
            # attempt.
            self._fd_open_flags = 0
            return contents
        # END exception handling

    def set_ostream(self, stream):
        """Set the stream that :meth:`store` writes to instead of a new object file.

        :param stream: object supporting the Sha1Writer interface, or None
        :return: whatever the base class implementation returns
        :raise TypeError: if the stream does not support the Sha1Writer interface"""
        if stream is not None and not isinstance(stream, Sha1Writer):
            raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__)
        return super(LooseObjectDB, self).set_ostream(stream)

    def info(self, sha):
        """
        :return: OInfo instance built from the sha and the type and size found
            in the object's header
        :param sha: binary sha of the object
        :raise BadObject: if object could not be located"""
        m = self._map_loose_object(sha)
        try:
            typ, size = loose_object_header_info(m)
            return OInfo(sha, typ, size)
        finally:
            # the map is only needed for the header, release it right away
            if hasattr(m, 'close'):
                m.close()
        # END assure release of system resources

    def stream(self, sha):
        """
        :return: OStream instance providing the object's type, size and a
            stream reading from the mapped object file
        :param sha: binary sha of the object
        :raise BadObject: if object could not be located"""
        m = self._map_loose_object(sha)
        # the reader is asked to close the map once it is deleted, as the map
        # has to stay open for as long as the stream is in use
        typ, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True)
        return OStream(sha, typ, size, stream)

    def has_object(self, sha):
        """
        :return: True if a file for the object with the given binary sha exists
        :param sha: binary sha of the object"""
        try:
            self.readable_db_object_path(bin_to_hex(sha))
            return True
        except BadObject:
            return False
        # END check existence

    def store(self, istream):
        """Write the contents of istream to the database.

        If an output stream was set with :meth:`set_ostream`, all data goes to
        that stream and no object file is created. Otherwise the data is
        written to a temporary file in the database root, which is then moved
        to the object's final path.

        If ``istream.binsha`` is already set, the stream is copied as is.
        Otherwise a new object with header is written and its sha is taken
        from the writer.

        :note: The sha we produce will be hex by nature
        :param istream: stream providing ``read``, ``type``, ``size`` and ``binsha``
        :return: istream, with its ``binsha`` set to the sha of the stored object"""
        tmp_path = None
        writer = self.ostream()
        if writer is None:
            # open a tmp file to write the data to
            fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)

            if istream.binsha is None:
                writer = FDCompressedSha1Writer(fd)
            else:
                writer = FDStream(fd)
            # END handle direct stream copies
        # END handle custom writer

        try:
            try:
                if istream.binsha is not None:
                    # copy as much as possible, the actual uncompressed item size might
                    # be smaller than the compressed version
                    stream_copy(istream.read, writer.write, MAXSIZE, self.stream_chunk_size)
                else:
                    # write object with header, we have to make a new one
                    write_object(istream.type, istream.size, istream.read, writer.write,
                                 chunk_size=self.stream_chunk_size)
                # END handle direct stream copies
            finally:
                # only close writers we created ourselves, a custom output
                # stream remains under the control of whoever set it
                if tmp_path:
                    writer.close()
            # END assure target stream is closed
        except BaseException:
            # cleanup only: whatever went wrong is always re-raised
            if tmp_path:
                os.remove(tmp_path)
            raise
        # END assure tmpfile removal on error

        hexsha = None
        if istream.binsha:
            hexsha = istream.hexsha
        else:
            hexsha = writer.sha(as_hex=True)
        # END handle sha

        if tmp_path:
            obj_path = self.db_path(self.object_path(hexsha))
            obj_dir = dirname(obj_path)
            if not isdir(obj_dir):
                mkdir(obj_dir)
            # END handle destination directory
            # rename onto existing doesn't work on windows
            if os.name == 'nt':
                if isfile(obj_path):
                    # the object exists already, drop the temporary file
                    remove(tmp_path)
                else:
                    rename(tmp_path, obj_path)
                # end rename only if needed
            else:
                rename(tmp_path, obj_path)
            # END handle win32

            # The tmp file is not in its final mode yet, apply the configured
            # permissions for new objects
            chmod(obj_path, self.new_objects_mode)
        # END handle dry_run

        istream.binsha = hex_to_bin(hexsha)
        return istream

    def sha_iter(self):
        """
        :return: iterator yielding the binary sha of every file below the
            database root which looks like an object, i.e. a file with a 38
            character name in a directory with a 2 character name"""
        # find all files which look like an object, extract sha from there
        for root, dirs, files in os.walk(self.root_path()):
            root_base = basename(root)
            if len(root_base) != 2:
                continue

            for f in files:
                if len(f) != 38:
                    continue
                yield hex_to_bin(root_base + f)
            # END for each file
        # END for each walk iteration

    def size(self):
        """:return: number of objects yielded by :meth:`sha_iter`"""
        # count while iterating, there is no need to keep all shas in memory
        return sum(1 for _ in self.sha_iter())