mirror of https://github.com/morpheus65535/bazarr
263 lines
8.1 KiB
Python
263 lines
8.1 KiB
Python
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
|
#
|
|
# This module is part of GitDB and is released under
|
|
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
|
from gitdb.db.base import (
|
|
FileDBBase,
|
|
ObjectDBR,
|
|
ObjectDBW
|
|
)
|
|
|
|
|
|
from gitdb.exc import (
|
|
BadObject,
|
|
AmbiguousObjectName
|
|
)
|
|
|
|
from gitdb.stream import (
|
|
DecompressMemMapReader,
|
|
FDCompressedSha1Writer,
|
|
FDStream,
|
|
Sha1Writer
|
|
)
|
|
|
|
from gitdb.base import (
|
|
OStream,
|
|
OInfo
|
|
)
|
|
|
|
from gitdb.util import (
|
|
file_contents_ro_filepath,
|
|
ENOENT,
|
|
hex_to_bin,
|
|
bin_to_hex,
|
|
exists,
|
|
chmod,
|
|
isdir,
|
|
isfile,
|
|
remove,
|
|
mkdir,
|
|
rename,
|
|
dirname,
|
|
basename,
|
|
join
|
|
)
|
|
|
|
from gitdb.fun import (
|
|
chunk_size,
|
|
loose_object_header_info,
|
|
write_object,
|
|
stream_copy
|
|
)
|
|
|
|
from gitdb.utils.compat import MAXSIZE
|
|
from gitdb.utils.encoding import force_bytes
|
|
|
|
import tempfile
|
|
import os
|
|
|
|
|
|
# Names exported by ``from <this module> import *``: only the loose object
# database class is part of the public interface.
__all__ = ('LooseObjectDB', )
|
|
|
|
|
|
class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):

    """A database which operates on loose object files.

    Every object lives in its own file below the database root. The file's
    location is derived from the object's hex sha: the first two characters
    name a directory, the remaining characters name the file within it
    (see :meth:`object_path`)."""

    # CONFIGURATION
    # chunks in which data will be copied between streams
    stream_chunk_size = chunk_size

    # On windows we need to keep it writable, otherwise it cannot be removed
    # either
    new_objects_mode = int("444", 8)
    if os.name == 'nt':
        new_objects_mode = int("644", 8)

    def __init__(self, root_path):
        """Initialize the database.

        :param root_path: passed on unchanged to the base class initializer"""
        super(LooseObjectDB, self).__init__(root_path)
        # cache: hexsha -> path of the existing object file
        self._hexsha_to_file = dict()
        # Additional Flags - set to 0 after the first failure that turned out
        # to be caused by them.
        # Depending on the root, this might work for some mounts, for others not, which
        # is why it is per instance
        self._fd_open_flags = getattr(os, 'O_NOATIME', 0)

    #{ Interface
    def object_path(self, hexsha):
        """
        :return: path at which the object with the given hexsha would be stored,
            relative to the database root"""
        return join(hexsha[:2], hexsha[2:])

    def readable_db_object_path(self, hexsha):
        """
        :return: readable object path to the object identified by hexsha
        :raise BadObject: If the object file does not exist"""
        try:
            return self._hexsha_to_file[hexsha]
        except KeyError:
            pass
        # END ignore cache misses

        # try filesystem
        path = self.db_path(self.object_path(hexsha))
        if exists(path):
            # remember the hit so that later lookups skip the filesystem check
            self._hexsha_to_file[hexsha] = path
            return path
        # END handle cache
        raise BadObject(hexsha)

    def partial_to_complete_sha_hex(self, partial_hexsha):
        """:return: binary sha of the one object whose hex sha starts with
            the given partial name
        :param partial_hexsha: hexadecimal partial name (bytes or ascii string)
        :raise AmbiguousObjectName: if more than one object matches
        :raise BadObject: if no object matches"""
        # The prefix does not change while iterating, convert it only once
        prefix = force_bytes(partial_hexsha)
        candidate = None
        for binsha in self.sha_iter():
            if bin_to_hex(binsha).startswith(prefix):
                # it can't ever find the same object twice
                if candidate is not None:
                    raise AmbiguousObjectName(partial_hexsha)
                candidate = binsha
        # END for each object
        if candidate is None:
            raise BadObject(partial_hexsha)
        return candidate

    #} END interface

    def _map_loose_object(self, sha):
        """
        :return: memory map of that file to allow random read access
        :param sha: binary sha of the object to map
        :raise BadObject: if object could not be located"""
        db_path = self.db_path(self.object_path(bin_to_hex(sha)))
        try:
            return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
        except OSError as e:
            if e.errno == ENOENT:
                # the file simply is not there, retrying cannot help
                raise BadObject(sha)
            # END handle missing file

            # try again without noatime
            try:
                mapped = file_contents_ro_filepath(db_path)
            except OSError:
                raise BadObject(sha)
            # END handle retry failure

            # The first attempt didn't work because of our flag, don't try it
            # again. This has to happen before returning, otherwise every
            # subsequent read would repeat the failing attempt.
            self._fd_open_flags = 0
            return mapped
        # END exception handling

    def set_ostream(self, stream):
        """Set the stream that :meth:`store` writes to instead of a new object file.

        :param stream: Sha1Writer instance, or None
        :return: whatever the base class implementation returns
        :raise TypeError: if the stream does not support the Sha1Writer interface"""
        if stream is not None and not isinstance(stream, Sha1Writer):
            raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__)
        return super(LooseObjectDB, self).set_ostream(stream)

    def info(self, sha):
        """
        :return: OInfo instance built from the given sha and the type and size
            read from the object's header
        :param sha: binary sha of the object
        :raise BadObject: if object could not be located"""
        m = self._map_loose_object(sha)
        try:
            typ, size = loose_object_header_info(m)
            return OInfo(sha, typ, size)
        finally:
            # only the header was needed, release the map right away
            if hasattr(m, 'close'):
                m.close()
        # END assure release of system resources

    def stream(self, sha):
        """
        :return: OStream instance giving access to the object's type, size and
            decompressed data
        :param sha: binary sha of the object
        :raise BadObject: if object could not be located"""
        m = self._map_loose_object(sha)
        # the reader is asked to close the map on deletion, so it is not
        # closed here
        typ, size, reader = DecompressMemMapReader.new(m, close_on_deletion=True)
        return OStream(sha, typ, size, reader)

    def has_object(self, sha):
        """
        :return: True if a file for the object with the given binary sha exists"""
        try:
            self.readable_db_object_path(bin_to_hex(sha))
            return True
        except BadObject:
            return False
        # END check existence

    def store(self, istream):
        """Write the contents of istream to the database.

        If an output stream was set, the data is written to it and no file is
        created. Otherwise the data goes to a temporary file in the database
        root, which is then moved to the path derived from the object's sha.

        A stream which already has a binsha is copied as is; for any other
        stream an object header is written first and the sha is taken from
        the writer.

        :param istream: stream providing ``type``, ``size``, ``read`` and ``binsha``
        :return: the given istream, with its binsha set"""
        tmp_path = None
        writer = self.ostream()
        if writer is None:
            # open a tmp file to write the data to
            fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)

            if istream.binsha is None:
                writer = FDCompressedSha1Writer(fd)
            else:
                writer = FDStream(fd)
            # END handle direct stream copies
        # END handle custom writer

        try:
            try:
                if istream.binsha is not None:
                    # copy as much as possible, the actual uncompressed item size might
                    # be smaller than the compressed version
                    stream_copy(istream.read, writer.write, MAXSIZE, self.stream_chunk_size)
                else:
                    # write object with header, we have to make a new one
                    write_object(istream.type, istream.size, istream.read, writer.write,
                                 chunk_size=self.stream_chunk_size)
                # END handle direct stream copies
            finally:
                # only writers created here are closed, a custom one stays open
                if tmp_path:
                    writer.close()
            # END assure target stream is closed
        except BaseException:
            # Deliberately broad: the tmp file must not be left behind no
            # matter what went wrong. The error is always re-raised.
            if tmp_path:
                os.remove(tmp_path)
            raise
        # END assure tmpfile removal on error

        if istream.binsha:
            hexsha = istream.hexsha
        else:
            hexsha = writer.sha(as_hex=True)
        # END handle sha

        if tmp_path:
            obj_path = self.db_path(self.object_path(hexsha))
            obj_dir = dirname(obj_path)
            if not isdir(obj_dir):
                mkdir(obj_dir)
            # END handle destination directory
            # rename onto existing doesn't work on windows
            if os.name == 'nt':
                if isfile(obj_path):
                    # the object is stored already, drop the new copy
                    remove(tmp_path)
                else:
                    rename(tmp_path, obj_path)
                # end rename only if needed
            else:
                rename(tmp_path, obj_path)
            # END handle win32

            # The file started out as a tmp file, give it the mode configured
            # for new objects
            chmod(obj_path, self.new_objects_mode)
        # END handle tmp file

        istream.binsha = hex_to_bin(hexsha)
        return istream

    def sha_iter(self):
        """
        :return: iterator yielding the binary sha of each file that looks like
            an object: a 38 character file name within a directory whose name
            is 2 characters long"""
        # find all files which look like an object, extract sha from there
        for root, dirs, files in os.walk(self.root_path()):
            root_base = basename(root)
            if len(root_base) != 2:
                continue

            for f in files:
                if len(f) != 38:
                    continue
                yield hex_to_bin(root_base + f)
            # END for each file
        # END for each walk iteration

    def size(self):
        """
        :return: number of objects produced by :meth:`sha_iter`"""
        # count lazily, there is no need to hold all shas in memory at once
        return sum(1 for _ in self.sha_iter())
|