1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-02-24 07:01:59 +00:00

create a RepositoryCache implementation that can cope with any amount of data, fixes attic #326

The old code blows up with an integer OverflowError when the cache file grows beyond 2 GiB in size.
The new code simply reuses the Repository implementation as a local temporary key/value store.

Still an issue: the place where the temporary RepositoryCache is stored (usually /tmp) may not be
able to cope with the cache size and can run full.

If you copy data from a FUSE mount, the cache size equals the size of the copied, deduplicated data.
So, if you have lots of data to extract (more than your /tmp can hold), it is better not to use FUSE!

Besides FUSE mounts, this also affects attic check and cache sync (in these cases, only the
metadata size counts, but even that can go beyond 2 GiB for some people).
This commit is contained in:
Thomas Waldmann 2015-07-12 00:18:49 +02:00
parent afae720112
commit bd354d7bb4
2 changed files with 13 additions and 37 deletions

View file

@ -3,7 +3,6 @@
import msgpack import msgpack
import os import os
import select import select
import shutil
from subprocess import Popen, PIPE from subprocess import Popen, PIPE
import sys import sys
import tempfile import tempfile
@ -11,7 +10,6 @@
from . import __version__ from . import __version__
from .hashindex import NSIndex
from .helpers import Error, IntegrityError from .helpers import Error, IntegrityError
from .repository import Repository from .repository import Repository
@ -292,56 +290,29 @@ def preload(self, ids):
class RepositoryCache: class RepositoryCache:
"""A caching Repository wrapper """A caching Repository wrapper
Caches Repository GET operations using a temporary file Caches Repository GET operations using a local temporary Repository.
""" """
def __init__(self, repository): def __init__(self, repository):
self.tmppath = None
self.index = None
self.data_fd = None
self.repository = repository self.repository = repository
self.entries = {} tmppath = tempfile.mkdtemp(prefix='borg-tmp')
self.initialize() self.caching_repo = Repository(tmppath, create=True, exclusive=True)
def __del__(self): def __del__(self):
self.cleanup() self.caching_repo.destroy()
def initialize(self):
self.tmppath = tempfile.mkdtemp(prefix='borg-tmp')
self.index = NSIndex()
self.data_fd = open(os.path.join(self.tmppath, 'data'), 'a+b')
def cleanup(self):
del self.index
if self.data_fd:
self.data_fd.close()
if self.tmppath:
shutil.rmtree(self.tmppath)
def load_object(self, offset, size):
self.data_fd.seek(offset)
data = self.data_fd.read(size)
assert len(data) == size
return data
def store_object(self, key, data):
self.data_fd.seek(0, os.SEEK_END)
self.data_fd.write(data)
offset = self.data_fd.tell()
self.index[key] = offset - len(data), len(data)
def get(self, key): def get(self, key):
return next(self.get_many([key])) return next(self.get_many([key]))
def get_many(self, keys): def get_many(self, keys):
unknown_keys = [key for key in keys if key not in self.index] unknown_keys = [key for key in keys if key not in self.caching_repo]
repository_iterator = zip(unknown_keys, self.repository.get_many(unknown_keys)) repository_iterator = zip(unknown_keys, self.repository.get_many(unknown_keys))
for key in keys: for key in keys:
try: try:
yield self.load_object(*self.index[key]) yield self.caching_repo.get(key)
except KeyError: except Repository.ObjectNotFound:
for key_, data in repository_iterator: for key_, data in repository_iterator:
if key_ == key: if key_ == key:
self.store_object(key, data) self.caching_repo.put(key, data)
yield data yield data
break break
# Consume any pending requests # Consume any pending requests

View file

@ -341,6 +341,11 @@ def __len__(self):
self.index = self.open_index(self.get_transaction_id()) self.index = self.open_index(self.get_transaction_id())
return len(self.index) return len(self.index)
def __contains__(self, id):
if not self.index:
self.index = self.open_index(self.get_transaction_id())
return id in self.index
def list(self, limit=None, marker=None): def list(self, limit=None, marker=None):
if not self.index: if not self.index:
self.index = self.open_index(self.get_transaction_id()) self.index = self.open_index(self.get_transaction_id())