mirror of
https://github.com/borgbackup/borg.git
synced 2024-12-26 17:57:59 +00:00
Merge pull request #932 from enkore/feature/inorder
create: Visit files in inode order
This commit is contained in:
commit
14bd4c756d
3 changed files with 132 additions and 35 deletions
|
@ -25,6 +25,7 @@
|
|||
log_multi, PatternMatcher, ItemFormatter
|
||||
from .logger import create_logger, setup_logging
|
||||
logger = create_logger()
|
||||
from . import helpers
|
||||
from .compress import Compressor, COMPR_BUFFER
|
||||
from .upgrader import AtticRepositoryUpgrader, BorgRepositoryUpgrader
|
||||
from .repository import Repository
|
||||
|
@ -247,17 +248,18 @@ def create_inner(archive, cache):
|
|||
self.print_file_status(status, path)
|
||||
continue
|
||||
path = os.path.normpath(path)
|
||||
try:
|
||||
st = os.lstat(path)
|
||||
except OSError as e:
|
||||
self.print_warning('%s: %s', path, e)
|
||||
continue
|
||||
if args.one_file_system:
|
||||
try:
|
||||
restrict_dev = os.lstat(path).st_dev
|
||||
except OSError as e:
|
||||
self.print_warning('%s: %s', path, e)
|
||||
continue
|
||||
restrict_dev = st.st_dev
|
||||
else:
|
||||
restrict_dev = None
|
||||
self._process(archive, cache, matcher, args.exclude_caches, args.exclude_if_present,
|
||||
args.keep_tag_files, skip_inodes, path, restrict_dev,
|
||||
read_special=args.read_special, dry_run=dry_run)
|
||||
read_special=args.read_special, dry_run=dry_run, st=st)
|
||||
if not dry_run:
|
||||
archive.save(comment=args.comment, timestamp=args.timestamp)
|
||||
if args.progress:
|
||||
|
@ -292,16 +294,16 @@ def create_inner(archive, cache):
|
|||
|
||||
def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present,
|
||||
keep_tag_files, skip_inodes, path, restrict_dev,
|
||||
read_special=False, dry_run=False):
|
||||
read_special=False, dry_run=False, st=None):
|
||||
if not matcher.match(path):
|
||||
self.print_file_status('x', path)
|
||||
return
|
||||
|
||||
try:
|
||||
st = os.lstat(path)
|
||||
except OSError as e:
|
||||
self.print_warning('%s: %s', path, e)
|
||||
return
|
||||
if st is None:
|
||||
try:
|
||||
st = os.lstat(path)
|
||||
except OSError as e:
|
||||
self.print_warning('%s: %s', path, e)
|
||||
return
|
||||
if (st.st_ino, st.st_dev) in skip_inodes:
|
||||
return
|
||||
# Entering a new filesystem?
|
||||
|
@ -331,15 +333,15 @@ def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present,
|
|||
if not dry_run:
|
||||
status = archive.process_dir(path, st)
|
||||
try:
|
||||
entries = os.listdir(path)
|
||||
entries = helpers.scandir_inorder(path)
|
||||
except OSError as e:
|
||||
status = 'E'
|
||||
self.print_warning('%s: %s', path, e)
|
||||
else:
|
||||
for filename in sorted(entries):
|
||||
entry_path = os.path.normpath(os.path.join(path, filename))
|
||||
for dirent in entries:
|
||||
normpath = os.path.normpath(dirent.path)
|
||||
self._process(archive, cache, matcher, exclude_caches, exclude_if_present,
|
||||
keep_tag_files, skip_inodes, entry_path, restrict_dev,
|
||||
keep_tag_files, skip_inodes, normpath, restrict_dev,
|
||||
read_special=read_special, dry_run=dry_run)
|
||||
elif stat.S_ISLNK(st.st_mode):
|
||||
if not dry_run:
|
||||
|
@ -461,7 +463,7 @@ def get_mode(item):
|
|||
return [None]
|
||||
|
||||
def has_hardlink_master(item, hardlink_masters):
    """Return True if *item* is a regular file whose b'source' is a key of *hardlink_masters*.

    Only the mode check of the post-change implementation is kept; the older
    ``get_mode(item)[0] != 'l'`` line that preceded it was the pre-change
    version of the same statement and made this one unreachable.

    Note that ``item[b'mode']`` is a plain subscript, so *item* must carry a
    b'mode' key; b'source' is optional.
    """
    return stat.S_ISREG(item[b'mode']) and item.get(b'source') in hardlink_masters
|
||||
|
||||
def compare_link(item1, item2):
|
||||
# These are the simple link cases. For special cases, e.g. if a
|
||||
|
@ -524,9 +526,6 @@ def compare_items(output, path, item1, item2, hardlink_masters, deleted=False):
|
|||
"""
|
||||
changes = []
|
||||
|
||||
if item1.get(b'hardlink_master') or item2.get(b'hardlink_master'):
|
||||
hardlink_masters[path] = (item1, item2)
|
||||
|
||||
if has_hardlink_master(item1, hardlink_masters):
|
||||
item1 = hardlink_masters[item1[b'source']][0]
|
||||
|
||||
|
@ -559,8 +558,26 @@ def print_output(line):
|
|||
print("{:<19} {}".format(line[1], line[0]))
|
||||
|
||||
def compare_archives(archive1, archive2, matcher):
|
||||
def hardlink_master_seen(item):
|
||||
return b'source' not in item or not stat.S_ISREG(item[b'mode']) or item[b'source'] in hardlink_masters
|
||||
|
||||
def is_hardlink_master(item):
|
||||
return item.get(b'hardlink_master', True) and b'source' not in item
|
||||
|
||||
def update_hardlink_masters(item1, item2):
|
||||
if is_hardlink_master(item1) or is_hardlink_master(item2):
|
||||
hardlink_masters[item1[b'path']] = (item1, item2)
|
||||
|
||||
def compare_or_defer(item1, item2):
|
||||
update_hardlink_masters(item1, item2)
|
||||
if not hardlink_master_seen(item1) or not hardlink_master_seen(item2):
|
||||
deferred.append((item1, item2))
|
||||
else:
|
||||
compare_items(output, item1[b'path'], item1, item2, hardlink_masters)
|
||||
|
||||
orphans_archive1 = collections.OrderedDict()
|
||||
orphans_archive2 = collections.OrderedDict()
|
||||
deferred = []
|
||||
hardlink_masters = {}
|
||||
output = []
|
||||
|
||||
|
@ -569,31 +586,40 @@ def compare_archives(archive1, archive2, matcher):
|
|||
archive2.iter_items(lambda item: matcher.match(item[b'path'])),
|
||||
):
|
||||
if item1 and item2 and item1[b'path'] == item2[b'path']:
|
||||
compare_items(output, item1[b'path'], item1, item2, hardlink_masters)
|
||||
compare_or_defer(item1, item2)
|
||||
continue
|
||||
if item1:
|
||||
matching_orphan = orphans_archive2.pop(item1[b'path'], None)
|
||||
if matching_orphan:
|
||||
compare_items(output, item1[b'path'], item1, matching_orphan, hardlink_masters)
|
||||
compare_or_defer(item1, matching_orphan)
|
||||
else:
|
||||
orphans_archive1[item1[b'path']] = item1
|
||||
if item2:
|
||||
matching_orphan = orphans_archive1.pop(item2[b'path'], None)
|
||||
if matching_orphan:
|
||||
compare_items(output, item2[b'path'], matching_orphan, item2, hardlink_masters)
|
||||
compare_or_defer(matching_orphan, item2)
|
||||
else:
|
||||
orphans_archive2[item2[b'path']] = item2
|
||||
# At this point orphans_* contain items that had no matching partner in the other archive
|
||||
deleted_item = {
|
||||
b'deleted': True,
|
||||
b'chunks': [],
|
||||
b'mode': 0,
|
||||
}
|
||||
for added in orphans_archive2.values():
|
||||
compare_items(output, added[b'path'], {
|
||||
b'deleted': True,
|
||||
b'chunks': [],
|
||||
}, added, hardlink_masters, deleted=True)
|
||||
path = added[b'path']
|
||||
deleted_item[b'path'] = path
|
||||
update_hardlink_masters(deleted_item, added)
|
||||
compare_items(output, path, deleted_item, added, hardlink_masters, deleted=True)
|
||||
for deleted in orphans_archive1.values():
|
||||
compare_items(output, deleted[b'path'], deleted, {
|
||||
b'deleted': True,
|
||||
b'chunks': [],
|
||||
}, hardlink_masters, deleted=True)
|
||||
path = deleted[b'path']
|
||||
deleted_item[b'path'] = path
|
||||
update_hardlink_masters(deleted, deleted_item)
|
||||
compare_items(output, path, deleted, deleted_item, hardlink_masters, deleted=True)
|
||||
for item1, item2 in deferred:
|
||||
assert hardlink_master_seen(item1)
|
||||
assert hardlink_master_seen(item2)
|
||||
compare_items(output, item1[b'path'], item1, item2, hardlink_masters)
|
||||
|
||||
for line in sorted(output):
|
||||
print_output(line)
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
import hashlib
|
||||
from itertools import islice
|
||||
import os
|
||||
import os.path
|
||||
import stat
|
||||
import textwrap
|
||||
import pwd
|
||||
|
@ -1349,3 +1350,68 @@ def consume(iterator, n=None):
|
|||
else:
|
||||
# advance to the empty slice starting at position n
|
||||
next(islice(iterator, n, n), None)
|
||||
|
||||
# GenericDirEntry, scandir_generic (c) 2012 Ben Hoyt
|
||||
# from the python-scandir package (3-clause BSD license, just like us, so no troubles here)
|
||||
# note: simplified version
|
||||
|
||||
|
||||
class GenericDirEntry:
    """listdir()/lstat()-based stand-in for the entry objects yielded by ``scandir()``.

    Simplified from python-scandir: only the non-following (``lstat``) queries
    are implemented, so ``stat()``, ``is_dir()`` and ``is_file()`` must be
    called with ``follow_symlinks=False``.
    """

    __slots__ = ('name', '_scandir_path', '_path', '_lstat')

    def __init__(self, scandir_path, name):
        """Describe the entry *name* inside the directory *scandir_path*."""
        self._scandir_path = scandir_path
        self.name = name
        self._path = None   # joined lazily by the ``path`` property
        self._lstat = None  # filled in by the first successful stat() call

    @property
    def path(self):
        """*name* joined onto the scanned directory; computed on first access."""
        if self._path is None:
            self._path = os.path.join(self._scandir_path, self.name)
        return self._path

    @staticmethod
    def _require_no_follow(follow_symlinks):
        """Reject symlink-following queries, which this simplified class lacks.

        An explicit raise is used instead of ``assert`` so that the check
        still happens when Python runs with ``-O``.
        """
        if follow_symlinks:
            raise NotImplementedError(
                'GenericDirEntry only supports follow_symlinks=False')

    def stat(self, follow_symlinks=True):
        """Return the ``os.lstat()`` result for this entry.

        The result is cached on the entry, so repeated type/inode queries
        cost a single system call.

        Raises NotImplementedError unless ``follow_symlinks`` is false, and
        OSError if the lstat() call itself fails.
        """
        self._require_no_follow(follow_symlinks)
        if self._lstat is None:
            self._lstat = os.lstat(self.path)
        return self._lstat

    def _check_type(self, file_type):
        """Return True if the entry's file-type bits equal *file_type* (a ``stat.S_IF*`` value)."""
        st = self.stat(False)
        return stat.S_IFMT(st.st_mode) == file_type

    def is_dir(self, follow_symlinks=True):
        """Return True if the entry itself is a directory (``follow_symlinks`` must be false)."""
        self._require_no_follow(follow_symlinks)
        return self._check_type(stat.S_IFDIR)

    def is_file(self, follow_symlinks=True):
        """Return True if the entry itself is a regular file (``follow_symlinks`` must be false)."""
        self._require_no_follow(follow_symlinks)
        return self._check_type(stat.S_IFREG)

    def is_symlink(self):
        """Return True if the entry is a symbolic link."""
        return self._check_type(stat.S_IFLNK)

    def inode(self):
        """Return the entry's inode number (``st_ino`` of its lstat result)."""
        st = self.stat(False)
        return st.st_ino

    def __repr__(self):
        return '<{0}: {1!r}>'.format(self.__class__.__name__, self.path)
|
||||
|
||||
|
||||
def scandir_generic(path='.'):
    """Yield one GenericDirEntry per name returned by os.listdir(path), in sorted name order."""
    names = os.listdir(path)
    names.sort()
    for entry_name in names:
        yield GenericDirEntry(path, entry_name)
|
||||
|
||||
# Pick the scandir implementation: prefer the one shipped in the os module,
# then the python-scandir package, then the listdir()-based scandir_generic.
try:
    from os import scandir
except ImportError:
    scandir = None

if scandir is None:
    try:
        # Try python-scandir on Python 3.4
        from scandir import scandir
    except ImportError:
        # If python-scandir is not installed, then use a version that is just as slow as listdir.
        scandir = scandir_generic
|
||||
|
||||
|
||||
def scandir_inorder(path='.'):
    """Return the scandir() entries of *path* as a list sorted by ascending inode number."""
    entries = list(scandir(path))
    entries.sort(key=lambda entry: entry.inode())
    return entries
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
|
||||
import pytest
|
||||
|
||||
from .. import xattr
|
||||
from .. import xattr, helpers
|
||||
from ..archive import Archive, ChunkBuffer, ArchiveRecreater
|
||||
from ..archiver import Archiver
|
||||
from ..cache import Cache
|
||||
|
@ -1314,11 +1314,16 @@ def _test_recreate_interrupt(self, change_args, interrupt_early):
|
|||
assert 'dir2/abcdef' in files
|
||||
assert 'file1' not in files
|
||||
|
||||
# The _test_recreate_interrupt helper requires a deterministic (alphabetic) order of the files to easily check if
|
||||
# resumption works correctly. Patch scandir_inorder to work in alphabetic order.
|
||||
|
||||
def test_recreate_interrupt(self):
    # Run the scenario exactly once, with scandir_inorder swapped for the
    # name-sorted scandir_generic so files are visited in alphabetic order.
    # The bare, unpatched call that preceded this block was the pre-change
    # line and would have run the scenario a second time in inode order.
    with patch.object(helpers, 'scandir_inorder', helpers.scandir_generic):
        self._test_recreate_interrupt(False, True)
|
||||
|
||||
def test_recreate_interrupt2(self):
    # Run the scenario exactly once, with scandir_inorder swapped for the
    # name-sorted scandir_generic so files are visited in alphabetic order.
    # The bare, unpatched call that preceded this block was the pre-change
    # line and would have run the scenario a second time in inode order.
    with patch.object(helpers, 'scandir_inorder', helpers.scandir_generic):
        self._test_recreate_interrupt(True, False)
|
||||
|
||||
def _test_recreate_chunker_interrupt_patch(self):
|
||||
real_add_chunk = Cache.add_chunk
|
||||
|
|
Loading…
Reference in a new issue