Merge pull request #5227 from ThomasWaldmann/fix-4911

hardlinked CACHEDIR.TAG issue
This commit is contained in:
TW 2020-06-20 00:29:56 +02:00 committed by GitHub
commit a8c262f3c1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 11 deletions

View File

@ -2032,24 +2032,33 @@ class ArchiveRecreater:
matcher = self.matcher
tag_files = []
tagged_dirs = []
# build hardlink masters, but only for paths ending in CACHE_TAG_NAME, so we can read hard-linked TAGs
# to support reading hard-linked CACHEDIR.TAGs (aka CACHE_TAG_NAME), similar to hardlink_masters:
cachedir_masters = {}
if self.exclude_caches:
# sadly, due to how CACHEDIR.TAG works (filename AND file [header] contents) and
# how borg deals with hardlinks (slave hardlinks referring back to master hardlinks),
# we need to pass over the archive collecting hardlink master paths.
# as seen in issue #4911, the master paths can have arbitrary filenames,
# not just CACHEDIR.TAG.
for item in archive.iter_items(filter=lambda item: os.path.basename(item.path) == CACHE_TAG_NAME):
if stat.S_ISREG(item.mode) and 'chunks' not in item and 'source' in item:
# this is a hardlink slave, referring back to its hardlink master (via item.source)
cachedir_masters[item.source] = None # we know the key (path), but not the value (item) yet
for item in archive.iter_items(
filter=lambda item: item.path.endswith(CACHE_TAG_NAME) or matcher.match(item.path)):
if item.path.endswith(CACHE_TAG_NAME):
filter=lambda item: os.path.basename(item.path) == CACHE_TAG_NAME or matcher.match(item.path)):
if self.exclude_caches and item.path in cachedir_masters:
cachedir_masters[item.path] = item
dir, tag_file = os.path.split(item.path)
if tag_file in self.exclude_if_present:
exclude(dir, item)
if stat.S_ISREG(item.mode):
if self.exclude_caches and tag_file == CACHE_TAG_NAME:
if 'chunks' in item:
file = open_item(archive, item)
else:
file = open_item(archive, cachedir_masters[item.source])
if file.read(len(CACHE_TAG_CONTENTS)).startswith(CACHE_TAG_CONTENTS):
exclude(dir, item)
elif self.exclude_caches and tag_file == CACHE_TAG_NAME and stat.S_ISREG(item.mode):
content_item = item if 'chunks' in item else cachedir_masters[item.source]
file = open_item(archive, content_item)
if file.read(len(CACHE_TAG_CONTENTS)) == CACHE_TAG_CONTENTS:
exclude(dir, item)
matcher.add(tag_files, IECommand.Include)
matcher.add(tagged_dirs, IECommand.ExcludeNoRecurse)

View File

@ -1279,6 +1279,20 @@ class ArchiverTestCase(ArchiverTestCaseBase):
'--exclude-caches', '--keep-exclude-tags', self.repository_location + '::test')
self._assert_test_keep_tagged()
# Regression test for issue #4911: "recreate --exclude-caches" on an archive in which the
# CACHEDIR.TAG entry is a hardlink whose contents are stored under a differently named file.
# Skipped on platforms where are_hardlinks_supported() reports no hardlink support.
@pytest.mark.skipif(not are_hardlinks_supported(), reason='hardlinks not supported')
def test_recreate_hardlinked_tags(self): # test for issue #4911
self.cmd('init', '--encryption=none', self.repository_location)
# the file holding the tag contents is created first, under a name that is NOT CACHE_TAG_NAME
self.create_regular_file('file1', contents=CACHE_TAG_CONTENTS) # "wrong" filename, but correct tag contents
os.mkdir(os.path.join(self.input_path, 'subdir')) # to make sure the tag is encountered *after* file1
os.link(os.path.join(self.input_path, 'file1'),
os.path.join(self.input_path, 'subdir', CACHE_TAG_NAME)) # correct tag name, hardlink to file1
self.cmd('create', self.repository_location + '::test', 'input')
# in the "test" archive, we now have, in this order:
# - a regular file item for "file1"
# - a hardlink item for "CACHEDIR.TAG" referring back to file1 for its contents
# there is no explicit assertion below: the check is that the recreate command completes
# (presumably self.cmd fails the test if the command crashes - helper is defined outside this view)
self.cmd('recreate', '--exclude-caches', '--keep-exclude-tags', self.repository_location + '::test')
# if issue #4911 is present, the recreate will crash with a KeyError for "input/file1"
@pytest.mark.skipif(not xattr.XATTR_FAKEROOT, reason='Linux capabilities test, requires fakeroot >= 1.20.2')
def test_extract_capabilities(self):
fchown = os.fchown