diff --git a/borg/archiver.py b/borg/archiver.py
index 88a423311..5512bb258 100644
--- a/borg/archiver.py
+++ b/borg/archiver.py
@@ -20,7 +20,7 @@
     format_file_mode, ExcludePattern, IncludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
     get_cache_dir, get_keys_dir, prune_within, prune_split, unhexlify, \
     Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
-    is_cachedir, bigint_to_int, ChunkerParams, CompressionSpec, have_cython, is_slow_msgpack, yes, \
+    dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, have_cython, is_slow_msgpack, yes, \
     EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
 from .logger import create_logger, setup_logging
 logger = create_logger()
@@ -166,7 +166,8 @@ def do_create(self, args):
                     continue
             else:
                 restrict_dev = None
-            self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev,
+            self._process(archive, cache, args.excludes, args.exclude_caches, args.exclude_if_present,
+                          args.keep_tag_files, skip_inodes, path, restrict_dev,
                           read_special=args.read_special, dry_run=dry_run)
         if not dry_run:
             archive.save(timestamp=args.timestamp)
@@ -182,7 +183,8 @@ def do_create(self, args):
                 print('-' * 78)
         return self.exit_code
 
-    def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev,
+    def _process(self, archive, cache, excludes, exclude_caches, exclude_if_present,
+                 keep_tag_files, skip_inodes, path, restrict_dev,
                  read_special=False, dry_run=False):
         if exclude_path(path, excludes):
             return
@@ -209,7 +211,16 @@ def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path,
                     status = 'E'
                     self.print_warning('%s: %s', path, e)
         elif stat.S_ISDIR(st.st_mode):
-            if exclude_caches and is_cachedir(path):
+            tag_path = dir_is_tagged(path, exclude_caches, exclude_if_present)
+            if tag_path:
+                if keep_tag_files and not dry_run:
+                    archive.process_dir(path, st)
+                    # st describes the tagged directory, not the tag file, so
+                    # recurse: the tag file is then stat'ed on its own and is
+                    # handled like any other entry (including I/O errors).
+                    self._process(archive, cache, excludes, exclude_caches, exclude_if_present,
+                                  keep_tag_files, skip_inodes, tag_path, restrict_dev,
+                                  read_special=read_special, dry_run=dry_run)
                 return
             if not dry_run:
                 status = archive.process_dir(path, st)
@@ -221,9 +232,9 @@ def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path,
             else:
                 for filename in sorted(entries):
                     entry_path = os.path.normpath(os.path.join(path, filename))
-                    self._process(archive, cache, excludes, exclude_caches, skip_inodes,
-                                  entry_path, restrict_dev, read_special=read_special,
-                                  dry_run=dry_run)
+                    self._process(archive, cache, excludes, exclude_caches, exclude_if_present,
+                                  keep_tag_files, skip_inodes, entry_path, restrict_dev,
+                                  read_special=read_special, dry_run=dry_run)
         elif stat.S_ISLNK(st.st_mode):
             if not dry_run:
                 status = archive.process_symlink(path, st)
@@ -785,6 +796,12 @@ def build_parser(self, args=None, prog=None):
         subparser.add_argument('--exclude-caches', dest='exclude_caches',
                                action='store_true', default=False,
                                help='exclude directories that contain a CACHEDIR.TAG file (http://www.brynosaurus.com/cachedir/spec.html)')
+        subparser.add_argument('--exclude-if-present', dest='exclude_if_present',
+                               metavar='FILENAME', action='append', type=str,
+                               help='exclude directories that contain the specified file')
+        subparser.add_argument('--keep-tag-files', dest='keep_tag_files',
+                               action='store_true', default=False,
+                               help='keep tag files of excluded caches/directories')
         subparser.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
                                type=int, default=300, metavar='SECONDS',
                                help='write checkpoint every SECONDS seconds (Default: 300)')
diff --git a/borg/helpers.py b/borg/helpers.py
index 1706358df..84379e75a 100644
--- a/borg/helpers.py
+++ b/borg/helpers.py
@@ -428,7 +428,7 @@ def CompressionSpec(s):
     raise ValueError
 
 
-def is_cachedir(path):
+def dir_is_cachedir(path):
     """Determines whether the specified path is a cache directory (and
     therefore should potentially be excluded from the backup) according to
     the CACHEDIR.TAG protocol
@@ -448,6 +448,22 @@ def is_cachedir(path):
     return False
 
 
+def dir_is_tagged(path, exclude_caches, exclude_if_present):
+    """Determines whether the specified path is excluded by being a cache
+    directory or containing a user-specified tag file. Returns the path
+    of the tag file (either CACHEDIR.TAG or the first matching
+    user-specified file), or None if the directory is not tagged.
+    """
+    if exclude_caches and dir_is_cachedir(path):
+        return os.path.join(path, 'CACHEDIR.TAG')
+    if exclude_if_present is not None:
+        for tag in exclude_if_present:
+            tag_path = os.path.join(path, tag)
+            if os.path.isfile(tag_path):
+                return tag_path
+    return None
+
+
 def format_time(t):
     """Format datetime suitable for fixed length list output
     """
diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py
index ca24807d5..aaa758338 100644
--- a/borg/testsuite/archiver.py
+++ b/borg/testsuite/archiver.py
@@ -499,6 +499,33 @@ def test_exclude_caches(self):
         self.assert_equal(sorted(os.listdir('output/input')), ['cache2', 'file1'])
         self.assert_equal(sorted(os.listdir('output/input/cache2')), ['CACHEDIR.TAG'])
 
+    def test_exclude_tagged(self):
+        self.cmd('init', self.repository_location)
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('tagged1/.NOBACKUP')
+        self.create_regular_file('tagged2/00-NOBACKUP')
+        self.create_regular_file('tagged3/.NOBACKUP/file2')
+        self.cmd('create', '--exclude-if-present', '.NOBACKUP', '--exclude-if-present', '00-NOBACKUP', self.repository_location + '::test', 'input')
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+        self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'tagged3'])
+
+    def test_exclude_keep_tagged(self):
+        self.cmd('init', self.repository_location)
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('tagged1/.NOBACKUP')
+        self.create_regular_file('tagged1/file2', size=1024 * 80)
+        self.create_regular_file('tagged2/CACHEDIR.TAG', contents=b'Signature: 8a477f597d28d172789f06886806bc55 extra stuff')
+        self.create_regular_file('tagged2/file3', size=1024 * 80)
+        self.cmd('create', '--exclude-if-present', '.NOBACKUP', '--exclude-caches', '--keep-tag-files', self.repository_location + '::test', 'input')
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+        self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'tagged1', 'tagged2'])
+        self.assert_equal(sorted(os.listdir('output/input/tagged1')), ['.NOBACKUP'])
+        self.assert_equal(sorted(os.listdir('output/input/tagged2')), ['CACHEDIR.TAG'])
+        self.assert_equal(os.path.isfile('output/input/tagged1/.NOBACKUP'), True)
+        self.assert_equal(os.path.isfile('output/input/tagged2/CACHEDIR.TAG'), True)
+
     def test_path_normalization(self):
         self.cmd('init', self.repository_location)
         self.create_regular_file('dir1/dir2/file', size=1024 * 80)