From 1d5410a3cfaf8ae3127d3b664c15a65e8bba143a Mon Sep 17 00:00:00 2001
From: Jeremy Maitin-Shepard
Date: Wed, 30 Apr 2014 14:27:04 -0700
Subject: [PATCH] Add --exclude-caches option, for excluding CACHEDIR.TAG directories

See http://www.brynosaurus.com/cachedir/spec.html
---
 attic/archiver.py           | 14 ++++++++++----
 attic/helpers.py            | 21 +++++++++++++++++++++
 attic/testsuite/archiver.py | 17 +++++++++++++++--
 3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/attic/archiver.py b/attic/archiver.py
index 2827209c8..5b2757f78 100644
--- a/attic/archiver.py
+++ b/attic/archiver.py
@@ -17,7 +17,8 @@ from attic.key import key_creator
 from attic.helpers import Error, location_validator, format_time, \
     format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \
     get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
-    Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics
+    Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
+    is_cachedir
 from attic.remote import RepositoryServer, RemoteRepository
 
 
@@ -124,7 +125,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                     continue
             else:
                 restrict_dev = None
-            self._process(archive, cache, args.excludes, skip_inodes, path, restrict_dev)
+            self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev)
         archive.save()
         if args.stats:
             t = datetime.now()
@@ -140,7 +141,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
             print('-' * 78)
         return self.exit_code
 
-    def _process(self, archive, cache, excludes, skip_inodes, path, restrict_dev):
+    def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev):
         if exclude_path(path, excludes):
             return
         try:
@@ -163,6 +164,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
             except IOError as e:
                 self.print_error('%s: %s', path, e)
         elif stat.S_ISDIR(st.st_mode):
+            if exclude_caches and is_cachedir(path):
+                return
             archive.process_item(path, st)
             try:
                 entries = os.listdir(path)
@@ -170,7 +173,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                 self.print_error('%s: %s', path, e)
             else:
                 for filename in sorted(entries):
-                    self._process(archive, cache, excludes, skip_inodes,
+                    self._process(archive, cache, excludes, exclude_caches, skip_inodes,
                                   os.path.join(path, filename), restrict_dev)
         elif stat.S_ISLNK(st.st_mode):
             archive.process_symlink(path, st)
@@ -519,6 +522,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         subparser.add_argument('--exclude-from', dest='exclude_files',
                                type=argparse.FileType('r'), action='append',
                                metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
+        subparser.add_argument('--exclude-caches', dest='exclude_caches',
+                               action='store_true', default=False,
+                               help='exclude directories that contain a CACHEDIR.TAG file (http://www.brynosaurus.com/cachedir/spec.html)')
         subparser.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
                                type=int, default=300, metavar='SECONDS',
                                help='write checkpoint every SECONDS seconds (Default: 300)')
diff --git a/attic/helpers.py b/attic/helpers.py
index c99965c94..d8a12dc02 100644
--- a/attic/helpers.py
+++ b/attic/helpers.py
@@ -245,6 +245,27 @@ class ExcludePattern(IncludePattern):
     def __repr__(self):
         return '%s(%s)' % (type(self), self.pattern)
 
+
+def is_cachedir(path):
+    """Determines whether the specified path is a cache directory (and
+    therefore should potentially be excluded from the backup) according to
+    the CACHEDIR.TAG protocol
+    (http://www.brynosaurus.com/cachedir/spec.html).
+
+    Returns False if the tag file is missing, unreadable or does not start
+    with the signature required by the specification.
+    """
+    tag_filename = 'CACHEDIR.TAG'
+    tag_contents = b'Signature: 8a477f597d28d172789f06886806bc55'
+    tag_path = os.path.join(path, tag_filename)
+    try:
+        with open(tag_path, 'rb') as tag_file:
+            return tag_file.read(len(tag_contents)) == tag_contents
+    except (IOError, OSError):
+        # A missing or unreadable tag must not abort the whole backup; the
+        # directory is simply not treated as a cache.
+        return False
+
 
 def walk_path(path, skip_inodes=None):
     st = os.lstat(path)
diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py
index a6fea2135..696d61eb6 100644
--- a/attic/testsuite/archiver.py
+++ b/attic/testsuite/archiver.py
@@ -106,12 +106,14 @@ class ArchiverTestCaseBase(AtticTestCase):
 
 class ArchiverTestCase(ArchiverTestCaseBase):
 
-    def create_regular_file(self, name, size=0):
+    def create_regular_file(self, name, size=0, contents=None):
         filename = os.path.join(self.input_path, name)
         if not os.path.exists(os.path.dirname(filename)):
             os.makedirs(os.path.dirname(filename))
         with open(filename, 'wb') as fd:
-            fd.write(b'X' * size)
+            if contents is None:
+                contents = b'X' * size
+            fd.write(contents)
 
     def create_test_files(self):
         """Create a minimal test case including all supported file types
@@ -178,6 +180,17 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             self.attic('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test')
         self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file3'])
 
+    def test_exclude_caches(self):
+        self.attic('init', self.repository_location)
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('cache1/CACHEDIR.TAG', contents=b'Signature: 8a477f597d28d172789f06886806bc55 extra stuff')
+        self.create_regular_file('cache2/CACHEDIR.TAG', contents=b'invalid signature')
+        self.attic('create', '--exclude-caches', self.repository_location + '::test', 'input')
+        with changedir('output'):
+            self.attic('extract', self.repository_location + '::test')
+        self.assert_equal(sorted(os.listdir('output/input')), ['cache2', 'file1'])
+        self.assert_equal(sorted(os.listdir('output/input/cache2')), ['CACHEDIR.TAG'])
+
     def test_path_normalization(self):
         self.attic('init', self.repository_location)
         self.create_regular_file('dir1/dir2/file', size=1024 * 80)