Add --exclude-caches option, for excluding CACHEDIR.TAG directories

See http://www.brynosaurus.com/cachedir/spec.html
This commit is contained in:
Jeremy Maitin-Shepard 2014-04-30 14:27:04 -07:00
parent 1955484e1b
commit 1d5410a3cf
3 changed files with 44 additions and 6 deletions

View File

@ -17,7 +17,8 @@ from attic.key import key_creator
from attic.helpers import Error, location_validator, format_time, \
format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \
get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics
Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
is_cachedir
from attic.remote import RepositoryServer, RemoteRepository
@ -124,7 +125,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
continue
else:
restrict_dev = None
self._process(archive, cache, args.excludes, skip_inodes, path, restrict_dev)
self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev)
archive.save()
if args.stats:
t = datetime.now()
@ -140,7 +141,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
print('-' * 78)
return self.exit_code
def _process(self, archive, cache, excludes, skip_inodes, path, restrict_dev):
def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev):
if exclude_path(path, excludes):
return
try:
@ -163,6 +164,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
except IOError as e:
self.print_error('%s: %s', path, e)
elif stat.S_ISDIR(st.st_mode):
if exclude_caches and is_cachedir(path):
return
archive.process_item(path, st)
try:
entries = os.listdir(path)
@ -170,7 +173,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
self.print_error('%s: %s', path, e)
else:
for filename in sorted(entries):
self._process(archive, cache, excludes, skip_inodes,
self._process(archive, cache, excludes, exclude_caches, skip_inodes,
os.path.join(path, filename), restrict_dev)
elif stat.S_ISLNK(st.st_mode):
archive.process_symlink(path, st)
@ -519,6 +522,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
subparser.add_argument('--exclude-from', dest='exclude_files',
type=argparse.FileType('r'), action='append',
metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
subparser.add_argument('--exclude-caches', dest='exclude_caches',
action='store_true', default=False,
help='exclude directories that contain a CACHEDIR.TAG file (http://www.brynosaurus.com/cachedir/spec.html)')
subparser.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
type=int, default=300, metavar='SECONDS',
help='write checkpoint every SECONDS seconds (Default: 300)')

View File

@ -245,6 +245,25 @@ class ExcludePattern(IncludePattern):
def __repr__(self):
return '%s(%s)' % (type(self), self.pattern)
def is_cachedir(path):
    """Return True if *path* is a cache directory per the CACHEDIR.TAG protocol.

    A directory counts as a cache directory (and may therefore be excluded
    from the backup) when it contains a file named ``CACHEDIR.TAG`` whose
    content starts with the fixed signature defined by the specification
    (http://www.brynosaurus.com/cachedir/spec.html). Any bytes following
    the signature are ignored.

    A tag that is missing or cannot be opened/read (permission denied, a
    directory named CACHEDIR.TAG, ...) means the directory is simply not
    treated as a cache: False is returned instead of propagating the error.
    """
    tag_filename = 'CACHEDIR.TAG'
    tag_contents = b'Signature: 8a477f597d28d172789f06886806bc55'
    tag_path = os.path.join(path, tag_filename)
    try:
        # Open directly rather than testing os.path.exists() first: the
        # check-then-open sequence is racy and the error is handled anyway.
        with open(tag_path, 'rb') as tag_file:
            # Only the leading signature matters.
            return tag_file.read(len(tag_contents)) == tag_contents
    except (IOError, OSError):
        # IOError is listed explicitly for interpreters where it is not yet
        # an alias of OSError.
        return False
def walk_path(path, skip_inodes=None):
st = os.lstat(path)

View File

@ -106,12 +106,14 @@ class ArchiverTestCaseBase(AtticTestCase):
class ArchiverTestCase(ArchiverTestCaseBase):
def create_regular_file(self, name, size=0):
def create_regular_file(self, name, size=0, contents=None):
filename = os.path.join(self.input_path, name)
if not os.path.exists(os.path.dirname(filename)):
os.makedirs(os.path.dirname(filename))
with open(filename, 'wb') as fd:
fd.write(b'X' * size)
if contents is None:
contents = b'X' * size
fd.write(contents)
def create_test_files(self):
"""Create a minimal test case including all supported file types
@ -178,6 +180,17 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.attic('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test')
self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file3'])
def test_exclude_caches(self):
    """Check that --exclude-caches drops only directories with a valid CACHEDIR.TAG."""
    self.attic('init', self.repository_location)
    archive_location = self.repository_location + '::test'
    self.create_regular_file('file1', size=1024 * 80)
    # cache1: the tag begins with the expected signature (trailing bytes
    # are present), so the directory is expected to be left out.
    self.create_regular_file(
        'cache1/CACHEDIR.TAG',
        contents=b'Signature: 8a477f597d28d172789f06886806bc55 extra stuff')
    # cache2: the tag content is not the signature, so the directory is
    # expected to be backed up like any other.
    self.create_regular_file(
        'cache2/CACHEDIR.TAG',
        contents=b'invalid signature')
    self.attic('create', '--exclude-caches', archive_location, 'input')
    with changedir('output'):
        self.attic('extract', archive_location)
    top_level = sorted(os.listdir('output/input'))
    self.assert_equal(top_level, ['cache2', 'file1'])
    kept_cache = sorted(os.listdir('output/input/cache2'))
    self.assert_equal(kept_cache, ['CACHEDIR.TAG'])
def test_path_normalization(self):
self.attic('init', self.repository_location)
self.create_regular_file('dir1/dir2/file', size=1024 * 80)