Support patterns on extraction, fixes #361

This change implements the functionality requested in issue #361:
extracting files with a given extension. It does so by permitting
patterns to be used instead of plain prefix paths. The pattern styles
supported are the same as for exclusions.
This commit is contained in:
Michael Hanselmann 2016-01-18 16:45:42 +01:00
parent 848375e2fe
commit ceae4a9fa8
2 changed files with 50 additions and 5 deletions

View File

@ -286,13 +286,25 @@ class Archiver:
manifest, key = Manifest.load(repository)
archive = Archive(repository, key, manifest, args.location.archive,
numeric_owner=args.numeric_owner)
patterns = adjust_patterns(args.paths, args.excludes)
matcher = PatternMatcher()
if args.excludes:
matcher.add(args.excludes, False)
include_patterns = []
if args.paths:
include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in args.paths)
matcher.add(include_patterns, True)
matcher.fallback = not include_patterns
dry_run = args.dry_run
stdout = args.stdout
sparse = args.sparse
strip_components = args.strip_components
dirs = []
for item in archive.iter_items(lambda item: not exclude_path(item[b'path'], patterns), preload=True):
for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True):
orig_path = item[b'path']
if strip_components:
item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
@ -317,8 +329,8 @@ class Archiver:
if not args.dry_run:
while dirs:
archive.extract_item(dirs.pop(-1))
for pattern in (patterns or []):
if isinstance(pattern, PathPrefixPattern) and pattern.match_count == 0:
for pattern in include_patterns:
if pattern.match_count == 0:
self.print_warning("Include pattern '%s' never matched.", pattern)
return self.exit_code
@ -965,7 +977,7 @@ class Archiver:
type=location_validator(archive=True),
help='archive to extract')
subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
help='paths to extract')
help='paths to extract; patterns are supported')
rename_epilog = textwrap.dedent("""
This command renames an archive in the repository.

View File

@ -562,6 +562,39 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test')
self.assert_equal(sorted(os.listdir('output/input')), ['file3'])
def test_extract_with_pattern(self):
    """Exercise ``extract`` with patterns given as PATH arguments.

    Five regular files are created and archived from "input"; the archive
    is then extracted four times into "output" using regular-expression
    patterns, ``--exclude`` combined with patterns, and a mix of ``fm:``
    patterns with a plain path. After each run the names found in
    "output/input" are compared against the expected list.
    """
    def run_extract(options, patterns):
        # Build the command line and run it with "output" as the working
        # directory, so the extracted tree ends up in "output/input".
        with changedir("output"):
            argv = ["extract"]
            argv.extend(options)
            argv.append(self.repository_location + "::test")
            argv.extend(patterns)
            self.cmd(*argv)

    def extracted_names():
        # Sorted so the comparison does not depend on directory order.
        return sorted(os.listdir("output/input"))

    self.cmd("init", self.repository_location)
    for file_name in ("file1", "file2", "file3", "file4", "file333"):
        self.create_regular_file(file_name, size=1024 * 80)
    self.cmd("create", self.repository_location + "::test", "input")

    # A regular expression that matches every path extracts everything.
    run_extract([], ["re:.*"])
    self.assert_equal(extracted_names(), ["file1", "file2", "file3", "file333", "file4"])
    shutil.rmtree("output/input")

    # An include pattern together with an exclude pattern.
    run_extract(["--exclude=re:file[34]$"], [r"re:file\d$"])
    self.assert_equal(extracted_names(), ["file1", "file2"])
    shutil.rmtree("output/input")

    # A plain --exclude path combined with an include pattern.
    run_extract(["--exclude=input/file1"], ["re:file[12]$"])
    self.assert_equal(extracted_names(), ["file2"])
    shutil.rmtree("output/input")

    # Several include arguments: two "fm:" patterns and one plain path.
    run_extract([], ["fm:input/file1", "fm:*file33*", "input/file2"])
    self.assert_equal(extracted_names(), ["file1", "file2", "file333"])
def test_exclude_caches(self):
self.cmd('init', self.repository_location)
self.create_regular_file('file1', size=1024 * 80)