Merge branch 'master' of github.com:borgbackup/borg

This commit is contained in:
Thomas Waldmann 2016-03-21 16:14:24 +01:00
commit 601313836d
7 changed files with 259 additions and 40 deletions

View File

@ -298,7 +298,19 @@ Number of files: {0.stats.nfiles}'''.format(
cache.rollback()
return stats
def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False):
def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
hardlink_masters=None, original_path=None):
"""
Extract archive item.
:param item: the item to extract
:param restore_attrs: restore file attributes
:param dry_run: do not write any data
:param stdout: write extracted data to stdout
:param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
:param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
:param original_path: b'path' key as stored in archive
"""
if dry_run or stdout:
if b'chunks' in item:
for data in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True):
@ -308,6 +320,7 @@ Number of files: {0.stats.nfiles}'''.format(
sys.stdout.buffer.flush()
return
original_path = original_path or item[b'path']
dest = self.cwd
if item[b'path'].startswith('/') or item[b'path'].startswith('..'):
raise Exception('Path should be relative and local')
@ -327,25 +340,36 @@ Number of files: {0.stats.nfiles}'''.format(
if stat.S_ISREG(mode):
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
# Hard link?
if b'source' in item:
source = os.path.join(dest, item[b'source'])
if os.path.exists(path):
os.unlink(path)
os.link(source, path)
else:
with open(path, 'wb') as fd:
ids = [c[0] for c in item[b'chunks']]
for data in self.pipeline.fetch_many(ids, is_preloaded=True):
if sparse and self.zeros.startswith(data):
# all-zero chunk: create a hole in a sparse file
fd.seek(len(data), 1)
else:
fd.write(data)
pos = fd.tell()
fd.truncate(pos)
fd.flush()
self.restore_attrs(path, item, fd=fd.fileno())
if not hardlink_masters:
os.link(source, path)
return
item[b'chunks'], link_target = hardlink_masters[item[b'source']]
if link_target:
# Hard link was extracted previously, just link
os.link(link_target, path)
return
# Extract chunks, since the item which had the chunks was not extracted
with open(path, 'wb') as fd:
ids = [c[0] for c in item[b'chunks']]
for data in self.pipeline.fetch_many(ids, is_preloaded=True):
if sparse and self.zeros.startswith(data):
# all-zero chunk: create a hole in a sparse file
fd.seek(len(data), 1)
else:
fd.write(data)
pos = fd.tell()
fd.truncate(pos)
fd.flush()
self.restore_attrs(path, item, fd=fd.fileno())
if hardlink_masters:
# Update master entry with extracted file path, so that following hardlinks don't extract twice.
hardlink_masters[item.get(b'source') or original_path] = (None, path)
elif stat.S_ISDIR(mode):
if not os.path.exists(path):
os.makedirs(path)
@ -527,7 +551,10 @@ Number of files: {0.stats.nfiles}'''.format(
source = self.hard_links.get((st.st_ino, st.st_dev))
if (st.st_ino, st.st_dev) in self.hard_links:
item = self.stat_attrs(st, path)
item.update({b'path': safe_path, b'source': source})
item.update({
b'path': safe_path,
b'source': source,
})
self.add_item(item)
status = 'h' # regular file, hardlink (to already seen inodes)
return status
@ -549,7 +576,10 @@ Number of files: {0.stats.nfiles}'''.format(
status = 'U' # regular file, unchanged
else:
status = 'A' # regular file, added
item = {b'path': safe_path}
item = {
b'path': safe_path,
b'hardlink_master': st.st_nlink > 1, # item is a hard link and has the chunks
}
# Only chunkify the file if needed
if chunks is None:
fh = Archive._open_rb(path)
@ -587,7 +617,7 @@ Number of files: {0.stats.nfiles}'''.format(
# this set must be kept complete, otherwise the RobustUnpacker might malfunction:
ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks',
ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
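A minimal sketch of the hardlink_masters contract used above (simplified names, not the actual borg implementation; write_chunks() is a hypothetical placeholder for fetching an item's chunks and writing them to disk). Masters are recorded as path -> (chunks, link_target); extracting a master records its on-disk path, and items carrying b'source' either hard-link to that path or fall back to extracting the recorded chunks:

import os

def collect_masters(all_items):
    """Pass 1: remember the chunks of every hardlink master, selected or not."""
    masters = {}
    for item in all_items:
        if item.get(b'hardlink_master') and b'source' not in item:
            masters[item[b'path']] = (item.get(b'chunks'), None)  # (chunks, extracted path)
    return masters

def extract_one(item, masters, write_chunks):
    """Pass 2: extract one selected item, resolving hard links via the masters mapping."""
    path = item[b'path']
    if b'source' not in item:                       # a master or a plain regular file
        write_chunks(path, item[b'chunks'])
        masters[path] = (None, path)                # later links can point at this path
        return
    chunks, link_target = masters[item[b'source']]
    if link_target:
        os.link(link_target, path)                  # master already on disk: just link
    else:
        write_chunks(path, chunks)                  # master was not selected: write its chunks here
        masters[item[b'source']] = (None, path)

The point is that a partial extract can still materialize a file whose chunks are stored with a master outside the selected paths, which is what the hardlink_masters plumbing in extract_item enables.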

View File

@ -359,8 +359,20 @@ class Archiver:
sparse = args.sparse
strip_components = args.strip_components
dirs = []
for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True):
partial_extract = not matcher.empty() or strip_components
hardlink_masters = {} if partial_extract else None
def item_is_hardlink_master(item):
return (partial_extract and stat.S_ISREG(item[b'mode']) and
item.get(b'hardlink_master', True) and b'source' not in item)
for item in archive.iter_items(preload=True,
filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])):
orig_path = item[b'path']
if item_is_hardlink_master(item):
hardlink_masters[orig_path] = (item.get(b'chunks'), item.get(b'source'))
if not matcher.match(item[b'path']):
continue
if strip_components:
item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
if not item[b'path']:
@ -378,7 +390,8 @@ class Archiver:
dirs.append(item)
archive.extract_item(item, restore_attrs=False)
else:
archive.extract_item(item, stdout=stdout, sparse=sparse)
archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
original_path=orig_path)
except OSError as e:
self.print_warning('%s: %s', remove_surrogates(orig_path), e)
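Illustrative commands only (repository path and archive name are hypothetical): with the filter above, extracting just a subtree, with or without --strip-components, reproduces the hard links inside it even when the link master lies outside the selected paths:

$ borg extract /path/to/repo::myarchive home/user/project
$ borg extract --strip-components 2 /path/to/repo::myarchive home/user/project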
@ -1205,6 +1218,15 @@ class Archiver:
Both archives need to be in the same repository, and a repository location may only
be specified for ARCHIVE1.
For archives created with Borg 1.1 or newer, diff automatically detects whether
the archives were created with the same chunker params. If so, only chunk IDs
are compared, which is very fast.
For archives created prior to Borg 1.1, chunk contents are compared by default.
If you know the archives were created with the same chunker params,
pass --same-chunker-params to force the faster chunk ID comparison.
Note that the chunker params changed from Borg 0.xx to 1.0.
See the output of the "borg help patterns" command for more help on exclude patterns.
""")
subparser = subparsers.add_parser('diff', parents=[common_parser],
@ -1282,7 +1304,7 @@ class Archiver:
See the "borg help patterns" command for more help on exclude patterns.
The following keys are available for --format:
The following keys are available for --format when listing files:
""") + ItemFormatter.keys_help()
subparser = subparsers.add_parser('list', parents=[common_parser],
@ -1309,7 +1331,7 @@ class Archiver:
type=location_validator(),
help='repository/archive to list contents of')
subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
help='paths to extract; patterns are supported')
help='paths to list; patterns are supported')
mount_epilog = textwrap.dedent("""
This command mounts an archive as a FUSE filesystem. This can be useful for

View File

@ -293,6 +293,9 @@ class PatternMatcher:
# Value to return from match function when none of the patterns match.
self.fallback = fallback
def empty(self):
return not len(self._items)
def add(self, patterns, value):
"""Add list of patterns to internal list. The given value is returned from the match function when one of the
given patterns matches.
@ -1125,16 +1128,27 @@ class ItemFormatter:
'NL': os.linesep,
}
KEY_DESCRIPTIONS = {
'NEWLINE': 'OS dependent line separator',
'NL': 'alias of NEWLINE',
'NUL': 'NUL character for creating print0 / xargs -0 like output, see bpath',
'csize': 'compressed size',
'bpath': 'verbatim POSIX path, can contain any character except NUL',
'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
'source': 'link target for links (identical to linktarget)',
'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links',
'csize': 'compressed size',
'num_chunks': 'number of chunks in this file',
'unique_chunks': 'number of unique chunks in this file',
'NEWLINE': 'OS dependent line separator',
'NL': 'alias of NEWLINE',
'NUL': 'NUL character for creating print0 / xargs -0 like output, see bpath',
}
KEY_GROUPS = (
('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget'),
('size', 'csize', 'num_chunks', 'unique_chunks'),
('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'),
tuple(sorted(hashlib.algorithms_guaranteed)),
('archiveid', 'archivename', 'extra'),
('NEWLINE', 'NL', 'NUL', 'SPACE', 'TAB', 'CR', 'LF'),
)
@classmethod
def available_keys(cls):
@ -1149,16 +1163,21 @@ class ItemFormatter:
keys = []
keys.extend(formatter.call_keys.keys())
keys.extend(formatter.get_item_data(fake_item).keys())
return sorted(keys, key=lambda s: (s.isupper(), s))
return keys
@classmethod
def keys_help(cls):
help = []
for key in cls.available_keys():
text = " - " + key
if key in cls.KEY_DESCRIPTIONS:
text += ": " + cls.KEY_DESCRIPTIONS[key]
help.append(text)
keys = cls.available_keys()
for group in cls.KEY_GROUPS:
for key in group:
keys.remove(key)
text = " - " + key
if key in cls.KEY_DESCRIPTIONS:
text += ": " + cls.KEY_DESCRIPTIONS[key]
help.append(text)
help.append("")
assert not keys, str(keys)
return "\n".join(help)
def __init__(self, archive, format):

View File

@ -467,6 +467,49 @@ class ArchiverTestCase(ArchiverTestCaseBase):
with self.assert_creates_file('input/dir/file'):
self.cmd('extract', self.repository_location + '::test', '--strip-components', '0')
def _extract_hardlinks_setup(self):
os.mkdir(os.path.join(self.input_path, 'dir1'))
os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))
self.create_regular_file('source')
os.link(os.path.join(self.input_path, 'source'),
os.path.join(self.input_path, 'abba'))
os.link(os.path.join(self.input_path, 'source'),
os.path.join(self.input_path, 'dir1/hardlink'))
os.link(os.path.join(self.input_path, 'source'),
os.path.join(self.input_path, 'dir1/subdir/hardlink'))
self.create_regular_file('dir1/source2')
os.link(os.path.join(self.input_path, 'dir1/source2'),
os.path.join(self.input_path, 'dir1/aaaa'))
self.cmd('init', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
def test_strip_components_links(self):
self._extract_hardlinks_setup()
with changedir('output'):
self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
assert os.stat('hardlink').st_nlink == 2
assert os.stat('subdir/hardlink').st_nlink == 2
assert os.stat('aaaa').st_nlink == 2
assert os.stat('source2').st_nlink == 2
with changedir('output'):
self.cmd('extract', self.repository_location + '::test')
assert os.stat('input/dir1/hardlink').st_nlink == 4
def test_extract_hardlinks(self):
self._extract_hardlinks_setup()
with changedir('output'):
self.cmd('extract', self.repository_location + '::test', 'input/dir1')
assert os.stat('input/dir1/hardlink').st_nlink == 2
assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
assert os.stat('input/dir1/aaaa').st_nlink == 2
assert os.stat('input/dir1/source2').st_nlink == 2
with changedir('output'):
self.cmd('extract', self.repository_location + '::test')
assert os.stat('input/dir1/hardlink').st_nlink == 4
def test_extract_include_exclude(self):
self.cmd('init', self.repository_location)
self.create_regular_file('file1', size=1024 * 80)

View File

@ -374,6 +374,52 @@ Examples
...
.. include:: usage/diff.rst.inc
Examples
~~~~~~~~
::
$ borg init testrepo
$ mkdir testdir
$ cd testdir
$ echo asdf > file1
$ dd if=/dev/urandom bs=1M count=4 > file2
$ touch file3
$ borg create ../testrepo::archive1 .
$ chmod a+x file1
$ echo "something" >> file2
$ borg create ../testrepo::archive2 .
$ rm file3
$ touch file4
$ borg create ../testrepo::archive3 .
$ cd ..
$ borg diff testrepo::archive1 archive2
file1 different mode
archive1 -rw-r--r--
archive2 -rwxr-xr-x
file2 different contents
+28 B, -31 B, 4.19 MB, 4.19 MB
$ borg diff testrepo::archive2 archive3
file3 different contents
+0 B, -0 B, 0 B, <deleted>
$ borg diff testrepo::archive1 archive3
file1 different mode
archive1 -rw-r--r--
archive3 -rwxr-xr-x
file2 different contents
+28 B, -31 B, 4.19 MB, 4.19 MB
file3 different contents
+0 B, -0 B, 0 B, <deleted>
file4 different contents
+0 B, -0 B, <deleted>, 0 B
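For archives made before Borg 1.1, where the chunker params cannot be detected automatically, the faster chunk ID comparison can still be requested explicitly. A hypothetical invocation, assuming both archives really were created with the same chunker params::

$ borg diff --same-chunker-params testrepo::archive1 archive2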
.. include:: usage/delete.rst.inc
Examples

View File

@ -48,4 +48,13 @@ This command finds differences in files (contents, user, group, mode) between ar
Both archives need to be in the same repository, and a repository location may only
be specified for ARCHIVE1.
For archives created with Borg 1.1 or newer, diff automatically detects whether
the archives were created with the same chunker params. If so, only chunk IDs
are compared, which is very fast.
For archives created prior to Borg 1.1, chunk contents are compared by default.
If you know the archives were created with the same chunker params,
pass --same-chunker-params to force the faster chunk ID comparison.
Note that the chunker params changed from Borg 0.xx to 1.0.
See the output of the "borg help patterns" command for more help on exclude patterns.

View File

@ -6,15 +6,16 @@ borg list
usage: borg list [-h] [-v] [--debug] [--lock-wait N] [--show-version]
[--show-rc] [--no-files-cache] [--umask M]
[--remote-path PATH] [--short] [--list-format LISTFORMAT]
[-P PREFIX]
[REPOSITORY_OR_ARCHIVE]
[--remote-path PATH] [--short] [--format FORMAT] [-P PREFIX]
[-e PATTERN] [--exclude-from EXCLUDEFILE]
[REPOSITORY_OR_ARCHIVE] [PATH [PATH ...]]
List archive or repository contents
positional arguments:
REPOSITORY_OR_ARCHIVE
repository/archive to list contents of
PATH paths to list; patterns are supported
optional arguments:
-h, --help show this help message and exit
@ -30,15 +31,64 @@ borg list
--umask M set umask to M (local and remote, default: 0077)
--remote-path PATH set remote path to executable (default: "borg")
--short only print file/directory names, nothing else
--list-format LISTFORMAT
specify format for archive file listing (default:
"{mode} {user:6} {group:6} {size:8d} {isomtime}
{path}{extra}{NEWLINE}") Special "{formatkeys}" exists
to list available keys
--format FORMAT, --list-format FORMAT
specify format for file listing (default: "{mode}
{user:6} {group:6} {size:8d} {isomtime}
{path}{extra}{NL}")
-P PREFIX, --prefix PREFIX
only consider archive names starting with this prefix
-e PATTERN, --exclude PATTERN
exclude paths matching PATTERN
--exclude-from EXCLUDEFILE
read exclude patterns from EXCLUDEFILE, one per line
Description
~~~~~~~~~~~
This command lists the contents of a repository or an archive.
See the "borg help patterns" command for more help on exclude patterns.
The following keys are available for --format when listing files:
- type
- mode
- uid
- gid
- user
- group
- path: path interpreted as text (might be missing non-text characters, see bpath)
- bpath: verbatim POSIX path, can contain any character except NUL
- source: link target for links (identical to linktarget)
- linktarget
- size
- csize: compressed size
- num_chunks: number of chunks in this file
- unique_chunks: number of unique chunks in this file
- mtime
- ctime
- atime
- isomtime
- isoctime
- isoatime
- md5
- sha1
- sha224
- sha256
- sha384
- sha512
- archiveid
- archivename
- extra: prepends {source} with " -> " for soft links and " link to " for hard links
- NEWLINE: OS dependent line separator
- NL: alias of NEWLINE
- NUL: NUL character for creating print0 / xargs -0 like output, see bpath
- SPACE
- TAB
- CR
- LF
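As an illustration (archive name is hypothetical), several of these keys combined into one --format string::

$ borg list --format '{mode} {user:6} {size:8d} {sha256} {path}{NL}' /path/to/repo::myarchive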