Merge branch 'master' of github.com:borgbackup/borg

Thomas Waldmann 2016-03-21 16:14:24 +01:00
commit 601313836d
7 changed files with 259 additions and 40 deletions

View File

@@ -298,7 +298,19 @@ Number of files: {0.stats.nfiles}'''.format(
         cache.rollback()
         return stats

-    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False):
+    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
+                     hardlink_masters=None, original_path=None):
+        """
+        Extract archive item.
+
+        :param item: the item to extract
+        :param restore_attrs: restore file attributes
+        :param dry_run: do not write any data
+        :param stdout: write extracted data to stdout
+        :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
+        :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
+        :param original_path: b'path' key as stored in archive
+        """
         if dry_run or stdout:
             if b'chunks' in item:
                 for data in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True):
@@ -308,6 +320,7 @@ Number of files: {0.stats.nfiles}'''.format(
                 sys.stdout.buffer.flush()
             return
+        original_path = original_path or item[b'path']
         dest = self.cwd
         if item[b'path'].startswith('/') or item[b'path'].startswith('..'):
             raise Exception('Path should be relative and local')
@@ -327,13 +340,21 @@ Number of files: {0.stats.nfiles}'''.format(
         if stat.S_ISREG(mode):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))
             # Hard link?
             if b'source' in item:
                 source = os.path.join(dest, item[b'source'])
                 if os.path.exists(path):
                     os.unlink(path)
-                os.link(source, path)
-            else:
+                if not hardlink_masters:
+                    os.link(source, path)
+                    return
+                item[b'chunks'], link_target = hardlink_masters[item[b'source']]
+                if link_target:
+                    # Hard link was extracted previously, just link
+                    os.link(link_target, path)
+                    return
+                # Extract chunks, since the item which had the chunks was not extracted
             with open(path, 'wb') as fd:
                 ids = [c[0] for c in item[b'chunks']]
                 for data in self.pipeline.fetch_many(ids, is_preloaded=True):
@@ -346,6 +367,9 @@ Number of files: {0.stats.nfiles}'''.format(
                     fd.truncate(pos)
                 fd.flush()
                 self.restore_attrs(path, item, fd=fd.fileno())
+                if hardlink_masters:
+                    # Update master entry with extracted file path, so that following hardlinks don't extract twice.
+                    hardlink_masters[item.get(b'source') or original_path] = (None, path)
         elif stat.S_ISDIR(mode):
             if not os.path.exists(path):
                 os.makedirs(path)
@@ -527,7 +551,10 @@ Number of files: {0.stats.nfiles}'''.format(
             source = self.hard_links.get((st.st_ino, st.st_dev))
             if (st.st_ino, st.st_dev) in self.hard_links:
                 item = self.stat_attrs(st, path)
-                item.update({b'path': safe_path, b'source': source})
+                item.update({
+                    b'path': safe_path,
+                    b'source': source,
+                })
                 self.add_item(item)
                 status = 'h'  # regular file, hardlink (to already seen inodes)
                 return status
@@ -549,7 +576,10 @@ Number of files: {0.stats.nfiles}'''.format(
             status = 'U'  # regular file, unchanged
         else:
             status = 'A'  # regular file, added
-        item = {b'path': safe_path}
+        item = {
+            b'path': safe_path,
+            b'hardlink_master': st.st_nlink > 1,  # item is a hard link and has the chunks
+        }
         # Only chunkify the file if needed
         if chunks is None:
             fh = Archive._open_rb(path)
@@ -587,7 +617,7 @@ Number of files: {0.stats.nfiles}'''.format(
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks',
+ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
                  b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
                  b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
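
The hardlink_masters protocol introduced above is the heart of this change: during a partial extract, the item carrying the chunks of a hardlinked inode is the "master", and every later item pointing at it via b'source' either links to the already-extracted master or materializes the borrowed chunks itself. A minimal, self-contained sketch of the extract-side decision, with simplified paths and a stub in place of Borg's chunk pipeline (extract_regular_file and fetch_chunks are illustrative names, not Borg APIs):

    import os

    def fetch_chunks(chunks):
        # stub for self.pipeline.fetch_many(); here a "chunk" is just a bytes object
        for data in chunks:
            yield data

    def extract_regular_file(item, path, hardlink_masters=None, original_path=None):
        original_path = original_path or item[b'path']
        if b'source' in item:                      # hardlink slave, names its master's path
            if not hardlink_masters:
                os.link(item[b'source'], path)     # full extract: master is already on disk
                return
            item[b'chunks'], link_target = hardlink_masters[item[b'source']]
            if link_target:
                os.link(link_target, path)         # master extracted earlier in this run
                return
            # master was excluded from the extract: write its borrowed chunks ourselves
        with open(path, 'wb') as fd:
            for data in fetch_chunks(item[b'chunks']):
                fd.write(data)
        if hardlink_masters:
            # record the on-disk path so later slaves can simply os.link() to it
            hardlink_masters[item.get(b'source') or original_path] = (None, path)

Note how the mapping value changes meaning over time: (chunks, None) before the master's content exists on disk, (None, path) afterwards, mirroring the update made at the end of the real extract_item() above.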

View File

@@ -359,8 +359,20 @@ class Archiver:
         sparse = args.sparse
         strip_components = args.strip_components
         dirs = []
-        for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True):
+        partial_extract = not matcher.empty() or strip_components
+        hardlink_masters = {} if partial_extract else None
+
+        def item_is_hardlink_master(item):
+            return (partial_extract and stat.S_ISREG(item[b'mode']) and
+                    item.get(b'hardlink_master', True) and b'source' not in item)
+
+        for item in archive.iter_items(preload=True,
+                                       filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])):
             orig_path = item[b'path']
+            if item_is_hardlink_master(item):
+                hardlink_masters[orig_path] = (item.get(b'chunks'), item.get(b'source'))
+            if not matcher.match(item[b'path']):
+                continue
             if strip_components:
                 item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
                 if not item[b'path']:
@@ -378,7 +390,8 @@ class Archiver:
                     dirs.append(item)
                     archive.extract_item(item, restore_attrs=False)
                 else:
-                    archive.extract_item(item, stdout=stdout, sparse=sparse)
+                    archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
+                                         original_path=orig_path)
             except OSError as e:
                 self.print_warning('%s: %s', remove_surrogates(orig_path), e)
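
Two details in this loop are easy to miss. First, partial_extract is true whenever the user restricted the extract (patterns or --strip-components), because only then can a master be skipped while one of its slaves is extracted. Second, the iter_items filter deliberately lets unmatched masters through so their chunk lists can be recorded, and the `continue` keeps them from actually being extracted. A condensed model of that selection pass (plan_extract, archive_items and match are illustrative stand-ins for the Borg internals above):

    import stat

    def plan_extract(archive_items, match, hardlink_masters=None):
        """Yield only the items to extract, recording hardlink masters on the way."""
        partial_extract = hardlink_masters is not None

        def item_is_hardlink_master(item):
            # regular file that owns the chunks and is not itself a slave;
            # pre-1.1 archives lack b'hardlink_master', hence the True default
            return (partial_extract and stat.S_ISREG(item[b'mode']) and
                    item.get(b'hardlink_master', True) and b'source' not in item)

        for item in archive_items:
            if item_is_hardlink_master(item):
                # recorded even when the matcher would skip it, so that matched
                # slaves deeper in the tree can still find their chunks
                hardlink_masters[item[b'path']] = (item.get(b'chunks'), item.get(b'source'))
            if not match(item[b'path']):
                continue
            yield item

Items yielded here would then be fed to extract_item() with the same hardlink_masters mapping, as in the sketch after the archive.py hunks above.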
@@ -1205,6 +1218,15 @@ class Archiver:
         Both archives need to be in the same repository, and a repository location may only
         be specified for ARCHIVE1.

+        For archives created with Borg 1.1 or newer diff automatically detects whether
+        the archives are created with the same chunker params. If so, only chunk IDs
+        are compared, which is very fast.
+
+        For archives prior to Borg 1.1 chunk contents are compared by default.
+        If you did not create the archives with different chunker params,
+        pass --same-chunker-params.
+        Note that the chunker params changed from Borg 0.xx to 1.0.
+
         See the output of the "borg help patterns" command for more help on exclude patterns.
         """)
         subparser = subparsers.add_parser('diff', parents=[common_parser],
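
The epilog added here boils down to a simple rule: chunk IDs are only comparable when both archives used the same chunker params, since identical content chunked differently yields different IDs. A hedged sketch of that decision (the function name and metadata key are illustrative; the actual diff implementation is not part of this commit):

    def can_compare_chunk_ids(archive1_meta, archive2_meta, same_chunker_params=False):
        # Borg >= 1.1 records the chunker params in the archive metadata
        params1 = archive1_meta.get(b'chunker_params')
        params2 = archive2_meta.get(b'chunker_params')
        if params1 is not None and params2 is not None:
            return params1 == params2       # detected automatically
        # older archives: trust the user's --same-chunker-params assertion
        return same_chunker_params

When this returns True, two files differ iff their chunk ID sequences differ, so no file data has to be read or decompressed.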
@@ -1282,7 +1304,7 @@ class Archiver:
         See the "borg help patterns" command for more help on exclude patterns.

-        The following keys are available for --format:
+        The following keys are available for --format when listing files:
         """) + ItemFormatter.keys_help()
         subparser = subparsers.add_parser('list', parents=[common_parser],
@@ -1309,7 +1331,7 @@ class Archiver:
                                type=location_validator(),
                                help='repository/archive to list contents of')
         subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
-                               help='paths to extract; patterns are supported')
+                               help='paths to list; patterns are supported')

         mount_epilog = textwrap.dedent("""
         This command mounts an archive as a FUSE filesystem. This can be useful for

View File

@@ -293,6 +293,9 @@ class PatternMatcher:
         # Value to return from match function when none of the patterns match.
         self.fallback = fallback

+    def empty(self):
+        return not len(self._items)
+
     def add(self, patterns, value):
         """Add list of patterns to internal list. The given value is returned from the match function when one of the
         given patterns matches.
@@ -1125,16 +1128,27 @@ class ItemFormatter:
         'NL': os.linesep,
     }
     KEY_DESCRIPTIONS = {
-        'NEWLINE': 'OS dependent line separator',
-        'NL': 'alias of NEWLINE',
-        'NUL': 'NUL character for creating print0 / xargs -0 like ouput, see bpath',
-        'csize': 'compressed size',
         'bpath': 'verbatim POSIX path, can contain any character except NUL',
         'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
         'source': 'link target for links (identical to linktarget)',
+        'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links',
+        'csize': 'compressed size',
         'num_chunks': 'number of chunks in this file',
         'unique_chunks': 'number of unique chunks in this file',
+        'NEWLINE': 'OS dependent line separator',
+        'NL': 'alias of NEWLINE',
+        'NUL': 'NUL character for creating print0 / xargs -0 like ouput, see bpath',
     }
+    KEY_GROUPS = (
+        ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget'),
+        ('size', 'csize', 'num_chunks', 'unique_chunks'),
+        ('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'),
+        tuple(sorted(hashlib.algorithms_guaranteed)),
+        ('archiveid', 'archivename', 'extra'),
+        ('NEWLINE', 'NL', 'NUL', 'SPACE', 'TAB', 'CR', 'LF'),
+    )

     @classmethod
     def available_keys(cls):
@@ -1149,16 +1163,21 @@ class ItemFormatter:
         keys = []
         keys.extend(formatter.call_keys.keys())
         keys.extend(formatter.get_item_data(fake_item).keys())
-        return sorted(keys, key=lambda s: (s.isupper(), s))
+        return keys

     @classmethod
     def keys_help(cls):
         help = []
-        for key in cls.available_keys():
-            text = " - " + key
-            if key in cls.KEY_DESCRIPTIONS:
-                text += ": " + cls.KEY_DESCRIPTIONS[key]
-            help.append(text)
+        keys = cls.available_keys()
+        for group in cls.KEY_GROUPS:
+            for key in group:
+                keys.remove(key)
+                text = " - " + key
+                if key in cls.KEY_DESCRIPTIONS:
+                    text += ": " + cls.KEY_DESCRIPTIONS[key]
+                help.append(text)
+            help.append("")
+        assert not keys, str(keys)
         return "\n".join(help)

     def __init__(self, archive, format):
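
The reworked keys_help() produces the grouped listing visible in the regenerated list.rst.inc at the end of this commit: it emits KEY_GROUPS in display order, removes each emitted key from the set of available keys, and the final assert makes any formatter key that was never assigned to a group fail loudly instead of silently going undocumented. A standalone miniature of the pattern (toy data, not Borg's full key set):

    KEY_GROUPS = (('path', 'bpath'), ('size', 'csize'))
    KEY_DESCRIPTIONS = {'csize': 'compressed size'}

    def keys_help(available_keys):
        help = []
        keys = list(available_keys)
        for group in KEY_GROUPS:
            for key in group:
                keys.remove(key)            # ValueError if a grouped key is unknown
                text = " - " + key
                if key in KEY_DESCRIPTIONS:
                    text += ": " + KEY_DESCRIPTIONS[key]
                help.append(text)
            help.append("")                 # blank line separates the groups
        assert not keys, str(keys)          # leftover keys = undocumented keys
        return "\n".join(help)

    print(keys_help(['path', 'bpath', 'size', 'csize']))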

View File

@@ -467,6 +467,49 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         with self.assert_creates_file('input/dir/file'):
             self.cmd('extract', self.repository_location + '::test', '--strip-components', '0')

+    def _extract_hardlinks_setup(self):
+        os.mkdir(os.path.join(self.input_path, 'dir1'))
+        os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))
+
+        self.create_regular_file('source')
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'abba'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/hardlink'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/subdir/hardlink'))
+
+        self.create_regular_file('dir1/source2')
+        os.link(os.path.join(self.input_path, 'dir1/source2'),
+                os.path.join(self.input_path, 'dir1/aaaa'))
+
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+
+    def test_strip_components_links(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
+            assert os.stat('hardlink').st_nlink == 2
+            assert os.stat('subdir/hardlink').st_nlink == 2
+            assert os.stat('aaaa').st_nlink == 2
+            assert os.stat('source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
+    def test_extract_hardlinks(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', 'input/dir1')
+            assert os.stat('input/dir1/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/aaaa').st_nlink == 2
+            assert os.stat('input/dir1/source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
     def test_extract_include_exclude(self):
         self.cmd('init', self.repository_location)
         self.create_regular_file('file1', size=1024 * 80)

View File

@@ -374,6 +374,52 @@ Examples
     ...

+.. include:: usage/diff.rst.inc
+
+Examples
+~~~~~~~~
+::
+
+    $ borg init testrepo
+    $ mkdir testdir
+    $ cd testdir
+    $ echo asdf > file1
+    $ dd if=/dev/urandom bs=1M count=4 > file2
+    $ touch file3
+    $ borg create ../testrepo::archive1 .
+
+    $ chmod a+x file1
+    $ echo "something" >> file2
+    $ borg create ../testrepo::archive2 .
+
+    $ rm file3
+    $ touch file4
+    $ borg create ../testrepo::archive3 .
+
+    $ cd ..
+    $ borg diff testrepo::archive1 archive2
+    file1 different mode
+         archive1 -rw-r--r--
+         archive2 -rwxr-xr-x
+    file2 different contents
+         +28 B, -31 B, 4.19 MB, 4.19 MB
+
+    $ borg diff testrepo::archive2 archive3
+    file3 different contents
+         +0 B, -0 B, 0 B, <deleted>
+
+    $ borg diff testrepo::archive1 archive3
+    file1 different mode
+         archive1 -rw-r--r--
+         archive3 -rwxr-xr-x
+    file2 different contents
+         +28 B, -31 B, 4.19 MB, 4.19 MB
+    file3 different contents
+         +0 B, -0 B, 0 B, <deleted>
+    file4 different contents
+         +0 B, -0 B, <deleted>, 0 B
+
 .. include:: usage/delete.rst.inc

 Examples

View File

@@ -48,4 +48,13 @@ This command finds differences in files (contents, user, group, mode) between archives
 Both archives need to be in the same repository, and a repository location may only
 be specified for ARCHIVE1.

+For archives created with Borg 1.1 or newer diff automatically detects whether
+the archives are created with the same chunker params. If so, only chunk IDs
+are compared, which is very fast.
+
+For archives prior to Borg 1.1 chunk contents are compared by default.
+If you did not create the archives with different chunker params,
+pass --same-chunker-params.
+Note that the chunker params changed from Borg 0.xx to 1.0.
+
 See the output of the "borg help patterns" command for more help on exclude patterns.

View File

@@ -6,15 +6,16 @@ borg list
 usage: borg list [-h] [-v] [--debug] [--lock-wait N] [--show-version]
                  [--show-rc] [--no-files-cache] [--umask M]
-                 [--remote-path PATH] [--short] [--list-format LISTFORMAT]
-                 [-P PREFIX]
-                 [REPOSITORY_OR_ARCHIVE]
+                 [--remote-path PATH] [--short] [--format FORMAT] [-P PREFIX]
+                 [-e PATTERN] [--exclude-from EXCLUDEFILE]
+                 [REPOSITORY_OR_ARCHIVE] [PATH [PATH ...]]

 List archive or repository contents

 positional arguments:
     REPOSITORY_OR_ARCHIVE
                         repository/archive to list contents of
+    PATH                paths to list; patterns are supported

 optional arguments:
     -h, --help          show this help message and exit
@@ -30,15 +31,64 @@ borg list
     --umask M           set umask to M (local and remote, default: 0077)
     --remote-path PATH  set remote path to executable (default: "borg")
     --short             only print file/directory names, nothing else
-    --list-format LISTFORMAT
-                        specify format for archive file listing (default:
-                        "{mode} {user:6} {group:6} {size:8d} {isomtime}
-                        {path}{extra}{NEWLINE}") Special "{formatkeys}" exists
-                        to list available keys
+    --format FORMAT, --list-format FORMAT
+                        specify format for file listing (default: "{mode}
+                        {user:6} {group:6} {size:8d} {isomtime}
+                        {path}{extra}{NL}")
     -P PREFIX, --prefix PREFIX
                         only consider archive names starting with this prefix
+    -e PATTERN, --exclude PATTERN
+                        exclude paths matching PATTERN
+    --exclude-from EXCLUDEFILE
+                        read exclude patterns from EXCLUDEFILE, one per line

 Description
 ~~~~~~~~~~~

 This command lists the contents of a repository or an archive.

+See the "borg help patterns" command for more help on exclude patterns.
+
+The following keys are available for --format when listing files:
+
+ - type
+ - mode
+ - uid
+ - gid
+ - user
+ - group
+ - path: path interpreted as text (might be missing non-text characters, see bpath)
+ - bpath: verbatim POSIX path, can contain any character except NUL
+ - source: link target for links (identical to linktarget)
+ - linktarget
+
+ - size
+ - csize: compressed size
+ - num_chunks: number of chunks in this file
+ - unique_chunks: number of unique chunks in this file
+
+ - mtime
+ - ctime
+ - atime
+ - isomtime
+ - isoctime
+ - isoatime
+
+ - md5
+ - sha1
+ - sha224
+ - sha256
+ - sha384
+ - sha512
+
+ - archiveid
+ - archivename
+ - extra: prepends {source} with " -> " for soft links and " link to " for hard links
+
+ - NEWLINE: OS dependent line separator
+ - NL: alias of NEWLINE
+ - NUL: NUL character for creating print0 / xargs -0 like ouput, see bpath
+ - SPACE
+ - TAB
+ - CR
+ - LF