Merge branch 'master' of github.com:borgbackup/borg

Thomas Waldmann 2016-03-21 16:14:24 +01:00
commit 601313836d
7 changed files with 259 additions and 40 deletions

View File

@@ -298,7 +298,19 @@ Number of files: {0.stats.nfiles}'''.format(
         cache.rollback()
         return stats

-    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False):
+    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
+                     hardlink_masters=None, original_path=None):
+        """
+        Extract archive item.
+
+        :param item: the item to extract
+        :param restore_attrs: restore file attributes
+        :param dry_run: do not write any data
+        :param stdout: write extracted data to stdout
+        :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
+        :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
+        :param original_path: b'path' key as stored in archive
+        """
         if dry_run or stdout:
             if b'chunks' in item:
                 for data in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True):
@@ -308,6 +320,7 @@ Number of files: {0.stats.nfiles}'''.format(
                 sys.stdout.buffer.flush()
             return
+        original_path = original_path or item[b'path']
         dest = self.cwd
         if item[b'path'].startswith('/') or item[b'path'].startswith('..'):
             raise Exception('Path should be relative and local')
@@ -327,13 +340,21 @@ Number of files: {0.stats.nfiles}'''.format(
         if stat.S_ISREG(mode):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))
             # Hard link?
             if b'source' in item:
                 source = os.path.join(dest, item[b'source'])
                 if os.path.exists(path):
                     os.unlink(path)
-                os.link(source, path)
-            else:
+                if not hardlink_masters:
+                    os.link(source, path)
+                    return
+                item[b'chunks'], link_target = hardlink_masters[item[b'source']]
+                if link_target:
+                    # Hard link was extracted previously, just link
+                    os.link(link_target, path)
+                    return
+                # Extract chunks, since the item which had the chunks was not extracted
             with open(path, 'wb') as fd:
                 ids = [c[0] for c in item[b'chunks']]
                 for data in self.pipeline.fetch_many(ids, is_preloaded=True):
@@ -346,6 +367,9 @@ Number of files: {0.stats.nfiles}'''.format(
                     fd.truncate(pos)
                 fd.flush()
                 self.restore_attrs(path, item, fd=fd.fileno())
+                if hardlink_masters:
+                    # Update master entry with extracted file path, so that following hardlinks don't extract twice.
+                    hardlink_masters[item.get(b'source') or original_path] = (None, path)
         elif stat.S_ISDIR(mode):
             if not os.path.exists(path):
                 os.makedirs(path)
@@ -527,7 +551,10 @@ Number of files: {0.stats.nfiles}'''.format(
             source = self.hard_links.get((st.st_ino, st.st_dev))
             if (st.st_ino, st.st_dev) in self.hard_links:
                 item = self.stat_attrs(st, path)
-                item.update({b'path': safe_path, b'source': source})
+                item.update({
+                    b'path': safe_path,
+                    b'source': source,
+                })
                 self.add_item(item)
                 status = 'h'  # regular file, hardlink (to already seen inodes)
                 return status
@@ -549,7 +576,10 @@ Number of files: {0.stats.nfiles}'''.format(
             status = 'U'  # regular file, unchanged
         else:
             status = 'A'  # regular file, added
-        item = {b'path': safe_path}
+        item = {
+            b'path': safe_path,
+            b'hardlink_master': st.st_nlink > 1,  # item is a hard link and has the chunks
+        }
         # Only chunkify the file if needed
         if chunks is None:
             fh = Archive._open_rb(path)
@@ -587,7 +617,7 @@ Number of files: {0.stats.nfiles}'''.format(
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks',
+ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
                  b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
                  b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
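
The hardlink_masters protocol introduced above is the heart of this change: during a partial extract, the item carrying the chunks of a hardlinked inode is the "master", and every later item pointing at it via b'source' either links to the already-extracted master or materializes the borrowed chunks itself. A minimal, self-contained sketch of the extract-side decision, with simplified paths and a stub in place of Borg's chunk pipeline (extract_regular_file and fetch_chunks are illustrative names, not Borg APIs):

    import os

    def fetch_chunks(chunks):
        # stub for self.pipeline.fetch_many(); here a "chunk" is just a bytes object
        for data in chunks:
            yield data

    def extract_regular_file(item, path, hardlink_masters=None, original_path=None):
        original_path = original_path or item[b'path']
        if b'source' in item:                      # hardlink slave, names its master's path
            if not hardlink_masters:
                os.link(item[b'source'], path)     # full extract: master is already on disk
                return
            item[b'chunks'], link_target = hardlink_masters[item[b'source']]
            if link_target:
                os.link(link_target, path)         # master extracted earlier in this run
                return
            # master was excluded from the extract: write its borrowed chunks ourselves
        with open(path, 'wb') as fd:
            for data in fetch_chunks(item[b'chunks']):
                fd.write(data)
        if hardlink_masters:
            # record the on-disk path so later slaves can simply os.link() to it
            hardlink_masters[item.get(b'source') or original_path] = (None, path)

Note how the mapping value changes meaning over time: (chunks, None) before the master's content exists on disk, (None, path) afterwards, mirroring the update made at the end of the real extract_item() above.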

View File

@@ -359,8 +359,20 @@ class Archiver:
         sparse = args.sparse
         strip_components = args.strip_components
         dirs = []
-        for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True):
+        partial_extract = not matcher.empty() or strip_components
+        hardlink_masters = {} if partial_extract else None
+
+        def item_is_hardlink_master(item):
+            return (partial_extract and stat.S_ISREG(item[b'mode']) and
+                    item.get(b'hardlink_master', True) and b'source' not in item)
+
+        for item in archive.iter_items(preload=True,
+                                       filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])):
             orig_path = item[b'path']
+            if item_is_hardlink_master(item):
+                hardlink_masters[orig_path] = (item.get(b'chunks'), item.get(b'source'))
+            if not matcher.match(item[b'path']):
+                continue
             if strip_components:
                 item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
                 if not item[b'path']:
@@ -378,7 +390,8 @@ class Archiver:
                     dirs.append(item)
                     archive.extract_item(item, restore_attrs=False)
                 else:
-                    archive.extract_item(item, stdout=stdout, sparse=sparse)
+                    archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
+                                         original_path=orig_path)
             except OSError as e:
                 self.print_warning('%s: %s', remove_surrogates(orig_path), e)
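
Two details in this loop are easy to miss. First, partial_extract is true whenever the user restricted the extract (patterns or --strip-components), because only then can a master be skipped while one of its slaves is extracted. Second, the iter_items filter deliberately lets unmatched masters through so their chunk lists can be recorded, and the `continue` keeps them from actually being extracted. A condensed model of that selection pass (plan_extract, archive_items and match are illustrative stand-ins for the Borg internals above):

    import stat

    def plan_extract(archive_items, match, hardlink_masters=None):
        """Yield only the items to extract, recording hardlink masters on the way."""
        partial_extract = hardlink_masters is not None

        def item_is_hardlink_master(item):
            # regular file that owns the chunks and is not itself a slave;
            # pre-1.1 archives lack b'hardlink_master', hence the True default
            return (partial_extract and stat.S_ISREG(item[b'mode']) and
                    item.get(b'hardlink_master', True) and b'source' not in item)

        for item in archive_items:
            if item_is_hardlink_master(item):
                # recorded even when the matcher would skip it, so that matched
                # slaves deeper in the tree can still find their chunks
                hardlink_masters[item[b'path']] = (item.get(b'chunks'), item.get(b'source'))
            if not match(item[b'path']):
                continue
            yield item

Items yielded here would then be fed to extract_item() with the same hardlink_masters mapping, as in the sketch after the archive.py hunks above.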
@@ -1205,6 +1218,15 @@ class Archiver:
         Both archives need to be in the same repository, and a repository location may only
         be specified for ARCHIVE1.

+        For archives created with Borg 1.1 or newer diff automatically detects whether
+        the archives are created with the same chunker params. If so, only chunk IDs
+        are compared, which is very fast.
+
+        For archives prior to Borg 1.1 chunk contents are compared by default.
+        If you did not create the archives with different chunker params,
+        pass --same-chunker-params.
+        Note that the chunker params changed from Borg 0.xx to 1.0.
+
         See the output of the "borg help patterns" command for more help on exclude patterns.
         """)
         subparser = subparsers.add_parser('diff', parents=[common_parser],
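
The epilog added here boils down to a simple rule: chunk IDs are only comparable when both archives used the same chunker params, since identical content chunked differently yields different IDs. A hedged sketch of that decision (the function name and metadata key are illustrative; the actual diff implementation is not part of this commit):

    def can_compare_chunk_ids(archive1_meta, archive2_meta, same_chunker_params=False):
        # Borg >= 1.1 records the chunker params in the archive metadata
        params1 = archive1_meta.get(b'chunker_params')
        params2 = archive2_meta.get(b'chunker_params')
        if params1 is not None and params2 is not None:
            return params1 == params2       # detected automatically
        # older archives: trust the user's --same-chunker-params assertion
        return same_chunker_params

When this returns True, two files differ iff their chunk ID sequences differ, so no file data has to be read or decompressed.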
@@ -1282,7 +1304,7 @@ class Archiver:
         See the "borg help patterns" command for more help on exclude patterns.

-        The following keys are available for --format:
+        The following keys are available for --format when listing files:
         """) + ItemFormatter.keys_help()
         subparser = subparsers.add_parser('list', parents=[common_parser],
@@ -1309,7 +1331,7 @@ class Archiver:
                                type=location_validator(),
                                help='repository/archive to list contents of')
         subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
-                               help='paths to extract; patterns are supported')
+                               help='paths to list; patterns are supported')

         mount_epilog = textwrap.dedent("""
         This command mounts an archive as a FUSE filesystem. This can be useful for

View File

@@ -293,6 +293,9 @@ class PatternMatcher:
         # Value to return from match function when none of the patterns match.
         self.fallback = fallback

+    def empty(self):
+        return not len(self._items)
+
     def add(self, patterns, value):
         """Add list of patterns to internal list. The given value is returned from the match function when one of the
         given patterns matches.
@@ -1125,16 +1128,27 @@ class ItemFormatter:
         'NL': os.linesep,
     }
     KEY_DESCRIPTIONS = {
-        'NEWLINE': 'OS dependent line separator',
-        'NL': 'alias of NEWLINE',
-        'NUL': 'NUL character for creating print0 / xargs -0 like ouput, see bpath',
-        'csize': 'compressed size',
         'bpath': 'verbatim POSIX path, can contain any character except NUL',
         'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
         'source': 'link target for links (identical to linktarget)',
+        'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links',
+        'csize': 'compressed size',
         'num_chunks': 'number of chunks in this file',
         'unique_chunks': 'number of unique chunks in this file',
+        'NEWLINE': 'OS dependent line separator',
+        'NL': 'alias of NEWLINE',
+        'NUL': 'NUL character for creating print0 / xargs -0 like ouput, see bpath',
     }
+    KEY_GROUPS = (
+        ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget'),
+        ('size', 'csize', 'num_chunks', 'unique_chunks'),
+        ('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'),
+        tuple(sorted(hashlib.algorithms_guaranteed)),
+        ('archiveid', 'archivename', 'extra'),
+        ('NEWLINE', 'NL', 'NUL', 'SPACE', 'TAB', 'CR', 'LF'),
+    )

     @classmethod
     def available_keys(cls):
@@ -1149,16 +1163,21 @@ class ItemFormatter:
         keys = []
         keys.extend(formatter.call_keys.keys())
         keys.extend(formatter.get_item_data(fake_item).keys())
-        return sorted(keys, key=lambda s: (s.isupper(), s))
+        return keys

     @classmethod
     def keys_help(cls):
         help = []
-        for key in cls.available_keys():
-            text = " - " + key
-            if key in cls.KEY_DESCRIPTIONS:
-                text += ": " + cls.KEY_DESCRIPTIONS[key]
-            help.append(text)
+        keys = cls.available_keys()
+        for group in cls.KEY_GROUPS:
+            for key in group:
+                keys.remove(key)
+                text = " - " + key
+                if key in cls.KEY_DESCRIPTIONS:
+                    text += ": " + cls.KEY_DESCRIPTIONS[key]
+                help.append(text)
+            help.append("")
+        assert not keys, str(keys)
         return "\n".join(help)

     def __init__(self, archive, format):
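
The reworked keys_help() produces the grouped listing visible in the regenerated list.rst.inc at the end of this commit: it emits KEY_GROUPS in display order, removes each emitted key from the set of available keys, and the final assert makes any formatter key that was never assigned to a group fail loudly instead of silently going undocumented. A standalone miniature of the pattern (toy data, not Borg's full key set):

    KEY_GROUPS = (('path', 'bpath'), ('size', 'csize'))
    KEY_DESCRIPTIONS = {'csize': 'compressed size'}

    def keys_help(available_keys):
        help = []
        keys = list(available_keys)
        for group in KEY_GROUPS:
            for key in group:
                keys.remove(key)            # ValueError if a grouped key is unknown
                text = " - " + key
                if key in KEY_DESCRIPTIONS:
                    text += ": " + KEY_DESCRIPTIONS[key]
                help.append(text)
            help.append("")                 # blank line separates the groups
        assert not keys, str(keys)          # leftover keys = undocumented keys
        return "\n".join(help)

    print(keys_help(['path', 'bpath', 'size', 'csize']))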

View File

@@ -467,6 +467,49 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         with self.assert_creates_file('input/dir/file'):
             self.cmd('extract', self.repository_location + '::test', '--strip-components', '0')

+    def _extract_hardlinks_setup(self):
+        os.mkdir(os.path.join(self.input_path, 'dir1'))
+        os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))
+
+        self.create_regular_file('source')
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'abba'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/hardlink'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/subdir/hardlink'))
+
+        self.create_regular_file('dir1/source2')
+        os.link(os.path.join(self.input_path, 'dir1/source2'),
+                os.path.join(self.input_path, 'dir1/aaaa'))
+
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+
+    def test_strip_components_links(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
+            assert os.stat('hardlink').st_nlink == 2
+            assert os.stat('subdir/hardlink').st_nlink == 2
+            assert os.stat('aaaa').st_nlink == 2
+            assert os.stat('source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
+    def test_extract_hardlinks(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', 'input/dir1')
+            assert os.stat('input/dir1/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/aaaa').st_nlink == 2
+            assert os.stat('input/dir1/source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
     def test_extract_include_exclude(self):
         self.cmd('init', self.repository_location)
         self.create_regular_file('file1', size=1024 * 80)

View File

@@ -374,6 +374,52 @@ Examples
     ...

+.. include:: usage/diff.rst.inc
+
+Examples
+~~~~~~~~
+::
+
+    $ borg init testrepo
+    $ mkdir testdir
+    $ cd testdir
+    $ echo asdf > file1
+    $ dd if=/dev/urandom bs=1M count=4 > file2
+    $ touch file3
+    $ borg create ../testrepo::archive1 .
+
+    $ chmod a+x file1
+    $ echo "something" >> file2
+    $ borg create ../testrepo::archive2 .
+
+    $ rm file3
+    $ touch file4
+    $ borg create ../testrepo::archive3 .
+
+    $ cd ..
+    $ borg diff testrepo::archive1 archive2
+    file1 different mode
+         archive1 -rw-r--r--
+         archive2 -rwxr-xr-x
+    file2 different contents
+         +28 B, -31 B, 4.19 MB, 4.19 MB
+
+    $ borg diff testrepo::archive2 archive3
+    file3 different contents
+         +0 B, -0 B, 0 B, <deleted>
+
+    $ borg diff testrepo::archive1 archive3
+    file1 different mode
+         archive1 -rw-r--r--
+         archive3 -rwxr-xr-x
+    file2 different contents
+         +28 B, -31 B, 4.19 MB, 4.19 MB
+    file3 different contents
+         +0 B, -0 B, 0 B, <deleted>
+    file4 different contents
+         +0 B, -0 B, <deleted>, 0 B
+
 .. include:: usage/delete.rst.inc

 Examples

View File

@@ -48,4 +48,13 @@ This command finds differences in files (contents, user, group, mode) between archives
 Both archives need to be in the same repository, and a repository location may only
 be specified for ARCHIVE1.

+For archives created with Borg 1.1 or newer diff automatically detects whether
+the archives are created with the same chunker params. If so, only chunk IDs
+are compared, which is very fast.
+
+For archives prior to Borg 1.1 chunk contents are compared by default.
+If you did not create the archives with different chunker params,
+pass --same-chunker-params.
+Note that the chunker params changed from Borg 0.xx to 1.0.
+
 See the output of the "borg help patterns" command for more help on exclude patterns.

View File

@@ -6,15 +6,16 @@ borg list
 usage: borg list [-h] [-v] [--debug] [--lock-wait N] [--show-version]
                  [--show-rc] [--no-files-cache] [--umask M]
-                 [--remote-path PATH] [--short] [--list-format LISTFORMAT]
-                 [-P PREFIX]
-                 [REPOSITORY_OR_ARCHIVE]
+                 [--remote-path PATH] [--short] [--format FORMAT] [-P PREFIX]
+                 [-e PATTERN] [--exclude-from EXCLUDEFILE]
+                 [REPOSITORY_OR_ARCHIVE] [PATH [PATH ...]]

 List archive or repository contents

 positional arguments:
     REPOSITORY_OR_ARCHIVE
                         repository/archive to list contents of
+    PATH                paths to list; patterns are supported

 optional arguments:
     -h, --help          show this help message and exit
@@ -30,15 +31,64 @@ borg list
     --umask M           set umask to M (local and remote, default: 0077)
     --remote-path PATH  set remote path to executable (default: "borg")
     --short             only print file/directory names, nothing else
-    --list-format LISTFORMAT
-                        specify format for archive file listing (default:
-                        "{mode} {user:6} {group:6} {size:8d} {isomtime}
-                        {path}{extra}{NEWLINE}") Special "{formatkeys}" exists
-                        to list available keys
+    --format FORMAT, --list-format FORMAT
+                        specify format for file listing (default: "{mode}
+                        {user:6} {group:6} {size:8d} {isomtime}
+                        {path}{extra}{NL}")
     -P PREFIX, --prefix PREFIX
                         only consider archive names starting with this prefix
+    -e PATTERN, --exclude PATTERN
+                        exclude paths matching PATTERN
+    --exclude-from EXCLUDEFILE
+                        read exclude patterns from EXCLUDEFILE, one per line

 Description
 ~~~~~~~~~~~

 This command lists the contents of a repository or an archive.

+See the "borg help patterns" command for more help on exclude patterns.
+
+The following keys are available for --format when listing files:
+
+ - type
+ - mode
+ - uid
+ - gid
+ - user
+ - group
+ - path: path interpreted as text (might be missing non-text characters, see bpath)
+ - bpath: verbatim POSIX path, can contain any character except NUL
+ - source: link target for links (identical to linktarget)
+ - linktarget
+
+ - size
+ - csize: compressed size
+ - num_chunks: number of chunks in this file
+ - unique_chunks: number of unique chunks in this file
+
+ - mtime
+ - ctime
+ - atime
+ - isomtime
+ - isoctime
+ - isoatime
+
+ - md5
+ - sha1
+ - sha224
+ - sha256
+ - sha384
+ - sha512
+
+ - archiveid
+ - archivename
+ - extra: prepends {source} with " -> " for soft links and " link to " for hard links
+
+ - NEWLINE: OS dependent line separator
+ - NL: alias of NEWLINE
+ - NUL: NUL character for creating print0 / xargs -0 like ouput, see bpath
+ - SPACE
+ - TAB
+ - CR
+ - LF