Merge branch 'master' of github.com:borgbackup/borg

2016-03-21 16:14:24 +01:00 · 2016-03-21 16:14:24 +01:00 · 601313836d
parent de7582a9d7 2f7d8aaffb
commit 601313836d
7 changed files with 259 additions and 40 deletions
--- a/borg/archive.py
+++ b/borg/archive.py
@ -298,7 +298,19 @@ Number of files: {0.stats.nfiles}'''.format(
        cache.rollback()
        return stats

-    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False):
+    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
+                     hardlink_masters=None, original_path=None):
+        """
+        Extract archive item.
+
+        :param item: the item to extract
+        :param restore_attrs: restore file attributes
+        :param dry_run: do not write any data
+        :param stdout: write extracted data to stdout
+        :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
+        :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
+        :param original_path: b'path' key as stored in archive
+        """
        if dry_run or stdout:
            if b'chunks' in item:
                for data in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True):
@ -308,6 +320,7 @@ Number of files: {0.stats.nfiles}'''.format(
                    sys.stdout.buffer.flush()
            return

+        original_path = original_path or item[b'path']
        dest = self.cwd
        if item[b'path'].startswith('/') or item[b'path'].startswith('..'):
            raise Exception('Path should be relative and local')
@ -327,13 +340,21 @@ Number of files: {0.stats.nfiles}'''.format(
        if stat.S_ISREG(mode):
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))
+
            # Hard link?
            if b'source' in item:
                source = os.path.join(dest, item[b'source'])
                if os.path.exists(path):
                    os.unlink(path)
+                if not hardlink_masters:
                    os.link(source, path)
-            else:
+                    return
+                item[b'chunks'], link_target = hardlink_masters[item[b'source']]
+                if link_target:
+                    # Hard link was extracted previously, just link
+                    os.link(link_target, path)
+                    return
+                # Extract chunks, since the item which had the chunks was not extracted
            with open(path, 'wb') as fd:
                ids = [c[0] for c in item[b'chunks']]
                for data in self.pipeline.fetch_many(ids, is_preloaded=True):
@ -346,6 +367,9 @@ Number of files: {0.stats.nfiles}'''.format(
                fd.truncate(pos)
                fd.flush()
                self.restore_attrs(path, item, fd=fd.fileno())
+            if hardlink_masters:
+                # Update master entry with extracted file path, so that following hardlinks don't extract twice.
+                hardlink_masters[item.get(b'source') or original_path] = (None, path)
        elif stat.S_ISDIR(mode):
            if not os.path.exists(path):
                os.makedirs(path)
@ -527,7 +551,10 @@ Number of files: {0.stats.nfiles}'''.format(
            source = self.hard_links.get((st.st_ino, st.st_dev))
            if (st.st_ino, st.st_dev) in self.hard_links:
                item = self.stat_attrs(st, path)
-                item.update({b'path': safe_path, b'source': source})
+                item.update({
+                    b'path': safe_path,
+                    b'source': source,
+                })
                self.add_item(item)
                status = 'h'  # regular file, hardlink (to already seen inodes)
                return status
@ -549,7 +576,10 @@ Number of files: {0.stats.nfiles}'''.format(
                status = 'U'  # regular file, unchanged
        else:
            status = 'A'  # regular file, added
-        item = {b'path': safe_path}
+        item = {
+            b'path': safe_path,
+            b'hardlink_master': st.st_nlink > 1,  # item is a hard link and has the chunks
+        }
        # Only chunkify the file if needed
        if chunks is None:
            fh = Archive._open_rb(path)
@ -587,7 +617,7 @@ Number of files: {0.stats.nfiles}'''.format(


 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks',
+ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
                 b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
                 b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])

--- a/borg/archiver.py
+++ b/borg/archiver.py
@ -359,8 +359,20 @@ class Archiver:
        sparse = args.sparse
        strip_components = args.strip_components
        dirs = []
-        for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True):
+        partial_extract = not matcher.empty() or strip_components
+        hardlink_masters = {} if partial_extract else None
+
+        def item_is_hardlink_master(item):
+            return (partial_extract and stat.S_ISREG(item[b'mode']) and
+                    item.get(b'hardlink_master', True) and b'source' not in item)
+
+        for item in archive.iter_items(preload=True,
+                filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])):
            orig_path = item[b'path']
+            if item_is_hardlink_master(item):
+                hardlink_masters[orig_path] = (item.get(b'chunks'), item.get(b'source'))
+            if not matcher.match(item[b'path']):
+                continue
            if strip_components:
                item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
                if not item[b'path']:
@ -378,7 +390,8 @@ class Archiver:
                        dirs.append(item)
                        archive.extract_item(item, restore_attrs=False)
                    else:
-                        archive.extract_item(item, stdout=stdout, sparse=sparse)
+                        archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
+                                             original_path=orig_path)
            except OSError as e:
                self.print_warning('%s: %s', remove_surrogates(orig_path), e)

@ -1205,6 +1218,15 @@ class Archiver:
            Both archives need to be in the same repository, and a repository location may only
            be specified for ARCHIVE1.

+            For archives created with Borg 1.1 or newer diff automatically detects whether
+            the archives are created with the same chunker params. If so, only chunk IDs
+            are compared, which is very fast.
+
+            For archives prior to Borg 1.1 chunk contents are compared by default.
+            If you did not create the archives with different chunker params,
+            pass --same-chunker-params.
+            Note that the chunker params changed from Borg 0.xx to 1.0.
+
            See the output of the "borg help patterns" command for more help on exclude patterns.
            """)
        subparser = subparsers.add_parser('diff', parents=[common_parser],
@ -1282,7 +1304,7 @@ class Archiver:

        See the "borg help patterns" command for more help on exclude patterns.

-        The following keys are available for --format:
+        The following keys are available for --format when listing files:

        """) + ItemFormatter.keys_help()
        subparser = subparsers.add_parser('list', parents=[common_parser],
@ -1309,7 +1331,7 @@ class Archiver:
                               type=location_validator(),
                               help='repository/archive to list contents of')
        subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
-                               help='paths to extract; patterns are supported')
+                               help='paths to list; patterns are supported')

        mount_epilog = textwrap.dedent("""
        This command mounts an archive as a FUSE filesystem. This can be useful for
--- a/borg/helpers.py
+++ b/borg/helpers.py
@ -293,6 +293,9 @@ class PatternMatcher:
        # Value to return from match function when none of the patterns match.
        self.fallback = fallback

+    def empty(self):
+        return not len(self._items)
+
    def add(self, patterns, value):
        """Add list of patterns to internal list. The given value is returned from the match function when one of the
        given patterns matches.
@ -1125,16 +1128,27 @@ class ItemFormatter:
        'NL': os.linesep,
    }
    KEY_DESCRIPTIONS = {
-        'NEWLINE': 'OS dependent line separator',
-        'NL': 'alias of NEWLINE',
-        'NUL': 'NUL character for creating print0 / xargs -0 like ouput, see bpath',
-        'csize': 'compressed size',
        'bpath': 'verbatim POSIX path, can contain any character except NUL',
        'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
        'source': 'link target for links (identical to linktarget)',
+        'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links',
+
+        'csize': 'compressed size',
        'num_chunks': 'number of chunks in this file',
        'unique_chunks': 'number of unique chunks in this file',
+
+        'NEWLINE': 'OS dependent line separator',
+        'NL': 'alias of NEWLINE',
+        'NUL': 'NUL character for creating print0 / xargs -0 like ouput, see bpath',
    }
+    KEY_GROUPS = (
+        ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget'),
+        ('size', 'csize', 'num_chunks', 'unique_chunks'),
+        ('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'),
+        tuple(sorted(hashlib.algorithms_guaranteed)),
+        ('archiveid', 'archivename', 'extra'),
+        ('NEWLINE', 'NL', 'NUL', 'SPACE', 'TAB', 'CR', 'LF'),
+    )

    @classmethod
    def available_keys(cls):
@ -1149,16 +1163,21 @@ class ItemFormatter:
        keys = []
        keys.extend(formatter.call_keys.keys())
        keys.extend(formatter.get_item_data(fake_item).keys())
-        return sorted(keys, key=lambda s: (s.isupper(), s))
+        return keys

    @classmethod
    def keys_help(cls):
        help = []
-        for key in cls.available_keys():
+        keys = cls.available_keys()
+        for group in cls.KEY_GROUPS:
+            for key in group:
+                keys.remove(key)
                text = " - " + key
                if key in cls.KEY_DESCRIPTIONS:
                    text += ": " + cls.KEY_DESCRIPTIONS[key]
                help.append(text)
+            help.append("")
+        assert not keys, str(keys)
        return "\n".join(help)

    def __init__(self, archive, format):
--- a/borg/testsuite/archiver.py
+++ b/borg/testsuite/archiver.py
@ -467,6 +467,49 @@ class ArchiverTestCase(ArchiverTestCaseBase):
            with self.assert_creates_file('input/dir/file'):
                self.cmd('extract', self.repository_location + '::test', '--strip-components', '0')

+    def _extract_hardlinks_setup(self):
+        os.mkdir(os.path.join(self.input_path, 'dir1'))
+        os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))
+
+        self.create_regular_file('source')
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'abba'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/hardlink'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/subdir/hardlink'))
+
+        self.create_regular_file('dir1/source2')
+        os.link(os.path.join(self.input_path, 'dir1/source2'),
+                os.path.join(self.input_path, 'dir1/aaaa'))
+
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+
+    def test_strip_components_links(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
+            assert os.stat('hardlink').st_nlink == 2
+            assert os.stat('subdir/hardlink').st_nlink == 2
+            assert os.stat('aaaa').st_nlink == 2
+            assert os.stat('source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
+    def test_extract_hardlinks(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', 'input/dir1')
+            assert os.stat('input/dir1/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/aaaa').st_nlink == 2
+            assert os.stat('input/dir1/source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
    def test_extract_include_exclude(self):
        self.cmd('init', self.repository_location)
        self.create_regular_file('file1', size=1024 * 80)
--- a/docs/usage.rst
+++ b/docs/usage.rst
@ -374,6 +374,52 @@ Examples
    ...


+
+.. include:: usage/diff.rst.inc
+
+Examples
+~~~~~~~~
+::
+
+    $ borg init testrepo
+    $ mkdir testdir
+    $ cd testdir
+    $ echo asdf > file1
+    $ dd if=/dev/urandom bs=1M count=4 > file2
+    $ touch file3
+    $ borg create ../testrepo::archive1 .
+
+    $ chmod a+x file1
+    $ echo "something" >> file2
+    $ borg create ../testrepo::archive2 .
+
+    $ rm file3
+    $ touch file4
+    $ borg create ../testrepo::archive3 .
+
+    $ cd ..
+    $ borg diff testrepo::archive1 archive2
+    file1 different mode
+             archive1 -rw-r--r--
+             archive2 -rwxr-xr-x
+    file2 different contents
+             +28 B, -31 B, 4.19 MB, 4.19 MB
+
+    $ borg diff testrepo::archive2 archive3
+    file3 different contents
+             +0 B, -0 B, 0 B, <deleted>
+
+    $ borg diff testrepo::archive1 archive3
+    file1 different mode
+             archive1 -rw-r--r--
+             archive3 -rwxr-xr-x
+    file2 different contents
+             +28 B, -31 B, 4.19 MB, 4.19 MB
+    file3 different contents
+             +0 B, -0 B, 0 B, <deleted>
+    file4 different contents
+             +0 B, -0 B, <deleted>, 0 B
+
 .. include:: usage/delete.rst.inc

 Examples
--- a/docs/usage/diff.rst.inc
+++ b/docs/usage/diff.rst.inc
@ -48,4 +48,13 @@ This command finds differences in files (contents, user, group, mode) between ar
 Both archives need to be in the same repository, and a repository location may only
 be specified for ARCHIVE1.

+For archives created with Borg 1.1 or newer, diff automatically detects whether
+the archives were created with the same chunker params. If so, only chunk IDs
+are compared, which is very fast.
+
+For archives prior to Borg 1.1, chunk contents are compared by default.
+If you know the archives were created with the same chunker params,
+pass --same-chunker-params.
+Note that the chunker params changed from Borg 0.xx to 1.0.
+
 See the output of the "borg help patterns" command for more help on exclude patterns.
--- a/docs/usage/list.rst.inc
+++ b/docs/usage/list.rst.inc
@ -6,15 +6,16 @@ borg list

    usage: borg list [-h] [-v] [--debug] [--lock-wait N] [--show-version]
                     [--show-rc] [--no-files-cache] [--umask M]
-                     [--remote-path PATH] [--short] [--list-format LISTFORMAT]
-                     [-P PREFIX]
-                     [REPOSITORY_OR_ARCHIVE]
+                     [--remote-path PATH] [--short] [--format FORMAT] [-P PREFIX]
+                     [-e PATTERN] [--exclude-from EXCLUDEFILE]
+                     [REPOSITORY_OR_ARCHIVE] [PATH [PATH ...]]
    
    List archive or repository contents
    
    positional arguments:
      REPOSITORY_OR_ARCHIVE
                            repository/archive to list contents of
+      PATH                  paths to list; patterns are supported
    
    optional arguments:
      -h, --help            show this help message and exit
@ -30,15 +31,64 @@ borg list
      --umask M             set umask to M (local and remote, default: 0077)
      --remote-path PATH    set remote path to executable (default: "borg")
      --short               only print file/directory names, nothing else
-      --list-format LISTFORMAT
-                            specify format for archive file listing (default:
-                            "{mode} {user:6} {group:6} {size:8d} {isomtime}
-                            {path}{extra}{NEWLINE}") Special "{formatkeys}" exists
-                            to list available keys
+      --format FORMAT, --list-format FORMAT
+                            specify format for file listing (default: "{mode}
+                            {user:6} {group:6} {size:8d} {isomtime}
+                            {path}{extra}{NL}")
      -P PREFIX, --prefix PREFIX
                            only consider archive names starting with this prefix
+      -e PATTERN, --exclude PATTERN
+                            exclude paths matching PATTERN
+      --exclude-from EXCLUDEFILE
+                            read exclude patterns from EXCLUDEFILE, one per line
    
 Description
 ~~~~~~~~~~~

 This command lists the contents of a repository or an archive.
+
+See the "borg help patterns" command for more help on exclude patterns.
+
+The following keys are available for --format when listing files:
+
+ - type
+ - mode
+ - uid
+ - gid
+ - user
+ - group
+ - path: path interpreted as text (might be missing non-text characters, see bpath)
+ - bpath: verbatim POSIX path, can contain any character except NUL
+ - source: link target for links (identical to linktarget)
+ - linktarget
+
+ - size
+ - csize: compressed size
+ - num_chunks: number of chunks in this file
+ - unique_chunks: number of unique chunks in this file
+
+ - mtime
+ - ctime
+ - atime
+ - isomtime
+ - isoctime
+ - isoatime
+
+ - md5
+ - sha1
+ - sha224
+ - sha256
+ - sha384
+ - sha512
+
+ - archiveid
+ - archivename
+ - extra: prepends {source} with " -> " for soft links and " link to " for hard links
+
+ - NEWLINE: OS dependent line separator
+ - NL: alias of NEWLINE
+ - NUL: NUL character for creating print0 / xargs -0 like output, see bpath
+ - SPACE
+ - TAB
+ - CR
+ - LF