Merge branch 'master' into faster-cache-sync

This commit is contained in:
Thomas Waldmann 2015-09-10 23:12:55 +02:00
commit 26bde96a3a
12 changed files with 373 additions and 104 deletions

View File

@ -2,6 +2,27 @@ Borg Changelog
==============
Version 0.26.0 (not released yet)
---------------------------------
New features:
- BORG_REPO env var to specify the default repo, #168
- read special files as if they were regular files, #79
Bug fixes:
- borg mount repo: use absolute path, attic #200, attic #137
- chunker: use off_t to get 64bit on 32bit platform, #178
- initialize chunker fd to -1, so it's not equal to STDIN_FILENO (0)
- fix reaction to "no" answer at delete repo prompt, #182
Other changes:
- detect inconsistency / corruption / hash collision, #170
- replace versioneer with setuptools_scm, #106
Version 0.25.0
--------------

View File

@ -83,7 +83,8 @@ typedef struct {
PyObject *fd;
int fh;
int done, eof;
size_t remaining, bytes_read, bytes_yielded, position, last;
size_t remaining, position, last;
off_t bytes_read, bytes_yielded;
} Chunker;
static Chunker *
@ -96,6 +97,7 @@ chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32
c->table = buzhash_init_table(seed);
c->buf_size = max_size;
c->data = malloc(c->buf_size);
c->fh = -1;
return c;
}
@ -128,7 +130,7 @@ static int
chunker_fill(Chunker *c)
{
ssize_t n;
size_t offset, length;
off_t offset, length;
PyObject *data;
memmove(c->data, c->data + c->last, c->position + c->remaining - c->last);
c->position -= c->last;
@ -161,7 +163,7 @@ chunker_fill(Chunker *c)
// size limit) kick out data from the cache that might be still useful
// for the OS or other processes.
if (length > 0) {
posix_fadvise(c->fh, (off_t) offset, (off_t) length, POSIX_FADV_DONTNEED);
posix_fadvise(c->fh, offset, length, POSIX_FADV_DONTNEED);
}
#endif
}

View File

@ -455,6 +455,7 @@ class Archive:
b'mtime': int_to_bigint(int(time.time()) * 1000000000)
}
self.add_item(item)
return 'i' # stdin
def process_file(self, path, st, cache):
status = None

View File

@ -102,17 +102,21 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
def do_create(self, args):
"""Create new archive"""
dry_run = args.dry_run
t0 = datetime.now()
repository = self.open_repository(args.archive, exclusive=True)
manifest, key = Manifest.load(repository)
compr_args = dict(buffer=COMPR_BUFFER)
compr_args.update(args.compression)
key.compressor = Compressor(**compr_args)
cache = Cache(repository, key, manifest, do_files=args.cache_files)
archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
create=True, checkpoint_interval=args.checkpoint_interval,
numeric_owner=args.numeric_owner, progress=args.progress,
chunker_params=args.chunker_params)
if not dry_run:
repository = self.open_repository(args.archive, exclusive=True)
manifest, key = Manifest.load(repository)
compr_args = dict(buffer=COMPR_BUFFER)
compr_args.update(args.compression)
key.compressor = Compressor(**compr_args)
cache = Cache(repository, key, manifest, do_files=args.cache_files)
archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
create=True, checkpoint_interval=args.checkpoint_interval,
numeric_owner=args.numeric_owner, progress=args.progress,
chunker_params=args.chunker_params)
else:
archive = cache = None
# Add cache dir to inode_skip list
skip_inodes = set()
try:
@ -130,11 +134,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
for path in args.paths:
if path == '-': # stdin
path = 'stdin'
self.print_verbose(path)
try:
archive.process_stdin(path, cache)
except IOError as e:
self.print_error('%s: %s', path, e)
if not dry_run:
try:
status = archive.process_stdin(path, cache)
except IOError as e:
self.print_error('%s: %s', path, e)
else:
status = '-'
self.print_verbose("%1s %s", status, path)
continue
path = os.path.normpath(path)
if args.dontcross:
@ -145,25 +152,28 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
continue
else:
restrict_dev = None
self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev)
archive.save(timestamp=args.timestamp)
if args.progress:
archive.stats.show_progress(final=True)
if args.stats:
t = datetime.now()
diff = t - t0
print('-' * 78)
print('Archive name: %s' % args.archive.archive)
print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii'))
print('Start time: %s' % t0.strftime('%c'))
print('End time: %s' % t.strftime('%c'))
print('Duration: %s' % format_timedelta(diff))
print('Number of files: %d' % archive.stats.nfiles)
archive.stats.print_('This archive:', cache)
print('-' * 78)
self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev,
read_special=args.read_special, dry_run=dry_run)
if not dry_run:
archive.save(timestamp=args.timestamp)
if args.progress:
archive.stats.show_progress(final=True)
if args.stats:
t = datetime.now()
diff = t - t0
print('-' * 78)
print('Archive name: %s' % args.archive.archive)
print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii'))
print('Start time: %s' % t0.strftime('%c'))
print('End time: %s' % t.strftime('%c'))
print('Duration: %s' % format_timedelta(diff))
print('Number of files: %d' % archive.stats.nfiles)
archive.stats.print_('This archive:', cache)
print('-' * 78)
return self.exit_code
def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev):
def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev,
read_special=False, dry_run=False):
if exclude_path(path, excludes):
return
try:
@ -180,15 +190,18 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
# Ignore if nodump flag is set
if has_lchflags and (st.st_flags & stat.UF_NODUMP):
return
if stat.S_ISREG(st.st_mode):
try:
status = archive.process_file(path, st, cache)
except IOError as e:
self.print_error('%s: %s', path, e)
if (stat.S_ISREG(st.st_mode) or
read_special and not stat.S_ISDIR(st.st_mode)):
if not dry_run:
try:
status = archive.process_file(path, st, cache)
except IOError as e:
self.print_error('%s: %s', path, e)
elif stat.S_ISDIR(st.st_mode):
if exclude_caches and is_cachedir(path):
return
status = archive.process_dir(path, st)
if not dry_run:
status = archive.process_dir(path, st)
try:
entries = os.listdir(path)
except OSError as e:
@ -197,13 +210,17 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
for filename in sorted(entries):
entry_path = os.path.normpath(os.path.join(path, filename))
self._process(archive, cache, excludes, exclude_caches, skip_inodes,
entry_path, restrict_dev)
entry_path, restrict_dev, read_special=read_special,
dry_run=dry_run)
elif stat.S_ISLNK(st.st_mode):
status = archive.process_symlink(path, st)
if not dry_run:
status = archive.process_symlink(path, st)
elif stat.S_ISFIFO(st.st_mode):
status = archive.process_fifo(path, st)
if not dry_run:
status = archive.process_fifo(path, st)
elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode):
status = archive.process_dev(path, st)
if not dry_run:
status = archive.process_dev(path, st)
elif stat.S_ISSOCK(st.st_mode):
# Ignore unix sockets
return
@ -219,7 +236,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
# Note: A/M/U is relative to the "files" cache, not to the repo.
# This would be an issue if the files cache is not used.
if status is None:
status = '?' # need to add a status code somewhere
if not dry_run:
status = '?' # need to add a status code somewhere
else:
status = '-' # dry run, item was not backed up
# output ALL the stuff - it can be easily filtered using grep.
# even stuff considered unchanged might be interesting.
self.print_verbose("%1s %s", status, remove_surrogates(path))
@ -296,10 +316,11 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
print("You requested to completely DELETE the repository *including* all archives it contains:")
for archive_info in manifest.list_archive_infos(sort_by='ts'):
print(format_archive(archive_info))
while not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
if not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
print("""Type "YES" if you understand this and want to continue.\n""")
if input('Do you want to continue? ') == 'YES':
break
if input('Do you want to continue? ') != 'YES':
self.exit_code = 1
return self.exit_code
repository.destroy()
cache.destroy()
print("Repository and corresponding cache were deleted.")
@ -556,7 +577,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
description=self.do_init.__doc__, epilog=init_epilog,
formatter_class=argparse.RawDescriptionHelpFormatter)
subparser.set_defaults(func=self.do_init)
subparser.add_argument('repository', metavar='REPOSITORY',
subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
type=location_validator(archive=False),
help='repository to create')
subparser.add_argument('-e', '--encryption', dest='encryption',
@ -604,7 +625,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
epilog=check_epilog,
formatter_class=argparse.RawDescriptionHelpFormatter)
subparser.set_defaults(func=self.do_check)
subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE',
subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
type=location_validator(),
help='repository or archive to check consistency of')
subparser.add_argument('--repository-only', dest='repo_only', action='store_true',
@ -629,7 +650,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
epilog=change_passphrase_epilog,
formatter_class=argparse.RawDescriptionHelpFormatter)
subparser.set_defaults(func=self.do_change_passphrase)
subparser.add_argument('repository', metavar='REPOSITORY',
subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
type=location_validator(archive=False))
create_epilog = textwrap.dedent("""
@ -687,6 +708,12 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
'zlib,0 .. zlib,9 == zlib (with level 0..9), '
'lzma == lzma (default level 6), '
'lzma,0 .. lzma,9 == lzma (with level 0..9).')
subparser.add_argument('--read-special', dest='read_special',
action='store_true', default=False,
help='open and read special files as if they were regular files')
subparser.add_argument('-n', '--dry-run', dest='dry_run',
action='store_true', default=False,
help='do not create a backup archive')
subparser.add_argument('archive', metavar='ARCHIVE',
type=location_validator(archive=True),
help='archive to create')
@ -760,7 +787,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
subparser.add_argument('-s', '--stats', dest='stats',
action='store_true', default=False,
help='print statistics for the deleted archive')
subparser.add_argument('target', metavar='TARGET',
subparser.add_argument('target', metavar='TARGET', nargs='?', default='',
type=location_validator(),
help='archive or repository to delete')
@ -775,7 +802,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
subparser.add_argument('--short', dest='short',
action='store_true', default=False,
help='only print file/directory names, nothing else')
subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', type=location_validator(),
subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
type=location_validator(),
help='repository/archive to list contents of')
mount_epilog = textwrap.dedent("""
This command mounts an archive as a FUSE filesystem. This can be useful for
@ -858,7 +886,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
help='number of yearly archives to keep')
subparser.add_argument('-p', '--prefix', dest='prefix', type=str,
help='only consider archive names starting with this prefix')
subparser.add_argument('repository', metavar='REPOSITORY',
subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
type=location_validator(archive=False),
help='repository to prune')

View File

@ -3,6 +3,7 @@ from .remote import cache_if_remote
import errno
import msgpack
import os
import stat
import sys
from binascii import hexlify
import shutil
@ -327,9 +328,9 @@ class Cache:
def add_chunk(self, id, data, stats):
if not self.txn_active:
self.begin_txn()
if self.seen_chunk(id):
return self.chunk_incref(id, stats)
size = len(data)
if self.seen_chunk(id, size):
return self.chunk_incref(id, stats)
data = self.key.encrypt(data)
csize = len(data)
self.repository.put(id, data, wait=False)
@ -337,8 +338,14 @@ class Cache:
stats.update(size, csize, True)
return id, size, csize
def seen_chunk(self, id):
return self.chunks.get(id, (0, 0, 0))[0]
def seen_chunk(self, id, size=None):
    """Return the reference count of chunk *id* (0 if not seen yet).

    When *size* is given and we already stored a size for this id, the
    two must agree; a mismatch means corruption, a bug, or a (very
    unlikely) hash collision, so we refuse to continue.
    """
    entry = self.chunks.get(id, (0, None, None))
    refcount = entry[0]
    stored_size = entry[1]
    if size is not None and stored_size is not None:
        if size != stored_size:
            # we already have a chunk with that id, but different size.
            # this is either a hash collision (unlikely) or corruption or a bug.
            raise Exception("chunk has same id [%r], but different size (stored: %d new: %d)!" % (
                id, stored_size, size))
    return refcount
def chunk_incref(self, id, stats):
if not self.txn_active:
@ -361,7 +368,7 @@ class Cache:
stats.update(-size, -csize, False)
def file_known_and_unchanged(self, path_hash, st):
if not self.do_files:
if not (self.do_files and stat.S_ISREG(st.st_mode)):
return None
if self.files is None:
self._read_files()
@ -378,7 +385,7 @@ class Cache:
return None
def memorize_file(self, path_hash, st, ids):
if not self.do_files:
if not (self.do_files and stat.S_ISREG(st.st_mode)):
return
# Entry: Age, inode, size, mtime, chunk ids
mtime_ns = st_mtime_ns(st)

View File

@ -20,7 +20,7 @@ cdef extern from "_chunker.c":
cdef class Chunker:
cdef _Chunker *chunker
def __cinit__(self, seed, chunk_min_exp, chunk_max_exp, hash_mask_bits, hash_window_size):
def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size):
min_size = 1 << chunk_min_exp
max_size = 1 << chunk_max_exp
hash_mask = (1 << hash_mask_bits) - 1

View File

@ -52,7 +52,7 @@ bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0]
long_to_bytes = lambda x: _long.pack(x)
def num_aes_blocks(length):
def num_aes_blocks(int length):
"""Return the number of AES blocks required to encrypt/decrypt *length* bytes of data.
Note: this is only correct for modes without padding, like AES-CTR.
"""

View File

@ -1,12 +1,15 @@
import argparse
import binascii
from collections import namedtuple
from functools import wraps
import grp
import os
import pwd
import re
import sys
import time
import unicodedata
from datetime import datetime, timezone, timedelta
from fnmatch import translate
from operator import attrgetter
@ -220,6 +223,23 @@ def exclude_path(path, patterns):
# unify the two cases, we add a path separator to the end of
# the path before matching.
def normalized(func):
    """Decorator for the Pattern match methods.

    On OS X it returns a wrapper that NFD-normalizes the path before
    matching (HFS+ stores paths in a canonical decomposed form, so users
    shouldn't be required to enter an exact byte-level match).  On every
    other platform the original method is returned unchanged, since
    Windows and Unix filesystems allow different forms and users always
    have to enter an exact match.
    """
    if sys.platform not in ('darwin',):
        return func

    @wraps(func)
    def normalize_wrapper(self, path):
        return func(self, unicodedata.normalize("NFD", path))
    return normalize_wrapper
class IncludePattern:
"""Literal files or directories listed on the command line
for some operations (e.g. extract, but not create).
@ -227,8 +247,12 @@ class IncludePattern:
path match as well. A trailing slash makes no difference.
"""
def __init__(self, pattern):
if sys.platform in ('darwin',):
pattern = unicodedata.normalize("NFD", pattern)
self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep
@normalized
def match(self, path):
return (path+os.path.sep).startswith(self.pattern)
@ -245,10 +269,15 @@ class ExcludePattern(IncludePattern):
self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep
else:
self.pattern = os.path.normpath(pattern)+os.path.sep+'*'
if sys.platform in ('darwin',):
self.pattern = unicodedata.normalize("NFD", self.pattern)
# fnmatch and re.match both cache compiled regular expressions.
# Nevertheless, this is about 10 times faster.
self.regex = re.compile(translate(self.pattern))
@normalized
def match(self, path):
return self.regex.match(path+os.path.sep) is not None
@ -466,13 +495,34 @@ class Location:
r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$')
scp_re = re.compile(r'((?:(?P<user>[^@]+)@)?(?P<host>[^:/]+):)?'
r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$')
# get the repo from the BORG_REPO env var and the optional archive from the param.
# if the syntax requires giving REPOSITORY (see "borg mount"),
# use "::" to let it use the env var.
# if the REPOSITORY argument is optional, it'll automatically use the env var.
env_re = re.compile(r'(?:::(?P<archive>.+)?)?$')
def __init__(self, text):
def __init__(self, text=''):
self.orig = text
if not self.parse(text):
if not self.parse(self.orig):
raise ValueError
def parse(self, text):
    """Parse *text* as a repository[::archive] location.

    If *text* is not a full location, fall back to the BORG_REPO
    environment variable: *text* may then be just the '::archive'
    shorthand (matched by env_re), with the repository part taken
    from the environment.  Returns True on success, False otherwise.
    """
    if self._parse(text):
        return True
    # Not a complete location -- maybe it is the '::archive' shorthand.
    match = self.env_re.match(text)
    if match is None:
        return False
    env_repo = os.environ.get('BORG_REPO')
    if env_repo is None or not self._parse(env_repo):
        return False
    self.archive = match.group('archive')
    return True
def _parse(self, text):
m = self.ssh_re.match(text)
if m:
self.proto = m.group('proto')

View File

@ -50,14 +50,14 @@ class Repository:
"""Object with key {} not found in repository {}."""
def __init__(self, path, create=False, exclusive=False):
self.path = path
self.path = os.path.abspath(path)
self.io = None
self.lock = None
self.index = None
self._active_txn = False
if create:
self.create(path)
self.open(path, exclusive)
self.create(self.path)
self.open(self.path, exclusive)
def __del__(self):
self.close()

View File

@ -485,6 +485,14 @@ class ArchiverTestCase(ArchiverTestCaseBase):
mode = os.stat(self.repository_path).st_mode
self.assertEqual(stat.S_IMODE(mode), 0o700)
# Regression test for 'borg create --dry-run': a dry run must not create
# an archive in the repository.
def test_create_dry_run(self):
self.cmd('init', self.repository_location)
# NOTE(review): assumes self.cmd / repository_location / repository_path
# are provided by ArchiverTestCaseBase -- confirm against the base class.
self.cmd('create', '--dry-run', self.repository_location + '::test', 'input')
# Make sure no archive has been created
repository = Repository(self.repository_path)
manifest, key = Manifest.load(repository)
self.assert_equal(len(manifest.archives), 0)
def test_cmdline_compatibility(self):
self.create_regular_file('file1', size=1024 * 80)
self.cmd('init', self.repository_location)

View File

@ -3,9 +3,10 @@ from time import mktime, strptime
from datetime import datetime, timezone, timedelta
import pytest
import sys
import msgpack
from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \
from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, \
prune_within, prune_split, \
StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams
from . import BaseTestCase
@ -23,42 +24,115 @@ class BigIntTestCase(BaseTestCase):
self.assert_equal(bigint_to_int(int_to_bigint(2**70)), 2**70)
class LocationTestCase(BaseTestCase):
class TestLocationWithoutEnv:
def test_ssh(self, monkeypatch):
monkeypatch.delenv('BORG_REPO', raising=False)
assert repr(Location('ssh://user@host:1234/some/path::archive')) == \
"Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')"
assert repr(Location('ssh://user@host:1234/some/path')) == \
"Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive=None)"
def test(self):
self.assert_equal(
repr(Location('ssh://user@host:1234/some/path::archive')),
"Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')"
)
self.assert_equal(
repr(Location('file:///some/path::archive')),
"Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')"
)
self.assert_equal(
repr(Location('user@host:/some/path::archive')),
"Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')"
)
self.assert_equal(
repr(Location('path::archive')),
"Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')"
)
self.assert_equal(
repr(Location('/some/absolute/path::archive')),
"Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')"
)
self.assert_equal(
repr(Location('some/relative/path::archive')),
"Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')"
)
self.assert_raises(ValueError, lambda: Location('ssh://localhost:22/path:archive'))
def test_file(self, monkeypatch):
monkeypatch.delenv('BORG_REPO', raising=False)
assert repr(Location('file:///some/path::archive')) == \
"Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')"
assert repr(Location('file:///some/path')) == \
"Location(proto='file', user=None, host=None, port=None, path='/some/path', archive=None)"
def test_canonical_path(self):
def test_scp(self, monkeypatch):
monkeypatch.delenv('BORG_REPO', raising=False)
assert repr(Location('user@host:/some/path::archive')) == \
"Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')"
assert repr(Location('user@host:/some/path')) == \
"Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive=None)"
def test_folder(self, monkeypatch):
monkeypatch.delenv('BORG_REPO', raising=False)
assert repr(Location('path::archive')) == \
"Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')"
assert repr(Location('path')) == \
"Location(proto='file', user=None, host=None, port=None, path='path', archive=None)"
def test_abspath(self, monkeypatch):
monkeypatch.delenv('BORG_REPO', raising=False)
assert repr(Location('/some/absolute/path::archive')) == \
"Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')"
assert repr(Location('/some/absolute/path')) == \
"Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive=None)"
def test_relpath(self, monkeypatch):
monkeypatch.delenv('BORG_REPO', raising=False)
assert repr(Location('some/relative/path::archive')) == \
"Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')"
assert repr(Location('some/relative/path')) == \
"Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive=None)"
def test_underspecified(self, monkeypatch):
monkeypatch.delenv('BORG_REPO', raising=False)
with pytest.raises(ValueError):
Location('::archive')
with pytest.raises(ValueError):
Location('::')
with pytest.raises(ValueError):
Location()
def test_no_double_colon(self, monkeypatch):
monkeypatch.delenv('BORG_REPO', raising=False)
with pytest.raises(ValueError):
Location('ssh://localhost:22/path:archive')
def test_canonical_path(self, monkeypatch):
monkeypatch.delenv('BORG_REPO', raising=False)
locations = ['some/path::archive', 'file://some/path::archive', 'host:some/path::archive',
'host:~user/some/path::archive', 'ssh://host/some/path::archive',
'ssh://user@host:1234/some/path::archive']
for location in locations:
self.assert_equal(Location(location).canonical_path(),
Location(Location(location).canonical_path()).canonical_path())
assert Location(location).canonical_path() == \
Location(Location(location).canonical_path()).canonical_path()
# Location parsing when BORG_REPO supplies the repository: each test sets
# BORG_REPO via monkeypatch and checks that '::archive' and the empty
# location ('Location()') resolve the repository from the environment.
class TestLocationWithEnv:
def test_ssh(self, monkeypatch):
monkeypatch.setenv('BORG_REPO', 'ssh://user@host:1234/some/path')
assert repr(Location('::archive')) == \
"Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')"
assert repr(Location()) == \
"Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive=None)"
def test_file(self, monkeypatch):
monkeypatch.setenv('BORG_REPO', 'file:///some/path')
assert repr(Location('::archive')) == \
"Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')"
assert repr(Location()) == \
"Location(proto='file', user=None, host=None, port=None, path='/some/path', archive=None)"
# scp-style 'user@host:path' syntax is parsed as the ssh protocol.
def test_scp(self, monkeypatch):
monkeypatch.setenv('BORG_REPO', 'user@host:/some/path')
assert repr(Location('::archive')) == \
"Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')"
assert repr(Location()) == \
"Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive=None)"
def test_folder(self, monkeypatch):
monkeypatch.setenv('BORG_REPO', 'path')
assert repr(Location('::archive')) == \
"Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')"
assert repr(Location()) == \
"Location(proto='file', user=None, host=None, port=None, path='path', archive=None)"
def test_abspath(self, monkeypatch):
monkeypatch.setenv('BORG_REPO', '/some/absolute/path')
assert repr(Location('::archive')) == \
"Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')"
assert repr(Location()) == \
"Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive=None)"
def test_relpath(self, monkeypatch):
monkeypatch.setenv('BORG_REPO', 'some/relative/path')
assert repr(Location('::archive')) == \
"Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')"
assert repr(Location()) == \
"Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive=None)"
class FormatTimedeltaTestCase(BaseTestCase):
@ -105,6 +179,72 @@ class PatternTestCase(BaseTestCase):
['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg'])
# Runs everywhere EXCEPT OS X: without HFS+ normalization, composed and
# decomposed Unicode forms of the same character must NOT match each other,
# and invalid byte sequences must only match themselves.
@pytest.mark.skipif(sys.platform in ('darwin',), reason='all but OS X test')
class PatternNonAsciiTestCase(BaseTestCase):
# pattern uses the precomposed form; only precomposed paths may match.
def testComposedUnicode(self):
pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
i = IncludePattern(pattern)
e = ExcludePattern(pattern)
assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
assert not i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
assert not e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
# pattern uses the decomposed form; only decomposed paths may match.
def testDecomposedUnicode(self):
pattern = 'ba\N{COMBINING ACUTE ACCENT}'
i = IncludePattern(pattern)
e = ExcludePattern(pattern)
assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
assert not e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
# a lone 0x80 byte (not valid UTF-8) must still round-trip as-is.
def testInvalidUnicode(self):
pattern = str(b'ba\x80', 'latin1')
i = IncludePattern(pattern)
e = ExcludePattern(pattern)
assert not i.match("ba/foo")
assert i.match(str(b"ba\x80/foo", 'latin1'))
assert not e.match("ba/foo")
assert e.match(str(b"ba\x80/foo", 'latin1'))
# Runs ONLY on OS X: thanks to NFD normalization of both pattern and path,
# composed and decomposed Unicode forms of the same character must match
# each other in either direction.
@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test')
class OSXPatternNormalizationTestCase(BaseTestCase):
# precomposed pattern matches both precomposed and decomposed paths.
def testComposedUnicode(self):
pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
i = IncludePattern(pattern)
e = ExcludePattern(pattern)
assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
# decomposed pattern likewise matches both forms.
def testDecomposedUnicode(self):
pattern = 'ba\N{COMBINING ACUTE ACCENT}'
i = IncludePattern(pattern)
e = ExcludePattern(pattern)
assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
# invalid bytes have no canonical form; they must only match themselves.
def testInvalidUnicode(self):
pattern = str(b'ba\x80', 'latin1')
i = IncludePattern(pattern)
e = ExcludePattern(pattern)
assert not i.match("ba/foo")
assert i.match(str(b"ba\x80/foo", 'latin1'))
assert not e.match("ba/foo")
assert e.match(str(b"ba\x80/foo", 'latin1'))
def test_compression_specs():
with pytest.raises(ValueError):
CompressionSpec('')

View File

@ -41,9 +41,15 @@ Environment Variables
|project_name| uses some environment variables for automation:
Specifying a passphrase:
General:
BORG_REPO
When set, use the value as the default repository location. If a command needs an archive
parameter, you can abbreviate it as `::archive`. If a command needs a repository parameter, you
can either omit it, or abbreviate it as `::` if a positional parameter is required.
BORG_PASSPHRASE
When set, use the value to answer the passphrase question for encrypted repositories.
TMPDIR
where temporary files are stored (might need a lot of temporary space for some operations)
Some "yes" sayers (if set, they automatically confirm that you really want to do X even if there is that warning):
BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK
@ -64,10 +70,6 @@ Building:
BORG_OPENSSL_PREFIX
Adds given OpenSSL header file directory to the default locations (setup.py).
General:
TMPDIR
where temporary files are stored (might need a lot of temporary space for some operations)
Please note:
@ -210,6 +212,11 @@ Examples
# Even slower, even higher compression (N = 0..9)
$ borg create --compression lzma,N /mnt/backup::repo ~
# Back up some LV snapshots (you have to create the snapshots before this
# and remove them afterwards). We also back up the output of lvdisplay so
# we can see the LV sizes at restore time. See also "borg extract" examples.
$ lvdisplay > lvdisplay.txt
$ borg create --read-special /mnt/backup::repo lvdisplay.txt /dev/vg0/*-snapshot
.. include:: usage/extract.rst.inc
@ -229,6 +236,11 @@ Examples
# Extract the "src" directory but exclude object files
$ borg extract /mnt/backup::my-files home/USERNAME/src --exclude '*.o'
# Restore LV snapshots (the target LVs /dev/vg0/* of correct size have
# to be already available and will be overwritten by this command!)
$ borg extract --stdout /mnt/backup::repo dev/vg0/root-snapshot > /dev/vg0/root
$ borg extract --stdout /mnt/backup::repo dev/vg0/home-snapshot > /dev/vg0/home
Note: currently, extract always writes into the current working directory ("."),
so make sure you ``cd`` to the right place before calling ``borg extract``.