do not upgrade repositories in place by default

instead, we perform the equivalent of `cp -al` on the repository to
keep a backup, and then rewrite the files, breaking the hardlinks as
necessary.

it has to be confirmed that the rest of Borg will also break hardlinks
when operating on files in the repository. if Borg operates in place
on any files of the repository, it could jeoperdize the backup, so
this needs to be verified. I believe that most files are written to a
temporary file and moved into place, however, so the backup should be
safe.

the rationale behind the backup copy is that we want to be extra
careful with user's data by default. the old behavior is retained
through the `--inplace`/`-i` commandline flag. plus, this way we don't
need to tell users to go through extra steps (`cp -a`, in particular)
before running the command.

also, it can take a long time to do the copy of the attic repository
we wish to work on. since `cp -a` doesn't provide progress
information, the new default behavior provides a nicer user experience
of giving an overall impression of the upgrade progress, while
retaining compatibility with Attic by default (in a separate
repository, of course).

this makes the upgrade command much less scary to use and hopefully
will convert drones to the borg collective.

the only place where the default inplace behavior is retained is in
the header_replace() function, to avoid breaking the cache conversion
code and to keep API stability and semantic coherence ("replace" by
defaults means in place).
This commit is contained in:
Antoine Beaupré 2015-10-15 18:02:24 -04:00
parent dbc183f87f
commit 6d457aed57
3 changed files with 83 additions and 25 deletions

View File

@ -472,7 +472,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
# XXX: should auto-detect if it is an attic repository here
repo = AtticRepositoryUpgrader(args.repository.path, create=False)
try:
repo.upgrade(args.dry_run)
repo.upgrade(args.dry_run, inplace=args.inplace)
except NotImplementedError as e:
print("warning: %s" % e)
return self.exit_code
@ -894,7 +894,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
help='repository to prune')
upgrade_epilog = textwrap.dedent("""
upgrade an existing Borg repository in place. this currently
upgrade an existing Borg repository. this currently
only support converting an Attic repository, but may
eventually be extended to cover major Borg upgrades as well.
@ -909,13 +909,6 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
the first backup after the conversion takes longer than expected
due to the cache resync.
it is recommended you run this on a copy of the Attic
repository, in case something goes wrong, for example:
cp -a attic borg
borg upgrade -n borg
borg upgrade borg
upgrade should be able to resume if interrupted, although it
will still iterate over all segments. if you want to start
from scratch, use `borg delete` over the copied repository to
@ -923,11 +916,19 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
borg delete borg
the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic
will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as
the magic strings will have changed.
unless ``--inplace`` is specified, the upgrade process first
creates a backup copy of the repository, in
REPOSITORY.upgrade-DATETIME, using hardlinks. this takes
longer than in place upgrades, but is much safer and gives
progress information (as opposed to ``cp -al``). once you are
satisfied with the conversion, you can safely destroy the
backup copy.
you have been warned.""")
WARNING: running the upgrade in place will make the current
copy unuseable with older version, with no way of going back
to previous versions. this can PERMANENTLY DAMAGE YOUR
REPOSITORY! Attic CAN NOT READ BORG REPOSITORIES, as the
magic strings have changed. you have been warned.""")
subparser = subparsers.add_parser('upgrade', parents=[common_parser],
description=self.do_upgrade.__doc__,
epilog=upgrade_epilog,
@ -936,6 +937,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
subparser.add_argument('-n', '--dry-run', dest='dry_run',
default=False, action='store_true',
help='do not change repository')
subparser.add_argument('-i', '--inplace', dest='inplace',
default=False, action='store_true',
help="""rewrite repository in-place, with no chance of going back to older
versions of the repository.""")
subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
type=location_validator(archive=False),
help='path to the repository to be upgraded')

View File

@ -63,8 +63,11 @@ def attic_repo(tmpdir):
attic_repo.close()
return attic_repo
@pytest.fixture(params=[True, False])
def inplace(request):
return request.param
def test_convert_segments(tmpdir, attic_repo):
def test_convert_segments(tmpdir, attic_repo, inplace):
"""test segment conversion
this will load the given attic repository, list all the segments
@ -80,7 +83,7 @@ def test_convert_segments(tmpdir, attic_repo):
repo = AtticRepositoryUpgrader(str(tmpdir), create=False)
segments = [filename for i, filename in repo.io.segment_iterator()]
repo.close()
repo.convert_segments(segments, dryrun=False)
repo.convert_segments(segments, dryrun=False, inplace=inplace)
repo.convert_cache(dryrun=False)
assert repo_valid(tmpdir)
@ -141,7 +144,7 @@ def test_keys(tmpdir, attic_repo, attic_key_file):
assert key_valid(attic_key_file.path)
def test_convert_all(tmpdir, attic_repo, attic_key_file):
def test_convert_all(tmpdir, attic_repo, attic_key_file, inplace):
"""test all conversion steps
this runs everything. mostly redundant test, since everything is
@ -155,7 +158,39 @@ def test_convert_all(tmpdir, attic_repo, attic_key_file):
"""
# check should fail because of magic number
assert not repo_valid(tmpdir)
def first_inode(path):
return os.stat(os.path.join(path, 'data', '0', '0')).st_ino
orig_inode = first_inode(attic_repo.path)
repo = AtticRepositoryUpgrader(str(tmpdir), create=False)
repo.upgrade(dryrun=False)
backup = repo.upgrade(dryrun=False, inplace=inplace)
if inplace:
assert backup is None
assert first_inode(repo.path) == orig_inode
else:
assert backup
assert first_inode(repo.path) != first_inode(backup)
assert key_valid(attic_key_file.path)
assert repo_valid(tmpdir)
def test_hardlink(tmpdir, inplace):
"""test that we handle hard links properly
that is, if we are in "inplace" mode, hardlinks should *not*
change (ie. we write the file directly, so not the whole file, and
not re-create the file).
if we are *not* in inplace mode, then the inode should change, as
we are supposed to leave the original inode alone."""
a = str(tmpdir.join('a'))
with open(a, 'wb') as tmp:
tmp.write(b'aXXX')
b = str(tmpdir.join('b'))
os.link(a, b)
AtticRepositoryUpgrader.header_replace(b, b'a', b'b', inplace=inplace)
if not inplace:
assert os.stat(a).st_ino != os.stat(b).st_ino
else:
assert os.stat(a).st_ino == os.stat(b).st_ino
with open(b, 'rb') as tmp:
assert tmp.read() == b'bXXX'

View File

@ -1,4 +1,5 @@
from binascii import hexlify
import datetime
import logging
logger = logging.getLogger(__name__)
import os
@ -15,7 +16,7 @@ ATTIC_MAGIC = b'ATTICSEG'
class AtticRepositoryUpgrader(Repository):
def upgrade(self, dryrun=True):
def upgrade(self, dryrun=True, inplace=False):
"""convert an attic repository to a borg repository
those are the files that need to be upgraded here, from most
@ -26,6 +27,12 @@ class AtticRepositoryUpgrader(Repository):
we nevertheless do the order in reverse, as we prefer to do
the fast stuff first, to improve interactivity.
"""
backup = None
if not inplace:
backup = '{}.upgrade-{:%Y-%m-%d-%H:%M:%S}'.format(self.path, datetime.datetime.now())
logger.info('making a hardlink copy in %s', backup)
if not dryrun:
shutil.copytree(self.path, backup, copy_function=lambda src, dst: os.link(src, dst))
logger.info("opening attic repository with borg and converting")
# we need to open the repo to load configuration, keyfiles and segments
self.open(self.path, exclusive=False)
@ -42,13 +49,14 @@ class AtticRepositoryUpgrader(Repository):
exclusive=True).acquire()
try:
self.convert_cache(dryrun)
self.convert_segments(segments, dryrun)
self.convert_segments(segments, dryrun=dryrun, inplace=inplace)
finally:
self.lock.release()
self.lock = None
return backup
@staticmethod
def convert_segments(segments, dryrun):
def convert_segments(segments, dryrun=True, inplace=False):
"""convert repository segments from attic to borg
replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in
@ -60,23 +68,33 @@ class AtticRepositoryUpgrader(Repository):
i = 0
for filename in segments:
i += 1
print("\rconverting segment %d/%d in place, %.2f%% done (%s)"
print("\rconverting segment %d/%d, %.2f%% done (%s)"
% (i, len(segments), 100*float(i)/len(segments), filename),
end='', file=sys.stderr)
if dryrun:
time.sleep(0.001)
else:
AtticRepositoryUpgrader.header_replace(filename, ATTIC_MAGIC, MAGIC)
AtticRepositoryUpgrader.header_replace(filename, ATTIC_MAGIC, MAGIC, inplace=inplace)
print(file=sys.stderr)
@staticmethod
def header_replace(filename, old_magic, new_magic):
def header_replace(filename, old_magic, new_magic, inplace=True):
with open(filename, 'r+b') as segment:
segment.seek(0)
# only write if necessary
if segment.read(len(old_magic)) == old_magic:
segment.seek(0)
segment.write(new_magic)
if inplace:
segment.seek(0)
segment.write(new_magic)
else:
# remove the hardlink and rewrite the file. this
# works because our old file handle is still open
# so even though the file is removed, we can still
# read it until the file is closed.
os.unlink(filename)
with open(filename, 'wb') as new_segment:
new_segment.write(new_magic)
new_segment.write(segment.read())
def find_attic_keyfile(self):
"""find the attic keyfiles