From 6d457aed578f4acccf70d0736ce3295bd616825b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 15 Oct 2015 18:02:24 -0400 Subject: [PATCH] do not upgrade repositories in place by default instead, we perform the equivalent of `cp -al` on the repository to keep a backup, and then rewrite the files, breaking the hardlinks as necessary. it has to be confirmed that the rest of Borg will also break hardlinks when operating on files in the repository. if Borg operates in place on any files of the repository, it could jeoperdize the backup, so this needs to be verified. I believe that most files are written to a temporary file and moved into place, however, so the backup should be safe. the rationale behind the backup copy is that we want to be extra careful with user's data by default. the old behavior is retained through the `--inplace`/`-i` commandline flag. plus, this way we don't need to tell users to go through extra steps (`cp -a`, in particular) before running the command. also, it can take a long time to do the copy of the attic repository we wish to work on. since `cp -a` doesn't provide progress information, the new default behavior provides a nicer user experience of giving an overall impression of the upgrade progress, while retaining compatibility with Attic by default (in a separate repository, of course). this makes the upgrade command much less scary to use and hopefully will convert drones to the borg collective. the only place where the default inplace behavior is retained is in the header_replace() function, to avoid breaking the cache conversion code and to keep API stability and semantic coherence ("replace" by defaults means in place). --- borg/archiver.py | 31 +++++++++++++++------------ borg/testsuite/upgrader.py | 43 ++++++++++++++++++++++++++++++++++---- borg/upgrader.py | 34 +++++++++++++++++++++++------- 3 files changed, 83 insertions(+), 25 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 3c2b6fe86..b85aafec6 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -472,7 +472,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") # XXX: should auto-detect if it is an attic repository here repo = AtticRepositoryUpgrader(args.repository.path, create=False) try: - repo.upgrade(args.dry_run) + repo.upgrade(args.dry_run, inplace=args.inplace) except NotImplementedError as e: print("warning: %s" % e) return self.exit_code @@ -894,7 +894,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") help='repository to prune') upgrade_epilog = textwrap.dedent(""" - upgrade an existing Borg repository in place. this currently + upgrade an existing Borg repository. this currently only support converting an Attic repository, but may eventually be extended to cover major Borg upgrades as well. @@ -909,13 +909,6 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") the first backup after the conversion takes longer than expected due to the cache resync. - it is recommended you run this on a copy of the Attic - repository, in case something goes wrong, for example: - - cp -a attic borg - borg upgrade -n borg - borg upgrade borg - upgrade should be able to resume if interrupted, although it will still iterate over all segments. if you want to start from scratch, use `borg delete` over the copied repository to @@ -923,11 +916,19 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") borg delete borg - the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic - will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as - the magic strings will have changed. + unless ``--inplace`` is specified, the upgrade process first + creates a backup copy of the repository, in + REPOSITORY.upgrade-DATETIME, using hardlinks. this takes + longer than in place upgrades, but is much safer and gives + progress information (as opposed to ``cp -al``). once you are + satisfied with the conversion, you can safely destroy the + backup copy. - you have been warned.""") + WARNING: running the upgrade in place will make the current + copy unuseable with older version, with no way of going back + to previous versions. this can PERMANENTLY DAMAGE YOUR + REPOSITORY! Attic CAN NOT READ BORG REPOSITORIES, as the + magic strings have changed. you have been warned.""") subparser = subparsers.add_parser('upgrade', parents=[common_parser], description=self.do_upgrade.__doc__, epilog=upgrade_epilog, @@ -936,6 +937,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('-n', '--dry-run', dest='dry_run', default=False, action='store_true', help='do not change repository') + subparser.add_argument('-i', '--inplace', dest='inplace', + default=False, action='store_true', + help="""rewrite repository in-place, with no chance of going back to older + versions of the repository.""") subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='', type=location_validator(archive=False), help='path to the repository to be upgraded') diff --git a/borg/testsuite/upgrader.py b/borg/testsuite/upgrader.py index 757273189..f931af23e 100644 --- a/borg/testsuite/upgrader.py +++ b/borg/testsuite/upgrader.py @@ -63,8 +63,11 @@ def attic_repo(tmpdir): attic_repo.close() return attic_repo +@pytest.fixture(params=[True, False]) +def inplace(request): + return request.param -def test_convert_segments(tmpdir, attic_repo): +def test_convert_segments(tmpdir, attic_repo, inplace): """test segment conversion this will load the given attic repository, list all the segments @@ -80,7 +83,7 @@ def test_convert_segments(tmpdir, attic_repo): repo = AtticRepositoryUpgrader(str(tmpdir), create=False) segments = [filename for i, filename in repo.io.segment_iterator()] repo.close() - repo.convert_segments(segments, dryrun=False) + repo.convert_segments(segments, dryrun=False, inplace=inplace) repo.convert_cache(dryrun=False) assert repo_valid(tmpdir) @@ -141,7 +144,7 @@ def test_keys(tmpdir, attic_repo, attic_key_file): assert key_valid(attic_key_file.path) -def test_convert_all(tmpdir, attic_repo, attic_key_file): +def test_convert_all(tmpdir, attic_repo, attic_key_file, inplace): """test all conversion steps this runs everything. mostly redundant test, since everything is @@ -155,7 +158,39 @@ def test_convert_all(tmpdir, attic_repo, attic_key_file): """ # check should fail because of magic number assert not repo_valid(tmpdir) + def first_inode(path): + return os.stat(os.path.join(path, 'data', '0', '0')).st_ino + orig_inode = first_inode(attic_repo.path) repo = AtticRepositoryUpgrader(str(tmpdir), create=False) - repo.upgrade(dryrun=False) + backup = repo.upgrade(dryrun=False, inplace=inplace) + if inplace: + assert backup is None + assert first_inode(repo.path) == orig_inode + else: + assert backup + assert first_inode(repo.path) != first_inode(backup) + assert key_valid(attic_key_file.path) assert repo_valid(tmpdir) + +def test_hardlink(tmpdir, inplace): + """test that we handle hard links properly + + that is, if we are in "inplace" mode, hardlinks should *not* + change (ie. we write the file directly, so not the whole file, and + not re-create the file). + + if we are *not* in inplace mode, then the inode should change, as + we are supposed to leave the original inode alone.""" + a = str(tmpdir.join('a')) + with open(a, 'wb') as tmp: + tmp.write(b'aXXX') + b = str(tmpdir.join('b')) + os.link(a, b) + AtticRepositoryUpgrader.header_replace(b, b'a', b'b', inplace=inplace) + if not inplace: + assert os.stat(a).st_ino != os.stat(b).st_ino + else: + assert os.stat(a).st_ino == os.stat(b).st_ino + with open(b, 'rb') as tmp: + assert tmp.read() == b'bXXX' diff --git a/borg/upgrader.py b/borg/upgrader.py index 33f100e38..cc50c24de 100644 --- a/borg/upgrader.py +++ b/borg/upgrader.py @@ -1,4 +1,5 @@ from binascii import hexlify +import datetime import logging logger = logging.getLogger(__name__) import os @@ -15,7 +16,7 @@ ATTIC_MAGIC = b'ATTICSEG' class AtticRepositoryUpgrader(Repository): - def upgrade(self, dryrun=True): + def upgrade(self, dryrun=True, inplace=False): """convert an attic repository to a borg repository those are the files that need to be upgraded here, from most @@ -26,6 +27,12 @@ class AtticRepositoryUpgrader(Repository): we nevertheless do the order in reverse, as we prefer to do the fast stuff first, to improve interactivity. """ + backup = None + if not inplace: + backup = '{}.upgrade-{:%Y-%m-%d-%H:%M:%S}'.format(self.path, datetime.datetime.now()) + logger.info('making a hardlink copy in %s', backup) + if not dryrun: + shutil.copytree(self.path, backup, copy_function=lambda src, dst: os.link(src, dst)) logger.info("opening attic repository with borg and converting") # we need to open the repo to load configuration, keyfiles and segments self.open(self.path, exclusive=False) @@ -42,13 +49,14 @@ class AtticRepositoryUpgrader(Repository): exclusive=True).acquire() try: self.convert_cache(dryrun) - self.convert_segments(segments, dryrun) + self.convert_segments(segments, dryrun=dryrun, inplace=inplace) finally: self.lock.release() self.lock = None + return backup @staticmethod - def convert_segments(segments, dryrun): + def convert_segments(segments, dryrun=True, inplace=False): """convert repository segments from attic to borg replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in @@ -60,23 +68,33 @@ class AtticRepositoryUpgrader(Repository): i = 0 for filename in segments: i += 1 - print("\rconverting segment %d/%d in place, %.2f%% done (%s)" + print("\rconverting segment %d/%d, %.2f%% done (%s)" % (i, len(segments), 100*float(i)/len(segments), filename), end='', file=sys.stderr) if dryrun: time.sleep(0.001) else: - AtticRepositoryUpgrader.header_replace(filename, ATTIC_MAGIC, MAGIC) + AtticRepositoryUpgrader.header_replace(filename, ATTIC_MAGIC, MAGIC, inplace=inplace) print(file=sys.stderr) @staticmethod - def header_replace(filename, old_magic, new_magic): + def header_replace(filename, old_magic, new_magic, inplace=True): with open(filename, 'r+b') as segment: segment.seek(0) # only write if necessary if segment.read(len(old_magic)) == old_magic: - segment.seek(0) - segment.write(new_magic) + if inplace: + segment.seek(0) + segment.write(new_magic) + else: + # remove the hardlink and rewrite the file. this + # works because our old file handle is still open + # so even though the file is removed, we can still + # read it until the file is closed. + os.unlink(filename) + with open(filename, 'wb') as new_segment: + new_segment.write(new_magic) + new_segment.write(segment.read()) def find_attic_keyfile(self): """find the attic keyfiles