diff --git a/docs/usage/tar.rst b/docs/usage/tar.rst index 34f307cdd..ed1dcc0a6 100644 --- a/docs/usage/tar.rst +++ b/docs/usage/tar.rst @@ -1,7 +1,7 @@ -.. include:: import-tar.rst.inc - .. include:: export-tar.rst.inc +.. include:: import-tar.rst.inc + Examples ~~~~~~~~ :: @@ -9,15 +9,49 @@ Examples # export as uncompressed tar $ borg export-tar /path/to/repo::Monday Monday.tar - # exclude some types, compress using gzip + # import an uncompressed tar + $ borg import-tar /path/to/repo::Monday Monday.tar + + # exclude some file types, compress using gzip $ borg export-tar /path/to/repo::Monday Monday.tar.gz --exclude '*.so' # use higher compression level with gzip - $ borg export-tar --tar-filter="gzip -9" testrepo::linux Monday.tar.gz + $ borg export-tar --tar-filter="gzip -9" repo::Monday Monday.tar.gz - # export a tar, but instead of storing it on disk, - # upload it to a remote site using curl. + # copy an archive from repoA to repoB + $ borg export-tar --tar-format=BORG repoA::archive - | borg import-tar repoB::archive - + + # export a tar, but instead of storing it on disk, upload it to a remote site using curl $ borg export-tar /path/to/repo::Monday - | curl --data-binary @- https://somewhere/to/POST # remote extraction via "tarpipe" $ borg export-tar /path/to/repo::Monday - | ssh somewhere "cd extracted; tar x" + +Archives transfer script +~~~~~~~~~~~~~~~~~~~~~~~~ + +Outputs a script that copies all archives from repo1 to repo2: + +:: + + borg list --format='{archive} {time:%Y-%m-%dT%H:%M:%S}{LF}' repo1 | while read -r A T + do + echo "borg export-tar --tar-format=BORG repo1::$A - | borg import-tar --timestamp=$T repo2::$A -" + done + +Kept: + +- archive name, archive timestamp +- archive contents (all items with metadata and data) + +Lost: + +- some archive metadata (like the original commandline, execution time, etc.) 
+ +Please note: + +- all data goes over that pipe, again and again for every archive +- the pipe is dumb, there is no data or transfer time reduction there due to deduplication +- maybe add compression +- pipe over ssh for remote transfer +- no special sparse file support diff --git a/src/borg/archive.py b/src/borg/archive.py index d6fa3b105..543ac26d2 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1,3 +1,4 @@ +import base64 import json import os import socket @@ -1445,20 +1446,26 @@ class TarfileObjectProcessors: @contextmanager def create_helper(self, tarinfo, status=None, type=None): - def s_to_ns(s): - return safe_ns(int(float(s) * 1e9)) + ph = tarinfo.pax_headers + if ph and 'BORG.item.version' in ph: + assert ph['BORG.item.version'] == '1' + meta_bin = base64.b64decode(ph['BORG.item.meta']) + meta_dict = msgpack.unpackb(meta_bin, object_hook=StableDict) + item = Item(internal_dict=meta_dict) + else: + def s_to_ns(s): + return safe_ns(int(float(s) * 1e9)) - item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type, - uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None, - mtime=s_to_ns(tarinfo.mtime)) - if tarinfo.pax_headers: - ph = tarinfo.pax_headers - # note: for mtime this is a bit redundant as it is already done by tarfile module, - # but we just do it in our way to be consistent for sure. - for name in 'atime', 'ctime', 'mtime': - if name in ph: - ns = s_to_ns(ph[name]) - setattr(item, name, ns) + item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type, + uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None, + mtime=s_to_ns(tarinfo.mtime)) + if ph: + # note: for mtime this is a bit redundant as it is already done by tarfile module, + # but we just do it in our way to be consistent for sure. 
+ for name in 'atime', 'ctime', 'mtime': + if name in ph: + ns = s_to_ns(ph[name]) + setattr(item, name, ns) yield item, status # if we get here, "with"-block worked ok without error/exception, the item was processed ok... self.add_item(item, stats=self.stats) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 7eb25bac3..5f81d8d94 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -5,6 +5,7 @@ import traceback try: import argparse + import base64 import collections import configparser import faulthandler @@ -1137,7 +1138,7 @@ class Archiver: # The | (pipe) symbol instructs tarfile to use a streaming mode of operation # where it never seeks on the passed fileobj. - tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT)[args.tar_format] + tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT, BORG=tarfile.PAX_FORMAT)[args.tar_format] tar = tarfile.open(fileobj=tarstream, mode='w|', format=tar_format) if progress: @@ -1230,15 +1231,24 @@ class Archiver: return None, stream return tarinfo, stream - def item_to_paxheaders(item): + def item_to_paxheaders(format, item): """ Transform (parts of) a Borg *item* into a pax_headers dict. """ + # PAX format + # ---------- # When using the PAX (POSIX) format, we can support some things that aren't possible # with classic tar formats, including GNU tar, such as: # - atime, ctime (DONE) # - possibly Linux capabilities, security.* xattrs (TODO) # - various additions supported by GNU tar in POSIX mode (TODO) + # + # BORG format + # ----------- + # This is based on PAX, but adds BORG.* pax headers. + # In addition to the standard tar / PAX metadata and data, it transfers + # ALL borg item metadata in a BORG specific way. + # ph = {} # note: for mtime this is a bit redundant as it is already done by tarfile module, # but we just do it in our way to be consistent for sure. 
@@ -1246,6 +1256,12 @@ class Archiver: if hasattr(item, name): ns = getattr(item, name) ph[name] = str(ns / 1e9) + if format == 'BORG': # BORG format additions + ph['BORG.item.version'] = '1' + # BORG.item.meta - just serialize all metadata we have: + meta_bin = msgpack.packb(item.as_dict()) + meta_text = base64.b64encode(meta_bin).decode() + ph['BORG.item.meta'] = meta_text return ph for item in archive.iter_items(filter, partial_extract=partial_extract, @@ -1255,8 +1271,8 @@ class Archiver: item.path = os.sep.join(orig_path.split(os.sep)[strip_components:]) tarinfo, stream = item_to_tarinfo(item, orig_path) if tarinfo: - if args.tar_format == 'PAX': - tarinfo.pax_headers = item_to_paxheaders(item) + if args.tar_format in ('BORG', 'PAX'): + tarinfo.pax_headers = item_to_paxheaders(args.tar_format, item) if output_list: logging.getLogger('borg.output.list').info(remove_surrogates(orig_path)) tar.addfile(tarinfo, stream) @@ -4057,15 +4073,18 @@ class Archiver: read the uncompressed tar stream from stdin and write a compressed/filtered tar stream to stdout. - Depending on the ```-tar-format``option, the generated tarball uses this format: + Depending on the ``--tar-format`` option, these formats are created: - - PAX: POSIX.1-2001 (pax) format - - GNU: GNU tar format - - export-tar is a lossy conversion: - BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported. - Timestamp resolution is limited to whole seconds, not the nanosecond resolution - otherwise supported by Borg. 
+ +--------------+---------------------------+----------------------------+ + | --tar-format | Specification | Metadata | + +--------------+---------------------------+----------------------------+ + | BORG | BORG specific, like PAX | all as supported by borg | + +--------------+---------------------------+----------------------------+ + | PAX | POSIX.1-2001 (pax) format | GNU + atime/ctime/mtime ns | + +--------------+---------------------------+----------------------------+ + | GNU | GNU tar format | mtime s, no atime/ctime, | + | | | no ACLs/xattrs/bsdflags | + +--------------+---------------------------+----------------------------+ A ``--sparse`` option (as found in borg extract) is not supported. @@ -4089,8 +4108,8 @@ class Archiver: subparser.add_argument('--list', dest='output_list', action='store_true', help='output verbose list of items (files, dirs, ...)') subparser.add_argument('--tar-format', metavar='FMT', dest='tar_format', default='GNU', - choices=('PAX', 'GNU'), - help='select tar format: PAX or GNU') + choices=('BORG', 'PAX', 'GNU'), + help='select tar format: BORG, PAX or GNU') subparser.add_argument('location', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to export') @@ -4939,15 +4958,19 @@ class Archiver: Most documentation of borg create applies. Note that this command does not support excluding files. - import-tar is a lossy conversion: - BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported. - Timestamp resolution is limited to whole seconds, not the nanosecond resolution - otherwise supported by Borg. - A ``--sparse`` option (as found in borg create) is not supported. - import-tar reads POSIX.1-1988 (ustar), POSIX.1-2001 (pax), GNU tar, UNIX V7 tar - and SunOS tar with extended attributes. + About tar formats and metadata conservation or loss, please see ``borg export-tar``. 
+ + import-tar reads these tar formats: + + - BORG: borg specific (PAX-based) + - PAX: POSIX.1-2001 + - GNU: GNU tar + - POSIX.1-1988 (ustar) + - UNIX V7 tar + - SunOS tar with extended attributes + """) subparser = subparsers.add_parser('import-tar', parents=[common_parser], add_help=False, description=self.do_import_tar.__doc__, diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 48637b1f7..b5cde866e 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -3499,6 +3499,16 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02 self.cmd('extract', self.repository_location + '::dst') self.assert_dirs_equal('input', 'output/input', ignore_ns=True, ignore_xattrs=True) + def test_roundtrip_pax_borg(self): + self.create_test_files() + self.cmd('init', '--encryption=none', self.repository_location) + self.cmd('create', self.repository_location + '::src', 'input') + self.cmd('export-tar', self.repository_location + '::src', 'simple.tar', '--tar-format=BORG') + self.cmd('import-tar', self.repository_location + '::dst', 'simple.tar') + with changedir(self.output_path): + self.cmd('extract', self.repository_location + '::dst') + self.assert_dirs_equal('input', 'output/input') + # derived from test_extract_xattrs_errors() @pytest.mark.skipif(not xattr.XATTR_FAKEROOT, reason='xattr not supported on this system or on this version of' 'fakeroot')