Merge pull request #6523 from ThomasWaldmann/pax-borg-item-master

import/export-tar: --tar-format=BORG: roundtrip ALL item metadata
This commit is contained in:
TW 2022-04-09 20:22:36 +02:00 committed by GitHub
commit 28fa9e0f0b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 114 additions and 40 deletions

View File

@ -1,7 +1,7 @@
.. include:: import-tar.rst.inc
.. include:: export-tar.rst.inc
.. include:: import-tar.rst.inc
Examples
~~~~~~~~
::
@ -9,15 +9,49 @@ Examples
# export as uncompressed tar
$ borg export-tar /path/to/repo::Monday Monday.tar
# exclude some types, compress using gzip
# import an uncompressed tar
$ borg import-tar /path/to/repo::Monday Monday.tar
# exclude some file types, compress using gzip
$ borg export-tar /path/to/repo::Monday Monday.tar.gz --exclude '*.so'
# use higher compression level with gzip
$ borg export-tar --tar-filter="gzip -9" testrepo::linux Monday.tar.gz
$ borg export-tar --tar-filter="gzip -9" repo::Monday Monday.tar.gz
# export a tar, but instead of storing it on disk,
# upload it to a remote site using curl.
# copy an archive from repoA to repoB
$ borg export-tar --tar-format=BORG repoA::archive - | borg import-tar repoB::archive -
# export a tar, but instead of storing it on disk, upload it to a remote site using curl
$ borg export-tar /path/to/repo::Monday - | curl --data-binary @- https://somewhere/to/POST
# remote extraction via "tarpipe"
$ borg export-tar /path/to/repo::Monday - | ssh somewhere "cd extracted; tar x"
Archives transfer script
~~~~~~~~~~~~~~~~~~~~~~~~
Outputs a script that copies all archives from repo1 to repo2:
::
borg list --format='{archive} {time:%Y-%m-%dT%H:%M:%S}{LF}' repo1 | while read A T
do
echo "borg export-tar --tar-format=BORG repo1::$A - | borg import-tar --timestamp=$T repo2::$A -"
done
Kept:
- archive name, archive timestamp
- archive contents (all items with metadata and data)
Lost:
- some archive metadata (like the original commandline, execution time, etc.)
Please note:
- all data goes over that pipe, again and again for every archive
- the pipe is dumb: deduplication does not reduce the amount of data transferred or the transfer time
- maybe add compression
- pipe over ssh for remote transfer
- no special sparse file support

View File

@ -1,3 +1,4 @@
import base64
import json
import os
import socket
@ -1454,20 +1455,26 @@ class TarfileObjectProcessors:
@contextmanager
def create_helper(self, tarinfo, status=None, type=None):
def s_to_ns(s):
return safe_ns(int(float(s) * 1e9))
ph = tarinfo.pax_headers
if ph and 'BORG.item.version' in ph:
assert ph['BORG.item.version'] == '1'
meta_bin = base64.b64decode(ph['BORG.item.meta'])
meta_dict = msgpack.unpackb(meta_bin, object_hook=StableDict)
item = Item(internal_dict=meta_dict)
else:
def s_to_ns(s):
return safe_ns(int(float(s) * 1e9))
item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None,
mtime=s_to_ns(tarinfo.mtime))
if tarinfo.pax_headers:
ph = tarinfo.pax_headers
# note: for mtime this is a bit redundant as it is already done by tarfile module,
# but we just do it in our way to be consistent for sure.
for name in 'atime', 'ctime', 'mtime':
if name in ph:
ns = s_to_ns(ph[name])
setattr(item, name, ns)
item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None,
mtime=s_to_ns(tarinfo.mtime))
if ph:
# note: for mtime this is a bit redundant as it is already done by tarfile module,
# but we just do it in our way to be consistent for sure.
for name in 'atime', 'ctime', 'mtime':
if name in ph:
ns = s_to_ns(ph[name])
setattr(item, name, ns)
yield item, status
# if we get here, "with"-block worked ok without error/exception, the item was processed ok...
self.add_item(item, stats=self.stats)

View File

@ -5,6 +5,7 @@ import traceback
try:
import argparse
import base64
import collections
import configparser
import faulthandler
@ -1147,7 +1148,7 @@ class Archiver:
# The | (pipe) symbol instructs tarfile to use a streaming mode of operation
# where it never seeks on the passed fileobj.
tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT)[args.tar_format]
tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT, BORG=tarfile.PAX_FORMAT)[args.tar_format]
tar = tarfile.open(fileobj=tarstream, mode='w|', format=tar_format)
if progress:
@ -1240,15 +1241,24 @@ class Archiver:
return None, stream
return tarinfo, stream
def item_to_paxheaders(item):
def item_to_paxheaders(format, item):
"""
Transform (parts of) a Borg *item* into a pax_headers dict.
"""
# PAX format
# ----------
# When using the PAX (POSIX) format, we can support some things that aren't possible
# with classic tar formats, including GNU tar, such as:
# - atime, ctime (DONE)
# - possibly Linux capabilities, security.* xattrs (TODO)
# - various additions supported by GNU tar in POSIX mode (TODO)
#
# BORG format
# -----------
# This is based on PAX, but adds BORG.* pax headers.
# In addition to the standard tar / PAX metadata and data, it transfers
# ALL borg item metadata in a BORG specific way.
#
ph = {}
# note: for mtime this is a bit redundant as it is already done by tarfile module,
# but we just do it in our way to be consistent for sure.
@ -1256,6 +1266,12 @@ class Archiver:
if hasattr(item, name):
ns = getattr(item, name)
ph[name] = str(ns / 1e9)
if format == 'BORG': # BORG format additions
ph['BORG.item.version'] = '1'
# BORG.item.meta - just serialize all metadata we have:
meta_bin = msgpack.packb(item.as_dict())
meta_text = base64.b64encode(meta_bin).decode()
ph['BORG.item.meta'] = meta_text
return ph
for item in archive.iter_items(filter, partial_extract=partial_extract,
@ -1265,8 +1281,8 @@ class Archiver:
item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
tarinfo, stream = item_to_tarinfo(item, orig_path)
if tarinfo:
if args.tar_format == 'PAX':
tarinfo.pax_headers = item_to_paxheaders(item)
if args.tar_format in ('BORG', 'PAX'):
tarinfo.pax_headers = item_to_paxheaders(args.tar_format, item)
if output_list:
logging.getLogger('borg.output.list').info(remove_surrogates(orig_path))
tar.addfile(tarinfo, stream)
@ -4067,15 +4083,18 @@ class Archiver:
read the uncompressed tar stream from stdin and write a compressed/filtered
tar stream to stdout.
Depending on the ```-tar-format``option, the generated tarball uses this format:
Depending on the ``--tar-format`` option, these formats are created:
- PAX: POSIX.1-2001 (pax) format
- GNU: GNU tar format
export-tar is a lossy conversion:
BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported.
Timestamp resolution is limited to whole seconds, not the nanosecond resolution
otherwise supported by Borg.
+--------------+---------------------------+----------------------------+
| --tar-format | Specification | Metadata |
+--------------+---------------------------+----------------------------+
| BORG | BORG specific, like PAX | all as supported by borg |
+--------------+---------------------------+----------------------------+
| PAX | POSIX.1-2001 (pax) format | GNU + atime/ctime/mtime ns |
+--------------+---------------------------+----------------------------+
| GNU | GNU tar format | mtime s, no atime/ctime, |
| | | no ACLs/xattrs/bsdflags |
+--------------+---------------------------+----------------------------+
A ``--sparse`` option (as found in borg extract) is not supported.
@ -4099,8 +4118,8 @@ class Archiver:
subparser.add_argument('--list', dest='output_list', action='store_true',
help='output verbose list of items (files, dirs, ...)')
subparser.add_argument('--tar-format', metavar='FMT', dest='tar_format', default='GNU',
choices=('PAX', 'GNU'),
help='select tar format: PAX or GNU')
choices=('BORG', 'PAX', 'GNU'),
help='select tar format: BORG, PAX or GNU')
subparser.add_argument('location', metavar='ARCHIVE',
type=location_validator(archive=True),
help='archive to export')
@ -4974,15 +4993,19 @@ class Archiver:
Most documentation of borg create applies. Note that this command does not
support excluding files.
import-tar is a lossy conversion:
BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported.
Timestamp resolution is limited to whole seconds, not the nanosecond resolution
otherwise supported by Borg.
A ``--sparse`` option (as found in borg create) is not supported.
import-tar reads POSIX.1-1988 (ustar), POSIX.1-2001 (pax), GNU tar, UNIX V7 tar
and SunOS tar with extended attributes.
For details about tar formats and metadata preservation or loss, please see ``borg export-tar``.
import-tar reads these tar formats:
- BORG: borg specific (PAX-based)
- PAX: POSIX.1-2001
- GNU: GNU tar
- POSIX.1-1988 (ustar)
- UNIX V7 tar
- SunOS tar with extended attributes
""")
subparser = subparsers.add_parser('import-tar', parents=[common_parser], add_help=False,
description=self.do_import_tar.__doc__,

View File

@ -3499,6 +3499,16 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
self.cmd('extract', self.repository_location + '::dst')
self.assert_dirs_equal('input', 'output/input', ignore_ns=True, ignore_xattrs=True)
def test_roundtrip_pax_borg(self):
# Round-trip an archive through a tarball written with --tar-format=BORG
# (export-tar followed by import-tar) and compare the extracted result with the input.
self.create_test_files()
self.cmd('init', '--encryption=none', self.repository_location)
self.cmd('create', self.repository_location + '::src', 'input')
# export the 'src' archive to simple.tar using the BORG tar format
self.cmd('export-tar', self.repository_location + '::src', 'simple.tar', '--tar-format=BORG')
# import that tarball as a new archive 'dst' in the same repository
self.cmd('import-tar', self.repository_location + '::dst', 'simple.tar')
with changedir(self.output_path):
self.cmd('extract', self.repository_location + '::dst')
# note: no ignore_* keyword arguments are passed to assert_dirs_equal here
self.assert_dirs_equal('input', 'output/input')
# derived from test_extract_xattrs_errors()
@pytest.mark.skipif(not xattr.XATTR_FAKEROOT, reason='xattr not supported on this system or on this version of'
'fakeroot')