1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2024-12-25 17:27:31 +00:00

Merge pull request #6522 from ThomasWaldmann/tar-pax-master

import/export-tar: PAX format, ctime and atime support
This commit is contained in:
TW 2022-04-02 19:08:53 +02:00 committed by GitHub
commit 22fc6d1bdd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 53 additions and 19 deletions

View file

@ -1445,9 +1445,20 @@ def __init__(self, *, cache, key,
@contextmanager
def create_helper(self, tarinfo, status=None, type=None):
def s_to_ns(s):
    """Convert a seconds value (number or numeric string) to nanoseconds, passed through safe_ns."""
    seconds = float(s)
    nanoseconds = int(seconds * 1e9)
    return safe_ns(nanoseconds)
item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None,
mtime=safe_ns(int(tarinfo.mtime * 1000**3)))
mtime=s_to_ns(tarinfo.mtime))
if tarinfo.pax_headers:
ph = tarinfo.pax_headers
# note: for mtime this is a bit redundant as it is already done by tarfile module,
# but we just do it in our way to be consistent for sure.
for name in 'atime', 'ctime', 'mtime':
if name in ph:
ns = s_to_ns(ph[name])
setattr(item, name, ns)
yield item, status
# if we get here, "with"-block worked ok without error/exception, the item was processed ok...
self.add_item(item, stats=self.stats)

View file

@ -1137,7 +1137,8 @@ def peek_and_store_hardlink_masters(item, matched):
# The | (pipe) symbol instructs tarfile to use a streaming mode of operation
# where it never seeks on the passed fileobj.
tar = tarfile.open(fileobj=tarstream, mode='w|', format=tarfile.GNU_FORMAT)
tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT)[args.tar_format]
tar = tarfile.open(fileobj=tarstream, mode='w|', format=tar_format)
if progress:
pi = ProgressIndicatorPercent(msg='%5.1f%% Processing: %s', step=0.1, msgid='extract')
@ -1168,13 +1169,6 @@ def item_to_tarinfo(item, original_path):
the file contents, if any, and is None otherwise. When *tarinfo* is None, the *item*
cannot be represented as a TarInfo object and should be skipped.
"""
# If we would use the PAX (POSIX) format (which we currently don't),
# we can support most things that aren't possible with classic tar
# formats, including GNU tar, such as:
# atime, ctime, possibly Linux capabilities (security.* xattrs)
# and various additions supported by GNU tar in POSIX mode.
stream = None
tarinfo = tarfile.TarInfo()
tarinfo.name = item.path
@ -1236,6 +1230,24 @@ def item_to_tarinfo(item, original_path):
return None, stream
return tarinfo, stream
def item_to_paxheaders(item):
    """
    Transform (parts of) a Borg *item* into a pax_headers dict.

    For each of atime, ctime and mtime that is present on *item* (a nanosecond
    timestamp), an entry is emitted whose value is the timestamp in seconds,
    rendered as a decimal string with exactly 9 fractional digits.

    :param item: object that may have ``atime``, ``ctime`` and/or ``mtime`` attributes
    :return: dict mapping the pax keyword (str) to the decimal seconds string
    """
    # When using the PAX (POSIX) format, we can support some things that aren't possible
    # with classic tar formats, including GNU tar, such as:
    # - atime, ctime (DONE)
    # - possibly Linux capabilities, security.* xattrs (TODO)
    # - various additions supported by GNU tar in POSIX mode (TODO)
    ph = {}
    # note: for mtime this is a bit redundant as it is already done by tarfile module,
    #       but we just do it in our way to be consistent for sure.
    for name in 'atime', 'ctime', 'mtime':
        if hasattr(item, name):
            ns = int(getattr(item, name))
            # Do not go through float (ns / 1e9): a double can not hold all 19 digits of a
            # current nanosecond timestamp, so the last digits would be lost, and tiny values
            # would be rendered in exponent notation. Integer arithmetic keeps it exact.
            sign = '-' if ns < 0 else ''
            seconds, fraction = divmod(abs(ns), 10**9)
            ph[name] = f'{sign}{seconds}.{fraction:09d}'
    return ph
for item in archive.iter_items(filter, partial_extract=partial_extract,
preload=True, hardlink_masters=hardlink_masters):
orig_path = item.path
@ -1243,6 +1255,8 @@ def item_to_tarinfo(item, original_path):
item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
tarinfo, stream = item_to_tarinfo(item, orig_path)
if tarinfo:
if args.tar_format == 'PAX':
tarinfo.pax_headers = item_to_paxheaders(item)
if output_list:
logging.getLogger('borg.output.list').info(remove_surrogates(orig_path))
tar.addfile(tarinfo, stream)
@ -4043,7 +4057,10 @@ def define_borg_mount(parser):
read the uncompressed tar stream from stdin and write a compressed/filtered
tar stream to stdout.
The generated tarball uses the GNU tar format.
Depending on the ``--tar-format`` option, the generated tarball uses this format:
- PAX: POSIX.1-2001 (pax) format
- GNU: GNU tar format
export-tar is a lossy conversion:
BSD flags, ACLs and extended attributes (xattrs) are not exported; atime and ctime are only exported when using the PAX format.
@ -4071,6 +4088,9 @@ def define_borg_mount(parser):
help='filter program to pipe data through')
subparser.add_argument('--list', dest='output_list', action='store_true',
help='output verbose list of items (files, dirs, ...)')
subparser.add_argument('--tar-format', metavar='FMT', dest='tar_format', default='GNU',
choices=('PAX', 'GNU'),
help='select tar format: PAX or GNU')
subparser.add_argument('location', metavar='ARCHIVE',
type=location_validator(archive=True),
help='archive to export')

View file

@ -3409,7 +3409,7 @@ def test_export_tar(self):
os.unlink('input/flagfile')
self.cmd('init', '--encryption=repokey', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--progress')
self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--progress', '--tar-format=GNU')
with changedir('output'):
# This probably assumes GNU tar. Note -p switch to extract permissions regardless of umask.
subprocess.check_call(['tar', 'xpf', '../simple.tar', '--warning=no-timestamp'])
@ -3424,7 +3424,8 @@ def test_export_tar_gz(self):
os.unlink('input/flagfile')
self.cmd('init', '--encryption=repokey', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar.gz', '--list')
list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar.gz',
'--list', '--tar-format=GNU')
assert 'input/file1\n' in list
assert 'input/dir2\n' in list
with changedir('output'):
@ -3439,7 +3440,8 @@ def test_export_tar_strip_components(self):
os.unlink('input/flagfile')
self.cmd('init', '--encryption=repokey', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--strip-components=1', '--list')
list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar',
'--strip-components=1', '--list', '--tar-format=GNU')
# --list's path are those before processing with --strip-components
assert 'input/file1\n' in list
assert 'input/dir2\n' in list
@ -3451,7 +3453,8 @@ def test_export_tar_strip_components(self):
@requires_gnutar
def test_export_tar_strip_components_links(self):
self._extract_hardlinks_setup()
self.cmd('export-tar', self.repository_location + '::test', 'output.tar', '--strip-components=2')
self.cmd('export-tar', self.repository_location + '::test', 'output.tar',
'--strip-components=2', '--tar-format=GNU')
with changedir('output'):
subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp'])
assert os.stat('hardlink').st_nlink == 2
@ -3463,7 +3466,7 @@ def test_export_tar_strip_components_links(self):
@requires_gnutar
def test_extract_hardlinks_tar(self):
self._extract_hardlinks_setup()
self.cmd('export-tar', self.repository_location + '::test', 'output.tar', 'input/dir1')
self.cmd('export-tar', self.repository_location + '::test', 'output.tar', 'input/dir1', '--tar-format=GNU')
with changedir('output'):
subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp'])
assert os.stat('input/dir1/hardlink').st_nlink == 2
@ -3471,26 +3474,26 @@ def test_extract_hardlinks_tar(self):
assert os.stat('input/dir1/aaaa').st_nlink == 2
assert os.stat('input/dir1/source2').st_nlink == 2
def test_import_tar(self):
def test_import_tar(self, tar_format='PAX'):
self.create_test_files()
os.unlink('input/flagfile')
self.cmd('init', '--encryption=none', self.repository_location)
self.cmd('create', self.repository_location + '::src', 'input')
self.cmd('export-tar', self.repository_location + '::src', 'simple.tar')
self.cmd('export-tar', self.repository_location + '::src', 'simple.tar', f'--tar-format={tar_format}')
self.cmd('import-tar', self.repository_location + '::dst', 'simple.tar')
with changedir(self.output_path):
self.cmd('extract', self.repository_location + '::dst')
self.assert_dirs_equal('input', 'output/input', ignore_ns=True, ignore_xattrs=True)
@requires_gzip
def test_import_tar_gz(self):
def test_import_tar_gz(self, tar_format='GNU'):
if not shutil.which('gzip'):
pytest.skip('gzip is not installed')
self.create_test_files()
os.unlink('input/flagfile')
self.cmd('init', '--encryption=none', self.repository_location)
self.cmd('create', self.repository_location + '::src', 'input')
self.cmd('export-tar', self.repository_location + '::src', 'simple.tgz')
self.cmd('export-tar', self.repository_location + '::src', 'simple.tgz', f'--tar-format={tar_format}')
self.cmd('import-tar', self.repository_location + '::dst', 'simple.tgz')
with changedir(self.output_path):
self.cmd('extract', self.repository_location + '::dst')