1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-02-24 23:13:25 +00:00

import/export-tar: --tar-format, support ctime/atime

--tar-format=GNU|PAX (default: GNU)

changed the tests which use GNU tar cli tool to use --tar-format=GNU
explicitly, so they don't break in case we change the default.

atime timestamp is only present in output if the archive item has it
(which is not the case by default, needs "borg create --atime ...").
This commit is contained in:
Thomas Waldmann 2022-04-02 14:31:41 +02:00
parent e8456ff7d9
commit 78e92fa9e1
3 changed files with 53 additions and 19 deletions

View file

@ -1445,9 +1445,20 @@ def __init__(self, *, cache, key,
# Context manager used while importing a tar member: it builds a borg Item from
# *tarinfo*, yields (item, status) to the caller's "with" block and, after that block
# completed without raising, adds the item to the archive via self.add_item().
# NOTE(review): this text looks like a rendered diff hunk - the two consecutive
# "mtime=" lines below are presumably the old and the new version of the same line.
@contextmanager
def create_helper(self, tarinfo, status=None, type=None):
# s_to_ns: convert a timestamp given in seconds (a number or a decimal string, parsed
# with float()) to integer nanoseconds and pass the result through safe_ns().
# The conversion uses float arithmetic, so it is not exact down to the nanosecond.
def s_to_ns(s):
return safe_ns(int(float(s) * 1e9))
# basic metadata comes from the classic tar header fields; *type* is OR-ed into the
# mode bits, empty user / group names are stored as None.
item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None,
mtime=safe_ns(int(tarinfo.mtime * 1000**3)))
mtime=s_to_ns(tarinfo.mtime))
# pax extended headers (if any) may carry atime / ctime / mtime as strings in seconds;
# each one that is present is converted and set on the item (overwriting mtime from above).
if tarinfo.pax_headers:
ph = tarinfo.pax_headers
# note: for mtime this is a bit redundant as it is already done by tarfile module,
# but we just do it in our way to be consistent for sure.
for name in 'atime', 'ctime', 'mtime':
if name in ph:
ns = s_to_ns(ph[name])
setattr(item, name, ns)
yield item, status
# if we get here, "with"-block worked ok without error/exception, the item was processed ok...
self.add_item(item, stats=self.stats)

View file

@ -1137,7 +1137,8 @@ def peek_and_store_hardlink_masters(item, matched):
# The | (pipe) symbol instructs tarfile to use a streaming mode of operation
# where it never seeks on the passed fileobj.
tar = tarfile.open(fileobj=tarstream, mode='w|', format=tarfile.GNU_FORMAT)
tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT)[args.tar_format]
tar = tarfile.open(fileobj=tarstream, mode='w|', format=tar_format)
if progress:
pi = ProgressIndicatorPercent(msg='%5.1f%% Processing: %s', step=0.1, msgid='extract')
@ -1168,13 +1169,6 @@ def item_to_tarinfo(item, original_path):
the file contents, if any, and is None otherwise. When *tarinfo* is None, the *item*
cannot be represented as a TarInfo object and should be skipped.
"""
# If we would use the PAX (POSIX) format (which we currently don't),
# we can support most things that aren't possible with classic tar
# formats, including GNU tar, such as:
# atime, ctime, possibly Linux capabilities (security.* xattrs)
# and various additions supported by GNU tar in POSIX mode.
stream = None
tarinfo = tarfile.TarInfo()
tarinfo.name = item.path
@ -1236,6 +1230,24 @@ def item_to_tarinfo(item, original_path):
return None, stream
return tarinfo, stream
def item_to_paxheaders(item):
    """
    Transform (parts of) a Borg *item* into a pax_headers dict.

    For each of atime, ctime and mtime that the item has, the returned dict maps
    the name to the timestamp in seconds, formatted as a plain decimal string
    with exactly 9 fractional digits (e.g. '1600000000.123456789').

    The item's timestamps are expected to be integer nanoseconds. They are
    formatted with integer arithmetic only, so no precision is lost to float
    rounding and no exponent notation (like '1e-05') can end up in the header.

    Returns an empty dict if the item has none of these attributes.
    """
    # When using the PAX (POSIX) format, we can support some things that aren't possible
    # with classic tar formats, including GNU tar, such as:
    # - atime, ctime (DONE)
    # - possibly Linux capabilities, security.* xattrs (TODO)
    # - various additions supported by GNU tar in POSIX mode (TODO)
    ph = {}
    # note: for mtime this is a bit redundant as it is already done by tarfile module,
    #       but we just do it in our way to be consistent for sure.
    for name in 'atime', 'ctime', 'mtime':
        if hasattr(item, name):
            ns = int(getattr(item, name))
            # split the absolute value, so that negative timestamps (before the epoch)
            # get the right sign instead of a floor-division artifact like "-2.5" for -1.5s.
            sign = '-' if ns < 0 else ''
            seconds, fraction = divmod(abs(ns), 10**9)
            ph[name] = f'{sign}{seconds}.{fraction:09d}'
    return ph
for item in archive.iter_items(filter, partial_extract=partial_extract,
preload=True, hardlink_masters=hardlink_masters):
orig_path = item.path
@ -1243,6 +1255,8 @@ def item_to_tarinfo(item, original_path):
item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
tarinfo, stream = item_to_tarinfo(item, orig_path)
if tarinfo:
if args.tar_format == 'PAX':
tarinfo.pax_headers = item_to_paxheaders(item)
if output_list:
logging.getLogger('borg.output.list').info(remove_surrogates(orig_path))
tar.addfile(tarinfo, stream)
@ -4043,7 +4057,10 @@ def define_borg_mount(parser):
read the uncompressed tar stream from stdin and write a compressed/filtered
tar stream to stdout.
The generated tarball uses the GNU tar format.
Depending on the ``--tar-format`` option, the generated tarball uses this format:
- PAX: POSIX.1-2001 (pax) format
- GNU: GNU tar format
export-tar is a lossy conversion:
BSD flags, ACLs and extended attributes (xattrs) are not exported; atime and ctime are only exported when using the PAX format.
@ -4071,6 +4088,9 @@ def define_borg_mount(parser):
help='filter program to pipe data through')
subparser.add_argument('--list', dest='output_list', action='store_true',
help='output verbose list of items (files, dirs, ...)')
subparser.add_argument('--tar-format', metavar='FMT', dest='tar_format', default='GNU',
choices=('PAX', 'GNU'),
help='select tar format: PAX or GNU')
subparser.add_argument('location', metavar='ARCHIVE',
type=location_validator(archive=True),
help='archive to export')

View file

@ -3409,7 +3409,7 @@ def test_export_tar(self):
os.unlink('input/flagfile')
self.cmd('init', '--encryption=repokey', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--progress')
self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--progress', '--tar-format=GNU')
with changedir('output'):
# This probably assumes GNU tar. Note -p switch to extract permissions regardless of umask.
subprocess.check_call(['tar', 'xpf', '../simple.tar', '--warning=no-timestamp'])
@ -3424,7 +3424,8 @@ def test_export_tar_gz(self):
os.unlink('input/flagfile')
self.cmd('init', '--encryption=repokey', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar.gz', '--list')
list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar.gz',
'--list', '--tar-format=GNU')
assert 'input/file1\n' in list
assert 'input/dir2\n' in list
with changedir('output'):
@ -3439,7 +3440,8 @@ def test_export_tar_strip_components(self):
os.unlink('input/flagfile')
self.cmd('init', '--encryption=repokey', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--strip-components=1', '--list')
list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar',
'--strip-components=1', '--list', '--tar-format=GNU')
# --list's path are those before processing with --strip-components
assert 'input/file1\n' in list
assert 'input/dir2\n' in list
@ -3451,7 +3453,8 @@ def test_export_tar_strip_components(self):
@requires_gnutar
def test_export_tar_strip_components_links(self):
self._extract_hardlinks_setup()
self.cmd('export-tar', self.repository_location + '::test', 'output.tar', '--strip-components=2')
self.cmd('export-tar', self.repository_location + '::test', 'output.tar',
'--strip-components=2', '--tar-format=GNU')
with changedir('output'):
subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp'])
assert os.stat('hardlink').st_nlink == 2
@ -3463,7 +3466,7 @@ def test_export_tar_strip_components_links(self):
@requires_gnutar
def test_extract_hardlinks_tar(self):
self._extract_hardlinks_setup()
self.cmd('export-tar', self.repository_location + '::test', 'output.tar', 'input/dir1')
self.cmd('export-tar', self.repository_location + '::test', 'output.tar', 'input/dir1', '--tar-format=GNU')
with changedir('output'):
subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp'])
assert os.stat('input/dir1/hardlink').st_nlink == 2
@ -3471,26 +3474,26 @@ def test_extract_hardlinks_tar(self):
assert os.stat('input/dir1/aaaa').st_nlink == 2
assert os.stat('input/dir1/source2').st_nlink == 2
# Round-trip test: archive the input files as ::src, export that archive to simple.tar,
# import the tarball again as ::dst, extract ::dst and compare the result with the input
# directory (nanosecond timestamp differences and xattrs are ignored in the comparison).
# NOTE(review): this text looks like a rendered diff hunk - the two "def" lines and the
# two export-tar calls are presumably the old and the new version of the same line.
def test_import_tar(self):
def test_import_tar(self, tar_format='PAX'):
self.create_test_files()
# flagfile is removed before the archive is created
os.unlink('input/flagfile')
self.cmd('init', '--encryption=none', self.repository_location)
self.cmd('create', self.repository_location + '::src', 'input')
self.cmd('export-tar', self.repository_location + '::src', 'simple.tar')
# the tar format used for the export is selected by the *tar_format* parameter
self.cmd('export-tar', self.repository_location + '::src', 'simple.tar', f'--tar-format={tar_format}')
self.cmd('import-tar', self.repository_location + '::dst', 'simple.tar')
with changedir(self.output_path):
self.cmd('extract', self.repository_location + '::dst')
self.assert_dirs_equal('input', 'output/input', ignore_ns=True, ignore_xattrs=True)
@requires_gzip
def test_import_tar_gz(self):
def test_import_tar_gz(self, tar_format='GNU'):
if not shutil.which('gzip'):
pytest.skip('gzip is not installed')
self.create_test_files()
os.unlink('input/flagfile')
self.cmd('init', '--encryption=none', self.repository_location)
self.cmd('create', self.repository_location + '::src', 'input')
self.cmd('export-tar', self.repository_location + '::src', 'simple.tgz')
self.cmd('export-tar', self.repository_location + '::src', 'simple.tgz', f'--tar-format={tar_format}')
self.cmd('import-tar', self.repository_location + '::dst', 'simple.tgz')
with changedir(self.output_path):
self.cmd('extract', self.repository_location + '::dst')