From 78e92fa9e137716ced28f7e2a80548dd682da433 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 2 Apr 2022 14:31:41 +0200 Subject: [PATCH] import/export-tar: --tar-format, support ctime/atime --tar-format=GNU|PAX (default: GNU) changed the tests which use GNU tar cli tool to use --tar-format=GNU explicitly, so they don't break in case we change the default. atime timestamp is only present in output if the archive item has it (which is not the case by default, needs "borg create --atime ..."). --- src/borg/archive.py | 13 +++++++++++- src/borg/archiver.py | 38 ++++++++++++++++++++++++++-------- src/borg/testsuite/archiver.py | 21 +++++++++++-------- 3 files changed, 53 insertions(+), 19 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 833616661..d6fa3b105 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1445,9 +1445,20 @@ def __init__(self, *, cache, key, @contextmanager def create_helper(self, tarinfo, status=None, type=None): + def s_to_ns(s): + return safe_ns(int(float(s) * 1e9)) + item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type, uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None, - mtime=safe_ns(int(tarinfo.mtime * 1000**3))) + mtime=s_to_ns(tarinfo.mtime)) + if tarinfo.pax_headers: + ph = tarinfo.pax_headers + # note: for mtime this is a bit redundant as it is already done by tarfile module, + # but we just do it in our way to be consistent for sure. + for name in 'atime', 'ctime', 'mtime': + if name in ph: + ns = s_to_ns(ph[name]) + setattr(item, name, ns) yield item, status # if we get here, "with"-block worked ok without error/exception, the item was processed ok... 
self.add_item(item, stats=self.stats) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index c5341da5e..7eb25bac3 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -1137,7 +1137,8 @@ def peek_and_store_hardlink_masters(item, matched): # The | (pipe) symbol instructs tarfile to use a streaming mode of operation # where it never seeks on the passed fileobj. - tar = tarfile.open(fileobj=tarstream, mode='w|', format=tarfile.GNU_FORMAT) + tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT)[args.tar_format] + tar = tarfile.open(fileobj=tarstream, mode='w|', format=tar_format) if progress: pi = ProgressIndicatorPercent(msg='%5.1f%% Processing: %s', step=0.1, msgid='extract') @@ -1168,13 +1169,6 @@ def item_to_tarinfo(item, original_path): the file contents, if any, and is None otherwise. When *tarinfo* is None, the *item* cannot be represented as a TarInfo object and should be skipped. """ - - # If we would use the PAX (POSIX) format (which we currently don't), - # we can support most things that aren't possible with classic tar - # formats, including GNU tar, such as: - # atime, ctime, possibly Linux capabilities (security.* xattrs) - # and various additions supported by GNU tar in POSIX mode. - stream = None tarinfo = tarfile.TarInfo() tarinfo.name = item.path @@ -1236,6 +1230,24 @@ def item_to_tarinfo(item, original_path): return None, stream return tarinfo, stream + def item_to_paxheaders(item): + """ + Transform (parts of) a Borg *item* into a pax_headers dict. + """ + # When using the PAX (POSIX) format, we can support some things that aren't possible + # with classic tar formats, including GNU tar, such as: + # - atime, ctime (DONE) + # - possibly Linux capabilities, security.* xattrs (TODO) + # - various additions supported by GNU tar in POSIX mode (TODO) + ph = {} + # note: for mtime this is a bit redundant as it is already done by tarfile module, + # but we just do it in our way to be consistent for sure. 
+ for name in 'atime', 'ctime', 'mtime': + if hasattr(item, name): + ns = getattr(item, name) + ph[name] = str(ns / 1e9) + return ph + for item in archive.iter_items(filter, partial_extract=partial_extract, preload=True, hardlink_masters=hardlink_masters): orig_path = item.path @@ -1243,6 +1255,8 @@ def item_to_tarinfo(item, original_path): item.path = os.sep.join(orig_path.split(os.sep)[strip_components:]) tarinfo, stream = item_to_tarinfo(item, orig_path) if tarinfo: + if args.tar_format == 'PAX': + tarinfo.pax_headers = item_to_paxheaders(item) if output_list: logging.getLogger('borg.output.list').info(remove_surrogates(orig_path)) tar.addfile(tarinfo, stream) @@ -4043,7 +4057,10 @@ def define_borg_mount(parser): read the uncompressed tar stream from stdin and write a compressed/filtered tar stream to stdout. - The generated tarball uses the GNU tar format. + Depending on the ``--tar-format`` option, the generated tarball uses this format: + + - PAX: POSIX.1-2001 (pax) format + - GNU: GNU tar format export-tar is a lossy conversion: BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported. 
@@ -4071,6 +4088,9 @@ def define_borg_mount(parser): help='filter program to pipe data through') subparser.add_argument('--list', dest='output_list', action='store_true', help='output verbose list of items (files, dirs, ...)') + subparser.add_argument('--tar-format', metavar='FMT', dest='tar_format', default='GNU', + choices=('PAX', 'GNU'), + help='select tar format: PAX or GNU') subparser.add_argument('location', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to export') diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 64d294b03..48637b1f7 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -3409,7 +3409,7 @@ def test_export_tar(self): os.unlink('input/flagfile') self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') - self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--progress') + self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--progress', '--tar-format=GNU') with changedir('output'): # This probably assumes GNU tar. Note -p switch to extract permissions regardless of umask. 
subprocess.check_call(['tar', 'xpf', '../simple.tar', '--warning=no-timestamp']) @@ -3424,7 +3424,8 @@ def test_export_tar_gz(self): os.unlink('input/flagfile') self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') - list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar.gz', '--list') + list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar.gz', + '--list', '--tar-format=GNU') assert 'input/file1\n' in list assert 'input/dir2\n' in list with changedir('output'): @@ -3439,7 +3440,8 @@ def test_export_tar_strip_components(self): os.unlink('input/flagfile') self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') - list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--strip-components=1', '--list') + list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', + '--strip-components=1', '--list', '--tar-format=GNU') # --list's path are those before processing with --strip-components assert 'input/file1\n' in list assert 'input/dir2\n' in list @@ -3451,7 +3453,8 @@ def test_export_tar_strip_components(self): @requires_gnutar def test_export_tar_strip_components_links(self): self._extract_hardlinks_setup() - self.cmd('export-tar', self.repository_location + '::test', 'output.tar', '--strip-components=2') + self.cmd('export-tar', self.repository_location + '::test', 'output.tar', + '--strip-components=2', '--tar-format=GNU') with changedir('output'): subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp']) assert os.stat('hardlink').st_nlink == 2 @@ -3463,7 +3466,7 @@ def test_export_tar_strip_components_links(self): @requires_gnutar def test_extract_hardlinks_tar(self): self._extract_hardlinks_setup() - self.cmd('export-tar', self.repository_location + '::test', 'output.tar', 'input/dir1') + 
self.cmd('export-tar', self.repository_location + '::test', 'output.tar', 'input/dir1', '--tar-format=GNU') with changedir('output'): subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp']) assert os.stat('input/dir1/hardlink').st_nlink == 2 @@ -3471,26 +3474,26 @@ def test_extract_hardlinks_tar(self): assert os.stat('input/dir1/aaaa').st_nlink == 2 assert os.stat('input/dir1/source2').st_nlink == 2 - def test_import_tar(self): + def test_import_tar(self, tar_format='PAX'): self.create_test_files() os.unlink('input/flagfile') self.cmd('init', '--encryption=none', self.repository_location) self.cmd('create', self.repository_location + '::src', 'input') - self.cmd('export-tar', self.repository_location + '::src', 'simple.tar') + self.cmd('export-tar', self.repository_location + '::src', 'simple.tar', f'--tar-format={tar_format}') self.cmd('import-tar', self.repository_location + '::dst', 'simple.tar') with changedir(self.output_path): self.cmd('extract', self.repository_location + '::dst') self.assert_dirs_equal('input', 'output/input', ignore_ns=True, ignore_xattrs=True) @requires_gzip - def test_import_tar_gz(self): + def test_import_tar_gz(self, tar_format='GNU'): if not shutil.which('gzip'): pytest.skip('gzip is not installed') self.create_test_files() os.unlink('input/flagfile') self.cmd('init', '--encryption=none', self.repository_location) self.cmd('create', self.repository_location + '::src', 'input') - self.cmd('export-tar', self.repository_location + '::src', 'simple.tgz') + self.cmd('export-tar', self.repository_location + '::src', 'simple.tgz', f'--tar-format={tar_format}') self.cmd('import-tar', self.repository_location + '::dst', 'simple.tgz') with changedir(self.output_path): self.cmd('extract', self.repository_location + '::dst')