From 1428ffeae90aea899b70ee0cd96812bf7286d6e0 Mon Sep 17 00:00:00 2001
From: Soumik Dutta <78898665+shalearkane@users.noreply.github.com>
Date: Wed, 29 Mar 2023 01:15:48 +0530
Subject: [PATCH] Add parse-obj and format-obj debug commands (#7443)

add parse-obj and format-obj debug commands, fixes #7406

Signed-off-by: Soumik Dutta
---
Reviewer notes (below the "---" marker, so not part of the commit message):
- "debug parse-obj" and "debug format-obj" both check that ID decodes to
  32 bytes (64 hex digits) before they read any of the given input files.
- The testsuite imports Compressor via "...compress", i.e. from the same
  package level as the "...constants" import next to it.

 src/borg/archiver/debug_cmd.py            | 122 ++++++++++++++++++++++
 src/borg/testsuite/archiver/debug_cmds.py |  59 +++++++++++
 2 files changed, 181 insertions(+)

diff --git a/src/borg/archiver/debug_cmd.py b/src/borg/archiver/debug_cmd.py
index 20a930364..8c133f839 100644
--- a/src/borg/archiver/debug_cmd.py
+++ b/src/borg/archiver/debug_cmd.py
@@ -5,6 +5,7 @@
 import textwrap
 
 from ..archive import Archive
+from ..compress import CompressionSpec
 from ..constants import *  # NOQA
 from ..helpers import msgpack
 from ..helpers import sysinfo
@@ -265,6 +266,61 @@ def do_debug_id_hash(self, args, repository, manifest):
         print(id.hex())
         return EXIT_SUCCESS
 
+    @with_repository(compatibility=Manifest.NO_OPERATION_CHECK)
+    def do_debug_parse_obj(self, args, repository, manifest):
+        """parse borg object file into meta dict and data (decrypting, decompressing)"""
+
+        # get the object from id
+        hex_id = args.id
+        try:
+            id = unhexlify(hex_id)
+            if len(id) != 32:  # 256bit
+                raise ValueError("id must be 256bits or 64 hex digits")
+        except ValueError as err:
+            print("object id %s is invalid [%s]." % (hex_id, str(err)))
+            return EXIT_ERROR
+
+        with open(args.object_path, "rb") as f:
+            cdata = f.read()
+
+        repo_objs = manifest.repo_objs
+        meta, data = repo_objs.parse(id=id, cdata=cdata)
+
+        with open(args.json_path, "w") as f:
+            json.dump(meta, f)
+
+        with open(args.binary_path, "wb") as f:
+            f.write(data)
+
+        return EXIT_SUCCESS
+
+    @with_repository(compatibility=Manifest.NO_OPERATION_CHECK)
+    def do_debug_format_obj(self, args, repository, manifest):
+        """format file and metadata into borg object file"""
+
+        # get the object from id
+        hex_id = args.id
+        try:
+            id = unhexlify(hex_id)
+            if len(id) != 32:  # 256bit
+                raise ValueError("id must be 256bits or 64 hex digits")
+        except ValueError as err:
+            print("object id %s is invalid [%s]." % (hex_id, str(err)))
+            return EXIT_ERROR
+
+        with open(args.binary_path, "rb") as f:
+            data = f.read()
+
+        with open(args.json_path, "r") as f:
+            meta = json.load(f)
+
+        repo_objs = manifest.repo_objs
+        data_encrypted = repo_objs.format(id=id, meta=meta, data=data)
+
+        with open(args.object_path, "wb") as f:
+            f.write(data_encrypted)
+        return EXIT_SUCCESS
+
     @with_repository(manifest=False, exclusive=True)
     def do_debug_put_obj(self, args, repository):
         """put file contents into the repository"""
@@ -518,6 +574,72 @@ def build_parser_debug(self, subparsers, common_parser, mid_common_parser):
             "path", metavar="PATH", type=str, help="content for which the id-hash shall get computed"
         )
 
+        # parse_obj
+        debug_parse_obj_epilog = process_epilog(
+            """
+        This command parses the object file into metadata (as json) and uncompressed data.
+        """
+        )
+        subparser = debug_parsers.add_parser(
+            "parse-obj",
+            parents=[common_parser],
+            add_help=False,
+            description=self.do_debug_parse_obj.__doc__,
+            epilog=debug_parse_obj_epilog,
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+            help="parse borg object file into meta dict and data",
+        )
+        subparser.set_defaults(func=self.do_debug_parse_obj)
+        subparser.add_argument("id", metavar="ID", type=str, help="hex object ID to get from the repo")
+        subparser.add_argument(
+            "object_path", metavar="OBJECT_PATH", type=str, help="path of the object file to parse data from"
+        )
+        subparser.add_argument(
+            "binary_path", metavar="BINARY_PATH", type=str, help="path of the file to write uncompressed data into"
+        )
+        subparser.add_argument(
+            "json_path", metavar="JSON_PATH", type=str, help="path of the json file to write metadata into"
+        )
+
+        # format_obj
+        debug_format_obj_epilog = process_epilog(
+            """
+        This command formats the file and metadata into objectfile.
+        """
+        )
+        subparser = debug_parsers.add_parser(
+            "format-obj",
+            parents=[common_parser],
+            add_help=False,
+            description=self.do_debug_format_obj.__doc__,
+            epilog=debug_format_obj_epilog,
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+            help="format file and metadata into borg objectfile",
+        )
+        subparser.set_defaults(func=self.do_debug_format_obj)
+        subparser.add_argument("id", metavar="ID", type=str, help="hex object ID to get from the repo")
+        subparser.add_argument(
+            "binary_path", metavar="BINARY_PATH", type=str, help="path of the file to convert into objectfile"
+        )
+        subparser.add_argument(
+            "json_path", metavar="JSON_PATH", type=str, help="path of the json file to read metadata from"
+        )
+        subparser.add_argument(
+            "-C",
+            "--compression",
+            metavar="COMPRESSION",
+            dest="compression",
+            type=CompressionSpec,
+            default=CompressionSpec("lz4"),
+            help="select compression algorithm, see the output of the " '"borg help compression" command for details.',
+        )
+        subparser.add_argument(
+            "object_path",
+            metavar="OBJECT_PATH",
+            type=str,
+            help="path of the objectfile to write compressed encrypted data into",
+        )
+
         debug_get_obj_epilog = process_epilog(
             """
         This command gets an object from the repository.
diff --git a/src/borg/testsuite/archiver/debug_cmds.py b/src/borg/testsuite/archiver/debug_cmds.py
index b37eed5b5..ee22da558 100644
--- a/src/borg/testsuite/archiver/debug_cmds.py
+++ b/src/borg/testsuite/archiver/debug_cmds.py
@@ -6,6 +6,7 @@
 from ...constants import *  # NOQA
 from .. import changedir
 from . import ArchiverTestCaseBase, RemoteArchiverTestCaseBase, ArchiverTestCaseBinaryBase, RK_ENCRYPTION, BORG_EXES
+from ...compress import Compressor
 
 
 class ArchiverTestCase(ArchiverTestCaseBase):
@@ -63,6 +64,64 @@ def test_debug_put_get_delete_obj(self):
         output = self.cmd(f"--repo={self.repository_location}", "debug", "delete-obj", "invalid")
         assert "is invalid" in output
 
+    def test_debug_id_hash_format_put_get_parse_obj(self):
+        """Test format-obj and parse-obj commands"""
+
+        self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
+        data = b"some data" * 100
+        meta_dict = {"some": "property"}
+        meta = json.dumps(meta_dict).encode()
+
+        self.create_regular_file("plain.bin", contents=data)
+        self.create_regular_file("meta.json", contents=meta)
+
+        output = self.cmd(f"--repo={self.repository_location}", "debug", "id-hash", "input/plain.bin")
+        id_hash = output.strip()
+
+        output = self.cmd(
+            f"--repo={self.repository_location}",
+            "debug",
+            "format-obj",
+            id_hash,
+            "input/plain.bin",
+            "input/meta.json",
+            "output/data.bin",
+            "--compression=zstd,2",
+        )
+
+        output = self.cmd(f"--repo={self.repository_location}", "debug", "put-obj", id_hash, "output/data.bin")
+        assert id_hash in output
+
+        output = self.cmd(f"--repo={self.repository_location}", "debug", "get-obj", id_hash, "output/object.bin")
+        assert id_hash in output
+
+        output = self.cmd(
+            f"--repo={self.repository_location}",
+            "debug",
+            "parse-obj",
+            id_hash,
+            "output/object.bin",
+            "output/plain.bin",
+            "output/meta.json",
+        )
+
+        with open("output/plain.bin", "rb") as f:
+            data_read = f.read()
+        assert data == data_read
+
+        with open("output/meta.json") as f:
+            meta_read = json.load(f)
+        for key, value in meta_dict.items():
+            assert meta_read.get(key) == value
+
+        assert meta_read.get("size") == len(data_read)
+
+        c = Compressor(name="zstd", level=2)
+        _, data_compressed = c.compress(meta_dict, data=data)
+        assert meta_read.get("csize") == len(data_compressed)
+        assert meta_read.get("ctype") == c.compressor.ID
+        assert meta_read.get("clevel") == c.compressor.level
+
     def test_debug_dump_manifest(self):
         self.create_regular_file("file1", size=1024 * 80)
         self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)