mirror of
https://github.com/borgbackup/borg.git
synced 2025-02-21 13:47:16 +00:00
implement borg debug dump-repo-objs --ghost
intended as a last resort measure to export all segment file contents
in a relatively easy to use format.
if you want to dig into a damaged repo (e.g. missing segment files,
missing commits) and you know what you are doing.
note: dump-repo-objs --ghost must not use repo.list()
because this would need the repo index and call get_transaction_id and
check_transaction methods, which can easily fail on a damaged repo.
thus we use the same low level scan method as we use anyway to get
some encrypted piece of data to set up the decryption "key".
(cherry picked from commit 8738e85967
)
This commit is contained in:
parent
a3a15ddf57
commit
d6cb39a6d6
2 changed files with 65 additions and 18 deletions
|
@ -73,7 +73,7 @@
|
|||
from .item import Item
|
||||
from .platform import get_flags, get_process_id, SyncFile
|
||||
from .remote import RepositoryServer, RemoteRepository, cache_if_remote
|
||||
from .repository import Repository, LIST_SCAN_LIMIT
|
||||
from .repository import Repository, LIST_SCAN_LIMIT, TAG_PUT, TAG_DELETE, TAG_COMMIT
|
||||
from .selftest import selftest
|
||||
from .upgrader import AtticRepositoryUpgrader, BorgRepositoryUpgrader
|
||||
|
||||
|
@ -1747,27 +1747,55 @@ def do_debug_dump_manifest(self, args, repository, manifest, key):
|
|||
def do_debug_dump_repo_objs(self, args, repository):
    """dump (decrypted, decompressed) repo objects

    Without ``--ghost``, the objects returned by ``repository.scan()`` are dumped;
    this path needs a current/correct repo index.

    With ``--ghost``, every entry yielded by ``repository.scan_low_level()`` is dumped
    (PUT, DELETE and COMMIT entries), without going through ``repository.list()``.

    Each entry is written to a ``*.obj`` file in the current working directory.
    Returns EXIT_SUCCESS.
    """
    from .crypto.key import key_factory

    def decrypt_dump(i, id, cdata, tag=None, segment=None, offset=None):
        # Write one entry to '<i>[_<tag>][_<segment>][_<offset>][_<hex id>].obj'.
        # `key` is bound by the enclosing function before the first call that has cdata.
        if cdata is not None:
            # the manifest id is not passed on to key.decrypt
            give_id = id if id != Manifest.MANIFEST_ID else None
            data = key.decrypt(give_id, cdata)
        else:
            # entries without payload (DELETE / COMMIT) give an empty file
            data = b''
        tag_str = '' if tag is None else '_' + tag
        segment_str = '_' + str(segment) if segment is not None else ''
        offset_str = '_' + str(offset) if offset is not None else ''
        id_str = '_' + bin_to_hex(id) if id is not None else ''
        filename = '%06d%s%s%s%s.obj' % (i, tag_str, segment_str, offset_str, id_str)
        print('Dumping', filename)
        with open(filename, 'wb') as fd:
            fd.write(data)

    if args.ghost:
        # dump ghosty stuff from segment files: not yet committed objects, deleted / superseded objects, commit tags

        # set up the key without depending on a manifest obj:
        # the first PUT entry found supplies the encrypted data key_factory needs.
        for id, cdata, tag, segment, offset in repository.scan_low_level():
            if tag == TAG_PUT:
                key = key_factory(repository, cdata)
                break
        # second pass: dump every entry; the counter advances for each entry, dumped or not
        for i, (id, cdata, tag, segment, offset) in enumerate(repository.scan_low_level()):
            if tag == TAG_PUT:
                decrypt_dump(i, id, cdata, tag='put', segment=segment, offset=offset)
            elif tag == TAG_DELETE:
                decrypt_dump(i, id, None, tag='del', segment=segment, offset=offset)
            elif tag == TAG_COMMIT:
                decrypt_dump(i, None, None, tag='commit', segment=segment, offset=offset)
    else:
        # set up the key without depending on a manifest obj
        ids = repository.list(limit=1, marker=None)
        if ids:  # an empty list means there is nothing to dump (and ids[0] would raise IndexError)
            cdata = repository.get(ids[0])
            key = key_factory(repository, cdata)
            marker = None
            i = 0
            while True:
                result = repository.scan(limit=LIST_SCAN_LIMIT, marker=marker)  # must use on-disk order scanning here
                if not result:
                    break
                marker = result[-1]
                for id in result:
                    cdata = repository.get(id)
                    decrypt_dump(i, id, cdata)
                    i += 1
    print('Done.')
    return EXIT_SUCCESS
|
||||
|
||||
|
@ -3929,6 +3957,8 @@ def define_archive_filters_group(subparser, *, sort_by=True, first_last=True):
|
|||
subparser.add_argument('location', metavar='REPOSITORY',
|
||||
type=location_validator(archive=False),
|
||||
help='repo to dump')
|
||||
subparser.add_argument('--ghost', dest='ghost', action='store_true',
|
||||
help='dump all segment file contents, including deleted/uncommitted objects and commits.')
|
||||
|
||||
debug_search_repo_objs_epilog = process_epilog("""
|
||||
This command searches raw (but decrypted and decompressed) repo objects for a specific bytes sequence.
|
||||
|
|
|
@ -973,6 +973,23 @@ def report_error(msg):
|
|||
logger.info('Completed repository check, no problems found.')
|
||||
return not error_found or repair
|
||||
|
||||
def scan_low_level(self):
    """Very low level scan over all segment file entries.

    It does NOT care about what's committed and what not.
    It does NOT care whether an object might be deleted or superseded later.
    It just yields anything it finds in the segment files.

    This is intended as a last-resort way to get access to all repo contents of damaged repos,
    when there is uncommitted, but valuable data in there...

    Yields (key, data, tag, segment, offset) tuples, in the order the segments are
    returned by self.io.segment_iterator() and the entries by self.io.iter_objects().

    If iterating a segment raises IntegrityError, the error is logged, the remainder of
    that segment is skipped and the scan continues with the next segment.
    """
    for segment, filename in self.io.segment_iterator():
        try:
            for tag, key, offset, data in self.io.iter_objects(segment, include_data=True):
                yield key, data, tag, segment, offset
        except IntegrityError as err:
            # lazy logging args: the message is only formatted if the record is actually emitted
            logger.error('Segment %d (%s) has IntegrityError(s) [%s] - skipping.', segment, filename, err)
|
||||
|
||||
def _rollback(self, *, cleanup):
|
||||
"""
|
||||
"""
|
||||
|
|
Loading…
Reference in a new issue