1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-01-01 04:37:34 +00:00

Merge pull request #3824 from ThomasWaldmann/finding-pieces

some more debug commands
This commit is contained in:
TW 2018-08-09 07:27:54 +02:00 committed by GitHub
commit 64f2fc0a65
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 137 additions and 8 deletions

View file

@ -74,7 +74,7 @@
from .item import Item from .item import Item
from .platform import get_flags, get_process_id, SyncFile from .platform import get_flags, get_process_id, SyncFile
from .remote import RepositoryServer, RemoteRepository, cache_if_remote from .remote import RepositoryServer, RemoteRepository, cache_if_remote
from .repository import Repository, LIST_SCAN_LIMIT from .repository import Repository, LIST_SCAN_LIMIT, TAG_PUT, TAG_DELETE, TAG_COMMIT
from .selftest import selftest from .selftest import selftest
from .upgrader import AtticRepositoryUpgrader, BorgRepositoryUpgrader from .upgrader import AtticRepositoryUpgrader, BorgRepositoryUpgrader
@ -1930,7 +1930,85 @@ def do_debug_dump_manifest(self, args, repository, manifest, key):
@with_repository(manifest=False)
def do_debug_dump_repo_objs(self, args, repository):
    """dump (decrypted, decompressed) repo objects, repo index MUST be current/correct"""
    # NOTE(review): the docstring is left untouched on purpose - the sibling search command
    # passes its __doc__ to argparse as description, presumably this command does the same.
    #
    # args.ghost selects the mode:
    #   - False: dump the objects returned by repository.scan() (on-disk order).
    #   - True:  dump every entry repository.scan_low_level() yields (puts, deletes, commits).
    # Each object goes into its own file in the current working directory.
    # Always returns EXIT_SUCCESS.
    from .crypto.key import key_factory

    # Bound below as soon as an object is available to derive the key from. decrypt_dump() only
    # touches it when it is given cdata, which implies that at least one object exists.
    key = None

    def decrypt_dump(i, obj_id, cdata, tag=None, segment=None, offset=None):
        # Write one entry to "<i>[_<segment>][_<offset>][_<tag>][_<hex id>].obj".
        # cdata=None (used for delete / commit entries) results in an empty file.
        if cdata is not None:
            # the manifest is decrypted with id None instead of its real id
            give_id = obj_id if obj_id != Manifest.MANIFEST_ID else None
            data = key.decrypt(give_id, cdata)
        else:
            data = b''
        tag_str = '' if tag is None else '_' + tag
        segment_str = '_' + str(segment) if segment is not None else ''
        offset_str = '_' + str(offset) if offset is not None else ''
        id_str = '_' + bin_to_hex(obj_id) if obj_id is not None else ''
        filename = '%08d%s%s%s%s.obj' % (i, segment_str, offset_str, tag_str, id_str)
        print('Dumping', filename)
        with open(filename, 'wb') as fd:
            fd.write(data)

    if args.ghost:
        # dump ghosty stuff from segment files: not yet committed objects, deleted / superseded objects, commit tags

        # set up the key without depending on a manifest obj: the first PUT entry is good enough
        for _, cdata, tag, _, _ in repository.scan_low_level():
            if tag == TAG_PUT:
                key = key_factory(repository, cdata)
                break
        # the counter advances for every entry, also for entries with a tag that is not dumped
        for i, (obj_id, cdata, tag, segment, offset) in enumerate(repository.scan_low_level()):
            if tag == TAG_PUT:
                decrypt_dump(i, obj_id, cdata, tag='put', segment=segment, offset=offset)
            elif tag == TAG_DELETE:
                decrypt_dump(i, obj_id, None, tag='del', segment=segment, offset=offset)
            elif tag == TAG_COMMIT:
                decrypt_dump(i, None, None, tag='commit', segment=segment, offset=offset)
    else:
        # set up the key without depending on a manifest obj
        ids = repository.list(limit=1, marker=None)
        # No ids means there is nothing to derive a key from and nothing to dump. Finish cleanly
        # (like ghost mode does for an empty repo) instead of failing with IndexError on ids[0].
        if ids:
            key = key_factory(repository, repository.get(ids[0]))
            marker = None
            i = 0
            while True:
                result = repository.scan(limit=LIST_SCAN_LIMIT, marker=marker)  # must use on-disk order scanning here
                if not result:
                    break
                marker = result[-1]  # continue after the last id of this batch
                for obj_id in result:
                    decrypt_dump(i, obj_id, repository.get(obj_id))
                    i += 1
    print('Done.')
    return EXIT_SUCCESS
@with_repository(manifest=False)
def do_debug_search_repo_objs(self, args, repository):
    """search for byte sequences in repo objects, repo index MUST be current/correct"""
    # NOTE: the docstring is also used as argparse description text, so it is left untouched.
    #
    # args.wanted is the search term, either "hex:<hex digits>" or "str:<text>" (utf-8 encoded).
    # Every object returned by repository.scan() is decrypted and searched; additionally, matches
    # that straddle the border between two consecutively scanned objects are reported.
    # Returns EXIT_ERROR for an invalid / empty search term, EXIT_SUCCESS otherwise.
    context = 32  # number of bytes shown before / after a finding

    def print_finding(info, wanted, data, offset):
        # Clamp the start at 0: a negative slice start would count from the END of data and
        # yield wrong (usually empty) context for matches closer than `context` to the start.
        before = data[max(0, offset - context):offset]
        after = data[offset + len(wanted):offset + len(wanted) + context]
        print('%s: %s %s %s == %r %r %r' % (info, before.hex(), wanted.hex(), after.hex(),
                                            before, wanted, after))

    wanted = args.wanted
    try:
        if wanted.startswith('hex:'):
            wanted = unhexlify(wanted[4:])
        elif wanted.startswith('str:'):
            wanted = wanted[4:].encode('utf-8')
        else:
            raise ValueError('unsupported search term')
    except (ValueError, UnicodeEncodeError):
        wanted = None
    if not wanted:  # covers invalid terms as well as empty ones like "str:"
        self.print_error('search term needs to be hex:123abc or str:foobar style')
        return EXIT_ERROR

    from .crypto.key import key_factory
    # set up the key without depending on a manifest obj
    ids = repository.list(limit=1, marker=None)
    if not ids:
        # nothing to derive a key from and nothing to search - do not crash on ids[0]
        print('Done.')
        return EXIT_SUCCESS
    cdata = repository.get(ids[0])
    key = key_factory(repository, cdata)

    # A match crossing an object border has at most len(wanted) - 1 bytes on either side of it.
    # For a 1-byte term this is 0: such a term can not cross a border (and a [-0:] slice would
    # wrongly select the WHOLE previous object).
    overlap = len(wanted) - 1

    marker = None
    last_data = b''
    last_id = None
    i = 0
    while True:
        result = repository.scan(limit=LIST_SCAN_LIMIT, marker=marker)  # must use on-disk order scanning here
        if not result:
            break
        marker = result[-1]  # continue after the last id of this batch
        for obj_id in result:
            cdata = repository.get(obj_id)
            # the manifest is decrypted with id None instead of its real id
            give_id = obj_id if obj_id != Manifest.MANIFEST_ID else None
            data = key.decrypt(give_id, cdata)

            # try to locate wanted sequence crossing the border of last_data and data
            if overlap:
                tail = last_data[-overlap:]
                # tail and head are both shorter than wanted, so any hit really crosses the border
                pos = (tail + data[:overlap]).find(wanted)
                if pos >= 0:
                    ctx_tail = last_data[-(overlap + context):]
                    boundary_data = ctx_tail + data[:overlap + context]
                    # tail is the end of ctx_tail: translate pos into the widened window, so the
                    # border-crossing hit is reported, not some earlier hit inside the context
                    offset = len(ctx_tail) - len(tail) + pos
                    info = '%d %s | %s' % (i, last_id.hex(), obj_id.hex())
                    print_finding(info, wanted, boundary_data, offset)

            # try to locate wanted sequence in data
            count = data.count(wanted)
            if count:
                offset = data.find(wanted)  # only determine first occurrence's offset
                info = "%d %s #%d" % (i, obj_id.hex(), count)
                print_finding(info, wanted, data, offset)

            last_id, last_data = obj_id, data
            i += 1
            if i % 10000 == 0:
                print('%d objects processed.' % i)
    print('Done.')
    return EXIT_SUCCESS
@ -3995,6 +4090,23 @@ def define_archive_filters_group(subparser, *, sort_by=True, first_last=True):
subparser.add_argument('location', metavar='REPOSITORY', subparser.add_argument('location', metavar='REPOSITORY',
type=location_validator(archive=False), type=location_validator(archive=False),
help='repo to dump') help='repo to dump')
subparser.add_argument('--ghost', dest='ghost', action='store_true',
help='dump all segment file contents, including deleted/uncommitted objects and commits.')
debug_search_repo_objs_epilog = process_epilog("""
This command searches raw (but decrypted and decompressed) repo objects for a specific bytes sequence.
""")
subparser = debug_parsers.add_parser('search-repo-objs', parents=[common_parser], add_help=False,
description=self.do_debug_search_repo_objs.__doc__,
epilog=debug_search_repo_objs_epilog,
formatter_class=argparse.RawDescriptionHelpFormatter,
help='search repo objects (debug)')
subparser.set_defaults(func=self.do_debug_search_repo_objs)
subparser.add_argument('location', metavar='REPOSITORY',
type=location_validator(archive=False),
help='repo to search')
subparser.add_argument('wanted', metavar='WANTED', type=str,
help='term to search the repo for, either 0x1234abcd hex term or a string')
debug_get_obj_epilog = process_epilog(""" debug_get_obj_epilog = process_epilog("""
This command gets an object from the repository. This command gets an object from the repository.

View file

@ -962,6 +962,23 @@ def report_error(msg):
logger.info('Completed repository check, no problems found.') logger.info('Completed repository check, no problems found.')
return not error_found or repair return not error_found or repair
def scan_low_level(self):
    """Iterate over all entries of all segment files, at the lowest level.

    Nothing is filtered here: whether an entry is committed or not, or whether it gets
    deleted or superseded by a later entry, is not considered at all - whatever is found
    in the segment files is yielded.

    Meant as a last resort to get at all contents of a damaged repo, e.g. when it
    contains uncommitted, but valuable data.

    Yields (key, data, tag, segment, offset) tuples.
    If an IntegrityError is raised while reading a segment, an error is logged and
    processing continues with the next segment.
    """
    for seg_no, seg_path in self.io.segment_iterator():
        try:
            for entry_tag, entry_key, entry_offset, entry_data in self.io.iter_objects(seg_no, include_data=True):
                yield entry_key, entry_data, entry_tag, seg_no, entry_offset
        except IntegrityError as exc:
            details = (seg_no, seg_path, str(exc))
            logger.error('Segment %d (%s) has IntegrityError(s) [%s] - skipping.' % details)
def _rollback(self, *, cleanup): def _rollback(self, *, cleanup):
""" """
""" """

View file

@ -2335,7 +2335,7 @@ def test_debug_dump_repo_objs(self):
with changedir('output'): with changedir('output'):
output = self.cmd('debug', 'dump-repo-objs', self.repository_location) output = self.cmd('debug', 'dump-repo-objs', self.repository_location)
output_dir = sorted(os.listdir('output')) output_dir = sorted(os.listdir('output'))
assert len(output_dir) > 0 and output_dir[0].startswith('000000_') assert len(output_dir) > 0 and output_dir[0].startswith('00000000_')
assert 'Done.' in output assert 'Done.' in output
def test_debug_put_get_delete_obj(self): def test_debug_put_get_delete_obj(self):