#! /usr/bin/env python

"""Dump archive contents, test extraction.

Command-line helper built on the ``rarfile`` module.  For every archive
named on the command line it can print the parsed header records, read
every member to verify its checksum, extract the members, and show the
archive comment.  Run with ``-h`` for the list of switches.
"""

from __future__ import division, absolute_import, print_function

import io
import sys
import getopt

from datetime import datetime

import rarfile as rf

usage = """
dumprar [switches] [ARC1 ARC2 ...] [@ARCLIST]
switches:
  @file      read archive names from file
  -pPSW      set password
  -Ccharset  set fallback charset
  -v         increase verbosity
  -t         attempt to read all files
  -T         run archive self-test (testrar)
  -M         read archive into memory before opening
  -x         write read files out
  -c         show archive comment
  -h         show usage
  --         stop switch parsing
""".strip()

# Names for RAR3 host_os codes, indexed by the numeric code.
os_list = ['DOS', 'OS2', 'WIN', 'UNIX', 'MACOS', 'BEOS']

# Names for RAR3 block types, indexed by (type - rf.RAR_BLOCK_MARK).
block_strs = ['MARK', 'MAIN', 'FILE', 'OLD_COMMENT', 'OLD_EXTRA',
              'OLD_SUB', 'OLD_RECOVERY', 'OLD_AUTH', 'SUB', 'ENDARC']

# Names for RAR5 block types.
r5_block_types = {
    rf.RAR5_BLOCK_MAIN: 'R5_MAIN',
    rf.RAR5_BLOCK_FILE: 'R5_FILE',
    rf.RAR5_BLOCK_SERVICE: 'R5_SVC',
    rf.RAR5_BLOCK_ENCRYPTION: 'R5_ENC',
    rf.RAR5_BLOCK_ENDARC: 'R5_ENDARC',
}


def rar3_type(btype):
    """RAR3 type code as string.

    Returns "*UNKNOWN*" when btype lies outside the
    RAR_BLOCK_MARK..RAR_BLOCK_ENDARC range.
    """
    if btype < rf.RAR_BLOCK_MARK or btype > rf.RAR_BLOCK_ENDARC:
        return "*UNKNOWN*"
    return block_strs[btype - rf.RAR_BLOCK_MARK]


def rar5_type(btype):
    """RAR5 type code as string.

    Returns '*UNKNOWN*' for codes missing from r5_block_types.
    """
    return r5_block_types.get(btype, '*UNKNOWN*')


# Each *_bits / *_flags table below is a sequence of (mask, name) pairs
# in the form render_flags() expects.

main_bits = (
    (rf.RAR_MAIN_VOLUME, "VOL"),
    (rf.RAR_MAIN_COMMENT, "COMMENT"),
    (rf.RAR_MAIN_LOCK, "LOCK"),
    (rf.RAR_MAIN_SOLID, "SOLID"),
    (rf.RAR_MAIN_NEWNUMBERING, "NEWNR"),
    (rf.RAR_MAIN_AUTH, "AUTH"),
    (rf.RAR_MAIN_RECOVERY, "RECOVERY"),
    (rf.RAR_MAIN_PASSWORD, "PASSWORD"),
    (rf.RAR_MAIN_FIRSTVOLUME, "FIRSTVOL"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

endarc_bits = (
    (rf.RAR_ENDARC_NEXT_VOLUME, "NEXTVOL"),
    (rf.RAR_ENDARC_DATACRC, "DATACRC"),
    (rf.RAR_ENDARC_REVSPACE, "REVSPACE"),
    (rf.RAR_ENDARC_VOLNR, "VOLNR"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

file_bits = (
    (rf.RAR_FILE_SPLIT_BEFORE, "SPLIT_BEFORE"),
    (rf.RAR_FILE_SPLIT_AFTER, "SPLIT_AFTER"),
    (rf.RAR_FILE_PASSWORD, "PASSWORD"),
    (rf.RAR_FILE_COMMENT, "COMMENT"),
    (rf.RAR_FILE_SOLID, "SOLID"),
    (rf.RAR_FILE_LARGE, "LARGE"),
    (rf.RAR_FILE_UNICODE, "UNICODE"),
    (rf.RAR_FILE_SALT, "SALT"),
    (rf.RAR_FILE_VERSION, "VERSION"),
    (rf.RAR_FILE_EXTTIME, "EXTTIME"),
    (rf.RAR_FILE_EXTFLAGS, "EXTFLAGS"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

generic_bits = (
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# Names for the value stored under rf.RAR_FILE_DICTMASK (see get_file_flags).
file_parms = ("D64", "D128", "D256", "D512",
              "D1024", "D2048", "D4096", "DIR")

r5_block_flags = (
    (rf.RAR5_BLOCK_FLAG_EXTRA_DATA, 'EXTRA'),
    (rf.RAR5_BLOCK_FLAG_DATA_AREA, 'DATA'),
    (rf.RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN, 'SKIP'),
    (rf.RAR5_BLOCK_FLAG_SPLIT_BEFORE, 'SPLIT_BEFORE'),
    (rf.RAR5_BLOCK_FLAG_SPLIT_AFTER, 'SPLIT_AFTER'),
    (rf.RAR5_BLOCK_FLAG_DEPENDS_PREV, 'DEPENDS'),
    (rf.RAR5_BLOCK_FLAG_KEEP_WITH_PARENT, 'KEEP'),
)

r5_main_flags = (
    (rf.RAR5_MAIN_FLAG_ISVOL, 'ISVOL'),
    (rf.RAR5_MAIN_FLAG_HAS_VOLNR, 'VOLNR'),
    (rf.RAR5_MAIN_FLAG_SOLID, 'SOLID'),
    (rf.RAR5_MAIN_FLAG_RECOVERY, 'RECOVERY'),
    (rf.RAR5_MAIN_FLAG_LOCKED, 'LOCKED'),
)

r5_file_flags = (
    (rf.RAR5_FILE_FLAG_ISDIR, 'DIR'),
    (rf.RAR5_FILE_FLAG_HAS_MTIME, 'MTIME'),
    (rf.RAR5_FILE_FLAG_HAS_CRC32, 'CRC32'),
    (rf.RAR5_FILE_FLAG_UNKNOWN_SIZE, 'NOSIZE'),
)

r5_enc_flags = (
    (rf.RAR5_ENC_FLAG_HAS_CHECKVAL, 'CHECKVAL'),
)

r5_endarc_flags = (
    (rf.RAR5_ENDARC_FLAG_NEXT_VOL, 'NEXTVOL'),
)

r5_file_enc_flags = (
    (rf.RAR5_XENC_CHECKVAL, 'CHECKVAL'),
    (rf.RAR5_XENC_TWEAKED, 'TWEAKED'),
)

r5_file_redir_types = {
    rf.RAR5_XREDIR_UNIX_SYMLINK: 'UNIX_SYMLINK',
    rf.RAR5_XREDIR_WINDOWS_SYMLINK: 'WINDOWS_SYMLINK',
    rf.RAR5_XREDIR_WINDOWS_JUNCTION: 'WINDOWS_JUNCTION',
    rf.RAR5_XREDIR_HARD_LINK: 'HARD_LINK',
    rf.RAR5_XREDIR_FILE_COPY: 'FILE_COPY',
}

r5_file_redir_flags = (
    (rf.RAR5_XREDIR_ISDIR, 'DIR'),
)


def xprint(m, *args):
    """Print string to stdout.

    Format unicode safely: on Python 2 the template is decoded from
    UTF-8 before %-formatting and encoded back afterwards.  The template
    is only %-formatted when args are given, so a literal message may
    contain '%'.  A newline is always appended.
    """
    if sys.hexversion < 0x3000000:
        m = m.decode('utf8')
    if args:
        m = m % args
    if sys.hexversion < 0x3000000:
        m = m.encode('utf8')
    sys.stdout.write(m)
    sys.stdout.write('\n')


def render_flags(flags, bit_list):
    """Show bit names.

    bit_list is a sequence of (mask, name) pairs.  Returns the names of
    all masks present in flags, followed by an "UNK_xxxx" entry for each
    set bit no mask covers, joined with commas; '-' when nothing is set.
    """
    res = []
    known = 0
    for mask, name in bit_list:
        known |= mask
        if flags & mask:
            res.append(name)

    # Report the leftover bits one by one, lowest bit first.
    unknown = flags & ~known
    n = 0
    while unknown:
        if unknown & 1:
            res.append("UNK_%04x" % (1 << n))
        unknown >>= 1
        n += 1

    if not res:
        return '-'
    return ",".join(res)


def get_file_flags(flags):
    """Show flag names and handle dict size.

    The bits under rf.RAR_FILE_DICTMASK are not independent flags; they
    are shifted down and used as an index into file_parms, whose name is
    appended after the ordinary flag names.
    """
    res = render_flags(flags & ~rf.RAR_FILE_DICTMASK, file_bits)
    xf = (flags & rf.RAR_FILE_DICTMASK) >> 5
    return res + "," + file_parms[xf]


def fmt_time(t):
    """Format time.

    Accepts None (shown as '(-)'), a datetime (ISO format) or a
    6-element (year, month, day, hour, minute, second) tuple.
    """
    if t is None:
        return '(-)'
    if isinstance(t, datetime):
        return t.isoformat('T')
    return "%04d-%02d-%02d %02d:%02d:%02d" % t


def _repr_text(value):
    """Return repr(value) without the Python 2 'u' string prefix."""
    text = repr(value)
    if text[0] == 'u':
        text = text[1:]
    return text


def _show_times(h):
    """Print the mtime/ctime/atime/arctime fields of h that are set."""
    if h.mtime:
        xprint(" mtime=%s", fmt_time(h.mtime))
    if h.ctime:
        xprint(" ctime=%s", fmt_time(h.ctime))
    if h.atime:
        xprint(" atime=%s", fmt_time(h.atime))
    if h.arctime:
        xprint(" arctime=%s", fmt_time(h.arctime))


def show_item(h):
    """Show any RAR3/5 record.

    Dispatches on the record class; anything that is neither Rar3Info
    nor Rar5Info is reported as unknown.
    """
    if isinstance(h, rf.Rar3Info):
        show_item_v3(h)
    elif isinstance(h, rf.Rar5Info):
        show_item_v5(h)
    else:
        xprint('Unknown info record')


def show_item_v3(h):
    """Show any RAR3 record.

    Prints a summary line, then details that depend on the block type,
    then the record comment when there is one.
    """
    st = rar3_type(h.type)
    xprint("%s: hdrlen=%d datlen=%d", st, h.header_size, h.add_size)

    if h.type in (rf.RAR_BLOCK_FILE, rf.RAR_BLOCK_SUB):
        # Mode is printed in octal for the UNIX host code, hex otherwise.
        if h.host_os == rf.RAR_OS_UNIX:
            s_mode = "0%o" % h.mode
        else:
            s_mode = "0x%x" % h.mode
        xprint(" flags=0x%04x:%s", h.flags, get_file_flags(h.flags))

        if 0 <= h.host_os < len(os_list):
            s_os = os_list[h.host_os]
        else:
            s_os = "?"
        xprint(" os=%d:%s ver=%d mode=%s meth=%c cmp=%d dec=%d vol=%d",
               h.host_os, s_os, h.extract_version, s_mode,
               h.compress_type, h.compress_size, h.file_size, h.volume)

        # Fold the CRC into the unsigned 32-bit range for the hex form.
        ucrc = (h.CRC + (1 << 32)) & ((1 << 32) - 1)
        xprint(" crc=0x%08x (%d) date_time=%s",
               ucrc, h.CRC, fmt_time(h.date_time))
        xprint(" name=%s", h.filename)
        _show_times(h)
    elif h.type == rf.RAR_BLOCK_MAIN:
        xprint(" flags=0x%04x:%s", h.flags,
               render_flags(h.flags, main_bits))
    elif h.type == rf.RAR_BLOCK_ENDARC:
        xprint(" flags=0x%04x:%s", h.flags,
               render_flags(h.flags, endarc_bits))
    elif h.type == rf.RAR_BLOCK_MARK:
        xprint(" flags=0x%04x:", h.flags)
    else:
        xprint(" flags=0x%04x:%s", h.flags,
               render_flags(h.flags, generic_bits))

    if h.comment is not None:
        xprint(" comment=%s", _repr_text(h.comment))


def show_item_v5(h):
    """Show any RAR5 record.

    Prints a summary line and the block flags, then details that depend
    on the block type, then the record comment when there is one.
    """
    st = rar5_type(h.block_type)
    xprint("%s: hdrlen=%d datlen=%d hdr_extra=%d",
           st, h.header_size, h.compress_size, h.block_extra_size)
    xprint(" block_flags=0x%04x:%s", h.block_flags,
           render_flags(h.block_flags, r5_block_flags))

    if h.block_type in (rf.RAR5_BLOCK_FILE, rf.RAR5_BLOCK_SERVICE):
        xprint(" name=%s", h.filename)
        # Any host code other than RAR5_OS_UNIX is labelled WINDOWS.
        if h.file_host_os == rf.RAR5_OS_UNIX:
            s_os = 'UNIX'
            s_mode = "0%o" % h.mode
        else:
            s_os = 'WINDOWS'
            s_mode = "0x%x" % h.mode
        xprint(" file_flags=0x%04x:%s", h.file_flags,
               render_flags(h.file_flags, r5_file_flags))

        cmp_flags = h.file_compress_flags
        xprint(" cmp_algo=%d cmp_meth=%d dict=%d solid=%r",
               cmp_flags & 0x3f,
               (cmp_flags >> 7) & 0x07,
               cmp_flags >> 10,
               (cmp_flags & rf.RAR5_COMPR_SOLID) > 0)
        xprint(" os=%d:%s mode=%s cmp=%r dec=%r vol=%r",
               h.file_host_os, s_os, s_mode,
               h.compress_size, h.file_size, h.volume)

        if h.CRC is not None:
            xprint(" crc=0x%08x (%d)", h.CRC, h.CRC)
        if h.blake2sp_hash is not None:
            xprint(" blake2sp=%s", rf.tohex(h.blake2sp_hash))
        if h.date_time is not None:
            xprint(" date_time=%s", fmt_time(h.date_time))
        _show_times(h)

        if h.flags & rf.RAR_FILE_PASSWORD:
            (enc_algo, enc_flags, kdf_count,
             salt, iv, checkval) = h.file_encryption
            if enc_algo == rf.RAR5_XENC_CIPHER_AES256:
                algo_name = 'AES256'
            else:
                algo_name = 'UnknownAlgo'
            xprint(' algo=%d:%s enc_flags=%04x:%s kdf_lg=%d kdf_count=%d'
                   ' salt=%s iv=%s checkval=%s',
                   enc_algo, algo_name, enc_flags,
                   render_flags(enc_flags, r5_file_enc_flags),
                   kdf_count, 1 << kdf_count,
                   rf.tohex(salt), rf.tohex(iv),
                   rf.tohex(checkval) if checkval else '-')

        if h.file_redir:
            redir_type, redir_flags, redir_name = h.file_redir
            xprint(' redir: type=%s flags=%d:%s destination=%s',
                   r5_file_redir_types.get(redir_type, 'Unknown'),
                   redir_flags,
                   render_flags(redir_flags, r5_file_redir_flags),
                   redir_name)

        if h.file_owner:
            uname, gname, uid, gid = h.file_owner
            xprint(' owner: name=%r group=%r uid=%r gid=%r',
                   uname, gname, uid, gid)

        if h.file_version:
            ver_flags, version = h.file_version
            xprint(' version: flags=%r version=%r', ver_flags, version)
    elif h.block_type == rf.RAR5_BLOCK_MAIN:
        xprint(" flags=0x%04x:%s", h.flags,
               render_flags(h.main_flags, r5_main_flags))
    elif h.block_type == rf.RAR5_BLOCK_ENDARC:
        xprint(" flags=0x%04x:%s", h.flags,
               render_flags(h.endarc_flags, r5_endarc_flags))
    elif h.block_type == rf.RAR5_BLOCK_ENCRYPTION:
        if h.encryption_algo == rf.RAR5_XENC_CIPHER_AES256:
            algo_name = 'AES256'
        else:
            algo_name = 'UnknownAlgo'
        xprint(" algo=%d:%s flags=0x%04x:%s",
               h.encryption_algo, algo_name, h.flags,
               render_flags(h.encryption_flags, r5_enc_flags))
        xprint(" kdf_lg=%d kdf_count=%d",
               h.encryption_kdf_count, 1 << h.encryption_kdf_count)
        xprint(" salt=%s", rf.tohex(h.encryption_salt))
    else:
        xprint(" - missing info -")

    if h.comment is not None:
        xprint(" comment=%s", _repr_text(h.comment))


# Run-time switches; main() sets them from the command line.
cf_show_comment = 0
cf_verbose = 0
cf_charset = None
cf_extract = 0
cf_test_read = 0
cf_test_unrar = 0
cf_test_memory = 0


def check_crc(f, inf, desc):
    """Compare result crc to expected value.

    Does nothing when inf carries no expected digest; otherwise prints a
    'crc error' line tagged with desc when the digest accumulated by f
    differs from it.
    """
    exp = inf._md_expect
    if exp is None:
        return
    ucrc = f._md_context.digest()
    if ucrc != exp:
        print('crc error - %s - exp=%r got=%r' % (desc, exp, ucrc))


def test_read_long(r, inf):
    """Test read and readinto.

    Reads member inf of archive r in 8192-byte chunks, reports a short
    read and any digest mismatch, and -- when cf_test_read > 1 -- seeks
    back to the start and reads everything again through readinto().
    The member file is closed even when reading raises.
    """
    md_class = inf._md_class or rf.NoHashContext
    bctx = md_class()
    f = r.open(inf.filename)
    try:
        total = 0
        while 1:
            data = f.read(8192)
            if not data:
                break
            bctx.update(data)
            total += len(data)
        if total != inf.file_size:
            xprint("\n *** %s has corrupt file: %s ***",
                   r.rarfile, inf.filename)
            xprint(" *** short read: got=%d, need=%d ***\n",
                   total, inf.file_size)
        check_crc(f, inf, 'read')
        bhash = bctx.hexdigest()
        if cf_verbose > 1:
            if f._md_context.digest() != inf._md_expect:
                xprint(" checkhash: %r got=%r exp=%r cls=%r\n",
                       bhash, f._md_context.digest(),
                       inf._md_expect, inf._md_class)

        # test .seek() & .readinto()
        if cf_test_read > 1:
            f.seek(0, 0)

            total = 0
            buf = bytearray(rf.ZERO * 1024)
            while 1:
                res = f.readinto(buf)
                if not res:
                    break
                total += res
            if inf.file_size != total:
                xprint(" *** readinto failed: got=%d, need=%d ***\n",
                       total, inf.file_size)
    finally:
        f.close()


def test_read(r, inf):
    """Test file read."""
    test_read_long(r, inf)


def test_real(fn, psw):
    """Actual archive processing.

    Opens archive fn (through an in-memory copy when cf_test_memory is
    set), applies password psw if the archive needs one, then performs
    the actions selected by the cf_* switches.  Errors propagate to the
    caller.
    """
    xprint("Archive: %s", fn)

    cb = None
    if cf_verbose > 1:
        cb = show_item

    rfarg = fn
    if cf_test_memory:
        with open(fn, 'rb') as arc:
            rfarg = io.BytesIO(arc.read())

    # check if rar
    if not rf.is_rarfile(rfarg):
        xprint(" --- %s is not a RAR file ---", fn)
        return

    # open
    r = rf.RarFile(rfarg, charset=cf_charset, info_callback=cb)

    # set password
    if r.needs_password():
        if psw:
            r.setpassword(psw)
        else:
            xprint(" --- %s requires password ---", fn)
            return

    # show comment
    if cf_show_comment and r.comment:
        for ln in r.comment.split('\n'):
            xprint(" %s", ln)
    elif cf_verbose > 0 and r.comment:
        xprint(" comment=%s", _repr_text(r.comment))

    # process
    for n in r.namelist():
        inf = r.getinfo(n)
        if inf.isdir():
            continue
        if cf_verbose == 1:
            show_item(inf)
        if cf_test_read:
            test_read(r, inf)

    if cf_extract:
        # Exercise both extraction entry points.
        r.extractall()
        for inf in r.infolist():
            r.extract(inf)

    if cf_test_unrar:
        r.testrar()


def test(fn, psw):
    """Process one archive with error handling.

    rarfile errors and IOError are reported on stdout instead of being
    raised, so processing can move on to the next archive.
    """
    try:
        test_real(fn, psw)
    except rf.NeedFirstVolume:
        xprint(" --- %s is middle part of multi-vol archive ---", fn)
    except (rf.Error, IOError):
        exc, msg, tb = sys.exc_info()
        xprint("\n *** %s: %s ***\n", exc.__name__, msg)
        del tb


def main():
    """Program entry point.

    Parses switches from sys.argv into the module-level cf_* settings,
    expands @listfile arguments into archive names and processes every
    archive.  Exits with status 1 on a switch parsing error.
    """
    global cf_verbose, cf_show_comment, cf_charset
    global cf_extract, cf_test_read, cf_test_unrar
    global cf_test_memory

    psw = None

    # parse args
    # The option string used to declare 'R' while the handler below
    # matched '-T', so neither letter worked.  Both are accepted now.
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'p:C:hvcxtTRM')
    except getopt.error as ex:
        print(str(ex), file=sys.stderr)
        sys.exit(1)

    for o, v in opts:
        if o == '-p':
            psw = v
        elif o == '-h':
            xprint(usage)
            return
        elif o == '-v':
            cf_verbose += 1
        elif o == '-c':
            cf_show_comment = 1
        elif o == '-x':
            cf_extract = 1
        elif o == '-t':
            cf_test_read += 1
        elif o in ('-T', '-R'):
            cf_test_unrar = 1
        elif o == '-M':
            cf_test_memory = 1
        elif o == '-C':
            cf_charset = v
        else:
            raise Exception("unhandled switch: " + o)

    args2 = []
    for a in args:
        if a.startswith("@"):
            with open(a[1:], 'r') as listfile:
                for ln in listfile:
                    # Strip only the line ending, so a last line without
                    # a newline keeps its final character; skip blanks.
                    fn = ln.rstrip('\r\n')
                    if fn:
                        args2.append(fn)
        else:
            args2.append(a)
    args = args2

    if not args:
        xprint(usage)

    for fn in args:
        test(fn, psw)


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        pass