From 4b7cdbc5f38c68549e4c2e454f946078db6fb338 Mon Sep 17 00:00:00 2001 From: Julien Voisin Date: Sat, 2 Dec 2023 16:24:56 +0000 Subject: [PATCH] Added support for 7z by upgrading rarfile dependency - Bump rarfile.py from 4.0 to 4.1 - Add support for 7zip for archive extractions, since unar is a steaming pile of C# and unrar is non-free, having some easy-to-package software to handle archive makes it easier to run Bazarr on Linux distributions. --- bazarr/init.py | 20 +++- libs/rarfile.py | 287 +++++++++++++++++++++++++++++++++++------------ libs/version.txt | 2 +- 3 files changed, 232 insertions(+), 77 deletions(-) diff --git a/bazarr/init.py b/bazarr/init.py index 03264bf72..9db2b73ef 100644 --- a/bazarr/init.py +++ b/bazarr/init.py @@ -194,16 +194,28 @@ def init_binaries(): exe = get_binary("unar") rarfile.UNAR_TOOL = exe rarfile.UNRAR_TOOL = None - rarfile.tool_setup(unrar=False, unar=True, bsdtar=False, force=True) + rarfile.SEVENZIP_TOOL = None + rarfile.tool_setup(unrar=False, unar=True, bsdtar=False, sevenzip=False, force=True) except (BinaryNotFound, rarfile.RarCannotExec): try: exe = get_binary("unrar") rarfile.UNRAR_TOOL = exe rarfile.UNAR_TOOL = None - rarfile.tool_setup(unrar=True, unar=False, bsdtar=False, force=True) + rarfile.SEVENZIP_TOOL = None + rarfile.tool_setup(unrar=True, unar=False, bsdtar=False, sevenzip=False, force=True) except (BinaryNotFound, rarfile.RarCannotExec): - logging.exception("BAZARR requires a rar archive extraction utilities (unrar, unar) and it can't be found.") - raise BinaryNotFound + try: + exe = get_binary("7z") + rarfile.UNRAR_TOOL = None + rarfile.UNAR_TOOL = None + rarfile.SEVENZIP_TOOL = "7z" + rarfile.tool_setup(unrar=False, unar=False, bsdtar=False, sevenzip=True, force=True) + except (BinaryNotFound, rarfile.RarCannotExec): + logging.exception("BAZARR requires a rar archive extraction utilities (unrar, unar, 7zip) and it can't be found.") + raise BinaryNotFound + else: + logging.debug("Using 7zip from: %s", exe) + return exe else: logging.debug("Using UnRAR from: %s", exe) return exe diff --git a/libs/rarfile.py b/libs/rarfile.py index 58a59abb6..0399d7922 100644 --- a/libs/rarfile.py +++ b/libs/rarfile.py @@ -59,12 +59,14 @@ import sys import warnings from binascii import crc32, hexlify from datetime import datetime, timezone -from hashlib import blake2s, pbkdf2_hmac, sha1 +from hashlib import blake2s, pbkdf2_hmac, sha1, sha256 from pathlib import Path from struct import Struct, pack, unpack from subprocess import DEVNULL, PIPE, STDOUT, Popen from tempfile import mkstemp +AES = None + # only needed for encrypted headers try: try: @@ -90,10 +92,10 @@ class AES_CBC_Decrypt: self.decrypt = ciph.decryptor().update -__version__ = "4.0" +__version__ = "4.1" # export only interesting items -__all__ = ["is_rarfile", "is_rarfile_sfx", "RarInfo", "RarFile", "RarExtFile"] +__all__ = ["get_rar_version", "is_rarfile", "is_rarfile_sfx", "RarInfo", "RarFile", "RarExtFile"] ## ## Module configuration. Can be tuned after importing. @@ -108,6 +110,12 @@ UNAR_TOOL = "unar" #: executable for bsdtar tool BSDTAR_TOOL = "bsdtar" +#: executable for p7zip/7z tool +SEVENZIP_TOOL = "7z" + +#: executable for alternative 7z tool +SEVENZIP2_TOOL = "7zz" + #: default fallback charset DEFAULT_CHARSET = "windows-1252" @@ -280,6 +288,9 @@ DOS_MODE_SYSTEM = 0x04 DOS_MODE_HIDDEN = 0x02 DOS_MODE_READONLY = 0x01 +RAR5_PW_CHECK_SIZE = 8 +RAR5_PW_SUM_SIZE = 4 + ## ## internal constants ## @@ -298,17 +309,7 @@ _BAD_CHARS = r"""\x00-\x1F<>|"?*""" RC_BAD_CHARS_UNIX = re.compile(r"[%s]" % _BAD_CHARS) RC_BAD_CHARS_WIN32 = re.compile(r"[%s:^\\]" % _BAD_CHARS) - -def _get_rar_version(xfile): - """Check quickly whether file is rar archive. - """ - with XFile(xfile) as fd: - buf = fd.read(len(RAR5_ID)) - if buf.startswith(RAR_ID): - return RAR_V3 - elif buf.startswith(RAR5_ID): - return RAR_V5 - return 0 +FORCE_TOOL = False def _find_sfx_header(xfile): @@ -340,10 +341,27 @@ def _find_sfx_header(xfile): ## Public interface ## + +def get_rar_version(xfile): + """Check quickly whether file is rar archive. + """ + with XFile(xfile) as fd: + buf = fd.read(len(RAR5_ID)) + if buf.startswith(RAR_ID): + return RAR_V3 + elif buf.startswith(RAR5_ID): + return RAR_V5 + return 0 + + def is_rarfile(xfile): """Check quickly whether file is rar archive. """ - return _get_rar_version(xfile) > 0 + try: + return get_rar_version(xfile) > 0 + except OSError: + # File not found or not accessible, ignore + return False def is_rarfile_sfx(xfile): @@ -634,6 +652,25 @@ class RarInfo: class RarFile: """Parse RAR structure, provide access to files in archive. + + Parameters: + + file + archive file name or file-like object. + mode + only "r" is supported. + charset + fallback charset to use, if filenames are not already Unicode-enabled. + info_callback + debug callback, gets to see all archive entries. + crc_check + set to False to disable CRC checks + errors + Either "stop" to quietly stop parsing on errors, + or "strict" to raise errors. Default is "stop". + part_only + If True, read only single file and allow it to be middle-part + of multi-volume archive. """ #: File name, if available. Unicode string or None. @@ -643,25 +680,7 @@ class RarFile: comment = None def __init__(self, file, mode="r", charset=None, info_callback=None, - crc_check=True, errors="stop"): - """Open and parse a RAR archive. - - Parameters: - - file - archive file name or file-like object. - mode - only "r" is supported. - charset - fallback charset to use, if filenames are not already Unicode-enabled. - info_callback - debug callback, gets to see all archive entries. - crc_check - set to False to disable CRC checks - errors - Either "stop" to quietly stop parsing on errors, - or "strict" to raise errors. Default is "stop". - """ + crc_check=True, errors="stop", part_only=False): if is_filelike(file): self.filename = getattr(file, "name", None) else: @@ -673,6 +692,7 @@ class RarFile: self._charset = charset or DEFAULT_CHARSET self._info_callback = info_callback self._crc_check = crc_check + self._part_only = part_only self._password = None self._file_parser = None @@ -740,6 +760,14 @@ class RarFile: """ return self._file_parser.getinfo(name) + def getinfo_orig(self, name): + """Return RarInfo for file source. + + RAR5: if name is hard-linked or copied file, + returns original entry with original filename. + """ + return self._file_parser.getinfo_orig(name) + def open(self, name, mode="r", pwd=None): """Returns file-like object (:class:`RarExtFile`) from where the data can be read. @@ -880,12 +908,12 @@ class RarFile: if ver == RAR_V3: p3 = RAR3Parser(self._rarfile, self._password, self._crc_check, self._charset, self._strict, self._info_callback, - sfx_ofs) + sfx_ofs, self._part_only) self._file_parser = p3 # noqa elif ver == RAR_V5: p5 = RAR5Parser(self._rarfile, self._password, self._crc_check, self._charset, self._strict, self._info_callback, - sfx_ofs) + sfx_ofs, self._part_only) self._file_parser = p5 # noqa else: raise NotRarFile("Not a RAR file") @@ -984,7 +1012,8 @@ class CommonParser: _password = None comment = None - def __init__(self, rarfile, password, crc_check, charset, strict, info_cb, sfx_offset): + def __init__(self, rarfile, password, crc_check, charset, strict, + info_cb, sfx_offset, part_only): self._rarfile = rarfile self._password = password self._crc_check = crc_check @@ -995,6 +1024,7 @@ class CommonParser: self._info_map = {} self._vol_list = [] self._sfx_offset = sfx_offset + self._part_only = part_only def has_header_encryption(self): """Returns True if headers are encrypted @@ -1043,7 +1073,16 @@ class CommonParser: try: return self._info_map[fname] except KeyError: - raise NoRarEntry("No such file: %s" % fname) + raise NoRarEntry("No such file: %s" % fname) from None + + def getinfo_orig(self, member): + inf = self.getinfo(member) + if inf.file_redir: + redir_type, redir_flags, redir_name = inf.file_redir + # cannot leave to unrar as it expects copied file to exist + if redir_type in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK): + inf = self.getinfo(redir_name) + return inf def parse(self): """Process file.""" @@ -1080,7 +1119,7 @@ class CommonParser: if raise_need_first_vol: # did not find ENDARC with VOLNR raise NeedFirstVolume("Need to start from first volume", None) - if more_vols: + if more_vols and not self._part_only: volume += 1 fd.close() try: @@ -1098,6 +1137,7 @@ class CommonParser: endarc = False self._vol_list.append(volfile) self._main = None + self._hdrenc_main = None continue break h.volume = volume @@ -1105,7 +1145,7 @@ class CommonParser: if h.type == RAR_BLOCK_MAIN and not self._main: self._main = h - if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING): + if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING) and not self._part_only: # RAR 2.x does not set FIRSTVOLUME, # so check it only if NEWNUMBERING is used if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0: @@ -1137,7 +1177,8 @@ class CommonParser: more_vols = True # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE: - raise_need_first_vol = True + if not self._part_only: + raise_need_first_vol = True if h.needs_password(): self._needs_password = True @@ -1241,6 +1282,8 @@ class CommonParser: return self._open_unrar(self._rarfile, inf, pwd) def _open_clear(self, inf): + if FORCE_TOOL: + return self._open_unrar(self._rarfile, inf) return DirectReader(self, inf) def _open_hack_core(self, inf, pwd, prefix, suffix): @@ -1288,7 +1331,7 @@ class CommonParser: # not giving filename avoids encoding related problems fn = None if not tmpfile or force_file: - fn = inf.filename + fn = inf.filename.replace("/", os.path.sep) # read from unrar pipe cmd = setup.open_cmdline(pwd, rarfile, fn) @@ -1379,6 +1422,9 @@ class RAR3Parser(CommonParser): buf = fd.read(S_BLK_HDR.size) if not buf: return None + if len(buf) < S_BLK_HDR.size: + self._set_error("Unexpected EOF when reading header") + return None t = S_BLK_HDR.unpack_from(buf) h.header_crc, h.type, h.flags, h.header_size = t @@ -1751,14 +1797,18 @@ class RAR5Parser(CommonParser): # AES encrypted headers _last_aes256_key = (-1, None, None) # (kdf_count, salt, key) + def _get_utf8_password(self): + pwd = self._password + if isinstance(pwd, str): + return pwd.encode("utf8") + return pwd + def _gen_key(self, kdf_count, salt): if self._last_aes256_key[:2] == (kdf_count, salt): return self._last_aes256_key[2] if kdf_count > 24: raise BadRarFile("Too large kdf_count") - pwd = self._password - if isinstance(pwd, str): - pwd = pwd.encode("utf8") + pwd = self._get_utf8_password() key = pbkdf2_hmac("sha256", pwd, salt, 1 << kdf_count) self._last_aes256_key = (kdf_count, salt, key) return key @@ -1776,8 +1826,17 @@ class RAR5Parser(CommonParser): """ header_offset = fd.tell() - preload = 4 + 3 + preload = 4 + 1 start_bytes = fd.read(preload) + if len(start_bytes) < preload: + self._set_error("Unexpected EOF when reading header") + return None + while start_bytes[-1] & 0x80: + b = fd.read(1) + if not b: + self._set_error("Unexpected EOF when reading header") + return None + start_bytes += b header_crc, pos = load_le32(start_bytes, 0) hdrlen, pos = load_vint(start_bytes, pos) if hdrlen > 2 * 1024 * 1024: @@ -1912,15 +1971,39 @@ class RAR5Parser(CommonParser): h.flags |= RAR_ENDARC_NEXT_VOLUME return h + def _check_password(self, check_value, kdf_count_shift, salt): + if len(check_value) != RAR5_PW_CHECK_SIZE + RAR5_PW_SUM_SIZE: + return + + hdr_check = check_value[:RAR5_PW_CHECK_SIZE] + hdr_sum = check_value[RAR5_PW_CHECK_SIZE:] + sum_hash = sha256(hdr_check).digest() + if sum_hash[:RAR5_PW_SUM_SIZE] != hdr_sum: + return + + kdf_count = (1 << kdf_count_shift) + 32 + pwd = self._get_utf8_password() + pwd_hash = pbkdf2_hmac("sha256", pwd, salt, kdf_count) + + pwd_check = bytearray(RAR5_PW_CHECK_SIZE) + len_mask = RAR5_PW_CHECK_SIZE - 1 + for i, v in enumerate(pwd_hash): + pwd_check[i & len_mask] ^= v + + if pwd_check != hdr_check: + raise RarWrongPassword() + def _parse_encryption_block(self, h, hdata, pos): h.encryption_algo, pos = load_vint(hdata, pos) h.encryption_flags, pos = load_vint(hdata, pos) h.encryption_kdf_count, pos = load_byte(hdata, pos) h.encryption_salt, pos = load_bytes(hdata, 16, pos) if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL: - h.encryption_check_value = load_bytes(hdata, 12, pos) + h.encryption_check_value, pos = load_bytes(hdata, 12, pos) if h.encryption_algo != RAR5_XENC_CIPHER_AES256: raise BadRarFile("Unsupported header encryption cipher") + if h.encryption_check_value and self._password: + self._check_password(h.encryption_check_value, h.encryption_kdf_count, h.encryption_salt) self._hdrenc_main = h return h @@ -2157,6 +2240,7 @@ class RarExtFile(io.RawIOBase): _remain = 0 _returncode = 0 _md_context = None + _seeking = False def _open_extfile(self, parser, inf): self.name = inf.filename @@ -2165,7 +2249,10 @@ class RarExtFile(io.RawIOBase): if self._fd: self._fd.close() - md_class = self._inf._md_class or NoHashContext + if self._seeking: + md_class = NoHashContext + else: + md_class = self._inf._md_class or NoHashContext self._md_context = md_class() self._fd = None self._remain = self._inf.file_size @@ -2256,7 +2343,9 @@ class RarExtFile(io.RawIOBase): """ # disable crc check when seeking - self._md_context = NoHashContext() + if not self._seeking: + self._md_context = NoHashContext() + self._seeking = True fsize = self._inf.file_size cur_ofs = self.tell() @@ -2437,6 +2526,11 @@ class DirectReader(RarExtFile): def _read(self, cnt): """Read from potentially multi-volume archive.""" + pos = self._fd.tell() + need = self._cur.data_offset + self._cur.add_size - self._cur_avail + if pos != need: + self._fd.seek(need, 0) + buf = [] while cnt > 0: # next vol needed? @@ -2790,7 +2884,7 @@ def load_le32(buf, pos): end = pos + 4 if end > len(buf): raise BadRarFile("cannot load le32") - return S_LONG.unpack_from(buf, pos)[0], pos + 4 + return S_LONG.unpack_from(buf, pos)[0], end def load_bytes(buf, num, pos): @@ -2833,36 +2927,61 @@ def load_windowstime(buf, pos): return dt, pos +# +# volume numbering +# + +_rc_num = re.compile('^[0-9]+$') + + def _next_newvol(volfile): """New-style next volume """ + name, ext = os.path.splitext(volfile) + if ext.lower() in ("", ".exe", ".sfx"): + volfile = name + ".rar" i = len(volfile) - 1 while i >= 0: - if volfile[i] >= "0" and volfile[i] <= "9": - return _inc_volname(volfile, i) + if "0" <= volfile[i] <= "9": + return _inc_volname(volfile, i, False) + if volfile[i] in ("/", os.sep): + break i -= 1 raise BadRarName("Cannot construct volume name: " + volfile) + def _next_oldvol(volfile): """Old-style next volume """ - # rar -> r00 - if volfile[-4:].lower() == ".rar": - return volfile[:-2] + "00" - return _inc_volname(volfile, len(volfile) - 1) + name, ext = os.path.splitext(volfile) + if ext.lower() in ("", ".exe", ".sfx"): + ext = ".rar" + sfx = ext[2:] + if _rc_num.match(sfx): + ext = _inc_volname(ext, len(ext) - 1, True) + else: + # .rar -> .r00 + ext = ext[:2] + "00" + return name + ext -def _inc_volname(volfile, i): +def _inc_volname(volfile, i, inc_chars): """increase digits with carry, otherwise just increment char """ fn = list(volfile) while i >= 0: - if fn[i] != "9": + if fn[i] == "9": + fn[i] = "0" + i -= 1 + if i < 0: + fn.insert(0, "1") + elif "0" <= fn[i] < "9" or inc_chars: fn[i] = chr(ord(fn[i]) + 1) break - fn[i] = "0" - i -= 1 + else: + fn.insert(i + 1, "1") + break return "".join(fn) @@ -2957,7 +3076,7 @@ def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, pwd=None, salt=N # file header fname = b"data" date = ((2010 - 1980) << 25) + (12 << 21) + (31 << 16) - mode = 0x20 + mode = DOS_MODE_ARCHIVE fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc, date, vers, meth, len(fname), mode) fhdr += fname @@ -3038,17 +3157,12 @@ def to_datetime(t): pass # sanitize invalid values - mday = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) + mday = (0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) mon = max(1, min(mon, 12)) day = max(1, min(day, mday[mon])) h = min(h, 23) m = min(m, 59) s = min(s, 59) - if mon == 2 and day == 29: - try: - return datetime(year, mon, day, h, m, s) - except ValueError: - day = 28 return datetime(year, mon, day, h, m, s) @@ -3075,7 +3189,7 @@ class nsdatetime(datetime): __slots__ = ("nanosecond",) nanosecond: int #: Number of nanoseconds, 0 <= nanosecond < 999999999 - def __new__(cls, year, month, day, hour=0, minute=0, second=0, + def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0, microsecond=0, tzinfo=None, *, fold=0, nanosecond=0): usec, mod = divmod(nanosecond, 1000) if nanosecond else (microsecond, 0) if mod == 0: @@ -3165,9 +3279,9 @@ def custom_popen(cmd): creationflags=creationflags) except OSError as ex: if ex.errno == errno.ENOENT: - raise RarCannotExec("Unrar not installed?") + raise RarCannotExec("Unrar not installed?") from None if ex.errno == errno.EACCES or ex.errno == errno.EPERM: - raise RarCannotExec("Cannot execute unrar") + raise RarCannotExec("Cannot execute unrar") from None raise return p @@ -3244,6 +3358,8 @@ class ToolSetup: def get_cmdline(self, key, pwd, nodash=False): cmdline = list(self.setup[key]) cmdline[0] = globals()[cmdline[0]] + if key == "check_cmd": + return cmdline self.add_password_arg(cmdline, pwd) if not nodash: cmdline.append("--") @@ -3259,7 +3375,10 @@ class ToolSetup: if not isinstance(pwd, str): pwd = pwd.decode("utf8") args = self.setup["password"] - if isinstance(args, str): + if args is None: + tool = self.setup["open_cmd"][0] + raise RarCannotExec(f"{tool} does not support passwords") + elif isinstance(args, str): cmdline.append(args + pwd) else: cmdline.extend(args) @@ -3303,10 +3422,30 @@ BSDTAR_CONFIG = { "errmap": [None], } +SEVENZIP_CONFIG = { + "open_cmd": ("SEVENZIP_TOOL", "e", "-so", "-bb0"), + "check_cmd": ("SEVENZIP_TOOL", "i"), + "password": "-p", + "no_password": ("-p",), + "errmap": [None, + RarWarning, RarFatalError, None, None, # 1..4 + None, None, RarUserError, RarMemoryError] # 5..8 +} + +SEVENZIP2_CONFIG = { + "open_cmd": ("SEVENZIP2_TOOL", "e", "-so", "-bb0"), + "check_cmd": ("SEVENZIP2_TOOL", "i"), + "password": "-p", + "no_password": ("-p",), + "errmap": [None, + RarWarning, RarFatalError, None, None, # 1..4 + None, None, RarUserError, RarMemoryError] # 5..8 +} + CURRENT_SETUP = None -def tool_setup(unrar=True, unar=True, bsdtar=True, force=False): +def tool_setup(unrar=True, unar=True, bsdtar=True, sevenzip=True, sevenzip2=True, force=False): """Pick a tool, return cached ToolSetup. """ global CURRENT_SETUP @@ -3319,6 +3458,10 @@ def tool_setup(unrar=True, unar=True, bsdtar=True, force=False): lst.append(UNRAR_CONFIG) if unar: lst.append(UNAR_CONFIG) + if sevenzip: + lst.append(SEVENZIP_CONFIG) + if sevenzip2: + lst.append(SEVENZIP2_CONFIG) if bsdtar: lst.append(BSDTAR_CONFIG) diff --git a/libs/version.txt b/libs/version.txt index e175db66d..ac120f6b4 100644 --- a/libs/version.txt +++ b/libs/version.txt @@ -32,7 +32,7 @@ python-engineio==4.3.4 python-socketio==5.7.2 pytz==2023.3 pytz_deprecation_shim==0.1.0.post0 -rarfile==4.0 +rarfile==4.1 requests==2.28.1 semver==2.13.0 signalrcore==0.9.5