mylar/lib/rtorrent/lib/bencode.py

# Copyright (C) 2011 by clueless <clueless.nospam ! mail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# Version: 20111107
#
# Changelog
# ---------
# 2011-11-07  - Added support for Python2 (tested on 2.6)
# 2011-10-03  - Fixed: moved check for end of list at the top of the while loop
#               in _decode_list (in case the list is empty) (Chris Lucas)
#             - Converted dictionary keys to str
# 2011-04-24  - Changed date format to YYYY-MM-DD for versioning, bigger
#			   integer denotes a newer version
#			 - Fixed a bug that would treat False as an integral type but
#			   encode it using the 'False' string, attempting to encode a
#			   boolean now results in an error
#			 - Fixed a bug where an integer value of 0 in a list or
#			   dictionary resulted in a parse error while decoding
#
# 2011-04-03  - Original release

import sys

# True only when the running interpreter reports major version 3.
_py3 = (sys.version_info[0] == 3)

# Text (non-bytes) string types accepted by the encoder. ``unicode`` is only
# looked up when not running on Python 3.
_VALID_STRING_TYPES = (str,) if _py3 else (str, unicode)  # @UndefinedVariable

# Tags returned by _gettype() to classify the next bencoded item.
_TYPE_INT, _TYPE_STRING, _TYPE_LIST = 1, 2, 3
_TYPE_DICTIONARY, _TYPE_END, _TYPE_INVALID = 4, 5, 6

# Function to determine the type of the next value/item
#   Arguments:
#	   char		First character of the string that is to be decoded
#   Return value:
#	   Returns an integer that describes what type the next value/item is


def _gettype(char):
    """Classify a bencoded item by its first character.

    Arguments:
        char    An int code, or a one-character string which is converted
                with ord().
    Return value:
        _TYPE_STRING for the digits '0'-'9', _TYPE_LIST for 'l',
        _TYPE_DICTIONARY for 'd', _TYPE_INT for 'i', _TYPE_END for 'e' and
        _TYPE_INVALID for anything else.
    """
    code = char if isinstance(char, int) else ord(char)

    # A leading digit ('0'..'9') starts the length prefix of a string.
    if 0x30 <= code <= 0x39:
        return _TYPE_STRING

    markers = {
        0x6C: _TYPE_LIST,        # 'l'
        0x64: _TYPE_DICTIONARY,  # 'd'
        0x69: _TYPE_INT,         # 'i'
        0x65: _TYPE_END,         # 'e'
    }
    return markers.get(code, _TYPE_INVALID)

# Function to parse a string from the bencoded data
#   Arguments:
#	   data		bencoded data, must be guaranteed to be a string
#   Return Value:
#	   Returns a tuple, the first member of the tuple is the parsed string
#	   The second member is whatever remains of the bencoded data so it can
#	   be used to parse the next part of the data


def _decode_string(data):
    """Parse a length-prefixed string ("<length>:<payload>") from bencoded data.

    Arguments:
        data    bencoded data whose first character is a digit
    Return value:
        A tuple (payload, remainder) on success, where remainder is whatever
        follows the payload. (False, False) when no ':' separator is found,
        when the length prefix is not a valid non-negative integer, or when
        fewer payload characters are available than the prefix declares.
    """
    # Indexing a bytes literal yields an int on Python 3 and a one-character
    # str on Python 2, which is exactly what data[i] yields for a byte string
    # on the respective interpreter.
    colon = b':'[0]
    total = len(data)

    # Bounded scan: malformed input without a separator must not run off the
    # end of the data and raise IndexError.
    end = 1
    while end < total and data[end] != colon:
        end += 1
    if end >= total:
        return (False, False)

    try:
        strlen = int(data[:end])
    except ValueError:
        return (False, False)
    if strlen < 0:
        return (False, False)

    start = end + 1
    stop = start + strlen
    # A declared length that overshoots the data means the input is truncated;
    # slicing would silently hand back a shorter string.
    if stop > total:
        return (False, False)
    return (data[start:stop], data[stop:])

# Function to parse an integer from the bencoded data
#   Arguments:
#	   data		bencoded data, must be guaranteed to be an integer
#   Return Value:
#	   Returns a tuple, the first member of the tuple is the parsed integer
#	   The second member is whatever remains of the bencoded data so it can
#	   be used to parse the next part of the data


def _decode_int(data):
    """Parse an integer ("i<digits>e") from bencoded data.

    Arguments:
        data    bencoded data whose first character is the 'i' marker
    Return value:
        A tuple (value, remainder) on success, where remainder is whatever
        follows the terminating 'e'. (False, False) when no terminating 'e'
        is found or when the text between the markers is not a valid integer.
    """
    # Indexing a bytes literal yields an int on Python 3 and a one-character
    # str on Python 2, matching what data[i] yields for a byte string there.
    terminator = b'e'[0]
    total = len(data)

    # Bounded scan: an unterminated integer must not raise IndexError.
    end = 1
    while end < total and data[end] != terminator:
        end += 1
    if end >= total:
        return (False, False)

    try:
        number = int(data[1:end])
    except ValueError:
        # Covers an empty body ("ie") as well as stray non-digit characters.
        return (False, False)
    return (number, data[end + 1:])

# Function to parse a bencoded list
#   Arguments:
#	   data		bencoded data, must be guaranteed to be the start of a list
#   Return Value:
#	   Returns a tuple, the first member of the tuple is the parsed list
#	   The second member is whatever remains of the bencoded data so it can
#	   be used to parse the next part of the data


def _decode_list(data):
    """Parse a bencoded list ("l<items>e").

    Arguments:
        data    bencoded data whose first character is the 'l' marker
    Return value:
        A tuple (items, remainder) on success, where remainder is whatever
        follows the closing 'e'. (False, False) when an item fails to parse
        or when the data ends before the closing 'e'.
    """
    items = []
    overflow = data[1:]
    while True:
        # Test the length rather than comparing with '': on Python 3 an empty
        # bytes object never equals '', so the comparison would let an
        # unterminated list fall through to overflow[0] and raise IndexError.
        if len(overflow) == 0:
            return (False, False)

        # The end marker is checked before decoding so empty lists work.
        if _gettype(overflow[0]) == _TYPE_END:
            return (items, overflow[1:])

        value, overflow = _decode(overflow)
        # Errors are signalled by a bool; checking the type (not truthiness)
        # keeps legitimate values such as 0 from being mistaken for one.
        if isinstance(value, bool):
            return (False, False)
        items.append(value)


# Function to parse a bencoded dictionary
#   Arguments:
#	   data		bencoded data, must be guaranteed to be the start of a dictionary
#   Return Value:
#	   Returns a tuple, the first member of the tuple is the parsed dictionary
#	   The second member is whatever remains of the bencoded data so it can
#	   be used to parse the next part of the data
def _decode_dict(data):
    """Parse a bencoded dictionary ("d<key><value>...e").

    Arguments:
        data    bencoded data whose first character is the 'd' marker
    Return value:
        A tuple (mapping, remainder) on success, where remainder is whatever
        follows the closing 'e'. Keys are converted with key.decode().
        (False, False) when a key is not a string, when a key or value fails
        to parse, or when the data ends before the closing 'e'.
    """
    result = {}
    overflow = data[1:]
    while True:
        # Test the length rather than comparing with '': on Python 3 an empty
        # bytes object never equals '', so truncated input would otherwise
        # reach overflow[0] and raise IndexError.
        if len(overflow) == 0:
            return (False, False)

        # Checking for the end marker first lets the empty dictionary "de"
        # decode to {} instead of being rejected as having a non-string key.
        if _gettype(overflow[0]) == _TYPE_END:
            return (result, overflow[1:])

        if _gettype(overflow[0]) != _TYPE_STRING:   # - If the key is not a string
            return (False, False)

        key, overflow = _decode(overflow)
        if isinstance(key, bool) or len(overflow) == 0:     # - If parse error
            return (False, False)

        value, overflow = _decode(overflow)
        if isinstance(value, bool) or len(overflow) == 0:   # - If parse error
            print("Error parsing value")
            print(value)
            print(overflow)
            return (False, False)

        # don't use bytes for the key
        result[key.decode()] = value

#   Arguments:
#	   data		bencoded data in bytes format
#   Return Values:
#	   Returns a tuple, the first member is the parsed data, could be a string,
#	   an integer, a list or a dictionary, or a combination of those
#	   The second member is the leftover of parsing, if everything parses correctly this
#	   should be an empty byte string


def _decode(data):
    """Decode the single bencoded item at the start of data.

    Arguments:
        data    bencoded data
    Return value:
        The (value, remainder) tuple produced by the decoder matching the
        item's first character, or (False, False) when data is empty or its
        first character does not start an integer, string, list or dictionary.
    """
    # Empty input has no first character to inspect; report it as a parse
    # error instead of letting data[0] raise IndexError.
    if len(data) == 0:
        return (False, False)

    handlers = {
        _TYPE_INT: _decode_int,
        _TYPE_STRING: _decode_string,
        _TYPE_LIST: _decode_list,
        _TYPE_DICTIONARY: _decode_dict,
    }
    handler = handlers.get(_gettype(data[0]))
    if handler is None:
        return (False, False)
    return handler(data)

# Function to decode bencoded data
#   Arguments:
#	   data		bencoded data, can be str or bytes
#   Return Values:
#	   Returns the decoded data on success, this could be bytes, int, dict or list
#	   or a combination of those
#	   If an error occurs the return value is False


def decode(data):
    """Decode bencoded data and return only the decoded value.

    The input is handed to _decode() unchanged (no str-to-bytes conversion
    is performed here) and the remainder reported by _decode() is discarded.
    """
    return _decode(data)[0]

#   Args: data as integer
# return: encoded byte string


def _encode_int(data):
    """Return data bencoded as an integer: b'i' + decimal text + b'e'."""
    digits = str(data).encode()
    return b''.join((b'i', digits, b'e'))

#   Args: data as string or bytes
# Return: encoded byte string


def _encode_string(data):
    """Return data bencoded as a string: decimal length, b':', then data."""
    length_prefix = str(len(data)).encode()
    return b''.join((length_prefix, b':', data))

#   Args: data as list
# Return: Encoded byte string, false on error


def _encode_list(data):
    """Bencode a list: b'l', each item passed through encode(), then b'e'.

    Returns False as soon as encode() reports False for any item.
    """
    pieces = [b'l']
    for element in data:
        encoded = encode(element)
        if encoded is False:
            return False
        pieces.append(encoded)
    pieces.append(b'e')
    return b''.join(pieces)

#   Args: data as dict
# Return: encoded byte string, false on error


def _encode_dict(data):
    """Bencode a dictionary whose keys are byte strings or text strings.

    Every key is first normalised to bytes (text keys are encoded as UTF-8).
    Entries are emitted in ascending order of those raw key bytes, which is
    the key ordering the bencoding format calls for.

    Arguments:
        data    dict to encode
    Return value:
        The encoded byte string, or False when a key is neither bytes nor a
        text string, when two keys normalise to the same bytes, or when a
        value cannot be encoded.
    """
    entries = []
    for key in data:
        if isinstance(key, bytes):
            raw = key
        elif isinstance(key, _VALID_STRING_TYPES):
            raw = key.encode('utf-8')
        else:
            return False
        entries.append((raw, key))

    # Sort on the normalised bytes only: sorting the original keys raises
    # TypeError on Python 3 as soon as bytes and str keys are mixed.
    entries.sort(key=lambda entry: entry[0])

    parts = [b'd']
    previous = None
    for raw, key in entries:
        # e.g. b'a' and 'a' in the same dict would emit a duplicate key.
        if raw == previous:
            return False
        previous = raw

        eitem = encode(data[key])
        if eitem is False:
            return False
        parts.append(_encode_string(raw))
        parts.append(eitem)
    parts.append(b'e')
    return b''.join(parts)

# Function to encode a variable in bencoding
#   Arguments:
#	   data		Variable to be encoded, can be a list, dict, str, bytes, int or a combination of those
#   Return Values:
#	   Returns the encoded data as a byte string when successful
#	   If an error occurs the return value is False


def encode(data):
    """Encode a variable in bencoding.

    Arguments:
        data    value to encode: an integer, bytes, a text string, a list,
                a dict, or a nesting of those
    Return value:
        The encoded data as a byte string, or False when data (or something
        nested inside it) is a bool or any other unsupported type.
    """
    # bool is a subclass of int, so it has to be rejected before the integer
    # test; otherwise True/False would be encoded as i1e/i0e.
    if isinstance(data, bool):
        return False

    # type(sys.maxsize + 1) is ``long`` on Python 2 and plain ``int`` on
    # Python 3, so integers too large for a Python 2 ``int`` are accepted
    # as well.
    if isinstance(data, (int, type(sys.maxsize + 1))):
        return _encode_int(data)
    if isinstance(data, bytes):
        return _encode_string(data)
    if isinstance(data, _VALID_STRING_TYPES):
        # Name the codec explicitly: the default is UTF-8 on Python 3 but
        # ASCII on Python 2, where non-ASCII text would raise.
        return _encode_string(data.encode('utf-8'))
    if isinstance(data, list):
        return _encode_list(data)
    if isinstance(data, dict):
        return _encode_dict(data)
    return False
FIX: When retrieving feeds from 32p and in Auth mode, personal notification feeds contained some invalid html entries that weren't removed properly resulting in no results when attempting to match for downloading, FIX: When searching 32P, if title had a '/' within the title - Mylar would mistakingly skip it due to some previous exceptions that were made for CBT, FIX: Main page would quickly display & hide the have% column instead of always being hidden, FIX: Adjusted some incorrect spacing for non-alphanumeric characters when comparing search results (should result in better matching hopefully), FIX: When adding a series and the most recent issue was present on the weekly-pull list, it would sometimes not mark it as Wanted and auto-attempt to search for it (if auto mark Upcoming enabled), FIX: Added Test Connection button for 32P where it will test logon credentials as well as if Captcha is present, IMP: If captcha is enabled for 32p and signon is required because keys are stale, will not send authentication information and will just bypass as a provider, IMP: Test Connection button added for SABnzbd, IMP: Added ability to directly add torrents to rtorrent and apply label + download directory options (config.ini only atm), FIX: If a search result had a 'vol.' 
label in it, depending on how the format of the label was mylar would refuse to remove the volume which resulted in failed matches (also fixed a similar issue with failing to remove the volume label when comparing search results), FIX: When filechecking, if a series had a - in the title, will now account for it properly, IMP: Completely redid the filecheck module which allows for integration into other modules as well as more detailed failure logs, IMP: Added Dynamic handder integration into filechecker and subsequent modules that use it which allows for special characters to be replaced with any other type of character, IMP: Manual post-processing speed improved greatly due to new usage of filecheck module, IMP: Importer backend code redone to include new filecheck module, IMP: Added status/counter to import process, IMP: Added force unlock option to importer for failed imports, IMP: Added new status to Import labelled as 'Manual Intervention' for imports that need the user to manually select an option from an available list, FIX: When import said there were search results to view, but none available - would blank screen, IMP: Added a failure log entry showing all the failed files that weren't able to be scanned in during an import (will be in GUI eventually), IMP: if only partial metadata is available during import, Mylar will attempt to use what's available from the metatagging instead of picking all of one/other, IMP: Better grouping of series/volumes when viewing the import results page as well as now indicating if annuals are present within the files, IMP: Added a file-icon beside each imported item on the import result page which allows the user to view the files that are associated with the given series grouping, IMP: Added a blacklisted_publishers option to config.ini which will blacklist specific publishers from being returned during search / import results, FIX: If duplicate dump folder had a value, but duplicate dump wasn't enabled - would still use the 
duplicate dump folder during post-processing runs, FIX: (#1194) Patch to allow for fixed H1 elements for title (thnx chazlarson), FIX: Removed UnRAR dependency checks in cmtagmylar since not being used anymore, FIX: Fixed a problem with non-ascii characters being recognized during a file-check in certain cases, IMP: Attmept by Mylar to grab an alternate jpg from file when viewing the issue details if it complies with the naming conventions, FIX: Fixed some metatagging issues with ComicBookLover tags not being handled properly if they didn't exist, IMP: Dupecheck now has a failback if it's comparing a cbr/cbr, cbz/cbz and cbr/cbz-priority is enabled, FIX: Quick check added for when adding/refreshing a comic that if a cover already existed, it would delete the cover prior to the attempt to retrieve it, IMP: Added some additional handling for when searching/adding fails, FIX: If a story arc didn't have proper issue dates (or invalid ones) would error out on loading the story arc main page - usually when arcs were imported using a cbl file. 2016-04-07 17:09:06 +00:00			`# Copyright (C) 2011 by clueless <clueless.nospam ! mail.com>`
			`#`
			`# Permission is hereby granted, free of charge, to any person obtaining a copy`
			`# of this software and associated documentation files (the "Software"), to deal`
			`# in the Software without restriction, including without limitation the rights`
			`# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell`
			`# copies of the Software, and to permit persons to whom the Software is`
			`# furnished to do so, subject to the following conditions:`
			`#`
			`# The above copyright notice and this permission notice shall be included in`
			`# all copies or substantial portions of the Software.`
			`#`
			`# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR`
			`# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,`
			`# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE`
			`# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER`
			`# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,`
			`# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN`
			`# THE SOFTWARE.`
			`#`
			`# Version: 20111107`
			`#`
			`# Changelog`
			`# ---------`
			`# 2011-11-07 - Added support for Python2 (tested on 2.6)`
			`# 2011-10-03 - Fixed: moved check for end of list at the top of the while loop`
			`# in _decode_list (in case the list is empty) (Chris Lucas)`
			`# - Converted dictionary keys to str`
			`# 2011-04-24 - Changed date format to YYYY-MM-DD for versioning, bigger`
			`# integer denotes a newer version`
			`# - Fixed a bug that would treat False as an integral type but`
			`# encode it using the 'False' string, attempting to encode a`
			`# boolean now results in an error`
			`# - Fixed a bug where an integer value of 0 in a list or`
			`# dictionary resulted in a parse error while decoding`
			`#`
			`# 2011-04-03 - Original release`

			`import sys`

			`_py3 = sys.version_info[0] == 3`

			`if _py3:`
			`_VALID_STRING_TYPES = (str,)`
			`else:`
			`_VALID_STRING_TYPES = (str, unicode) # @UndefinedVariable`

			`_TYPE_INT = 1`
			`_TYPE_STRING = 2`
			`_TYPE_LIST = 3`
			`_TYPE_DICTIONARY = 4`
			`_TYPE_END = 5`
			`_TYPE_INVALID = 6`

			`# Function to determine the type of he next value/item`
			`# Arguments:`
			`# char First character of the string that is to be decoded`
			`# Return value:`
			`# Returns an integer that describes what type the next value/item is`


			`def _gettype(char):`
			`if not isinstance(char, int):`
			`char = ord(char)`
			`if char == 0x6C: # 'l'`
			`return _TYPE_LIST`
			`elif char == 0x64: # 'd'`
			`return _TYPE_DICTIONARY`
			`elif char == 0x69: # 'i'`
			`return _TYPE_INT`
			`elif char == 0x65: # 'e'`
			`return _TYPE_END`
			`elif char >= 0x30 and char <= 0x39: # '0' '9'`
			`return _TYPE_STRING`
			`else:`
			`return _TYPE_INVALID`

			`# Function to parse a string from the bendcoded data`
			`# Arguments:`
			`# data bencoded data, must be guaranteed to be a string`
			`# Return Value:`
			`# Returns a tuple, the first member of the tuple is the parsed string`
			`# The second member is whatever remains of the bencoded data so it can`
			`# be used to parse the next part of the data`


			`def _decode_string(data):`
			`end = 1`
			`# if py3, data[end] is going to be an int`
			`# if py2, data[end] will be a string`
			`if _py3:`
			`char = 0x3A`
			`else:`
			`char = chr(0x3A)`

			`while data[end] != char: # ':'`
			`end = end + 1`
			`strlen = int(data[:end])`
			`return (data[end + 1:strlen + end + 1], data[strlen + end + 1:])`

			`# Function to parse an integer from the bencoded data`
			`# Arguments:`
			`# data bencoded data, must be guaranteed to be an integer`
			`# Return Value:`
			`# Returns a tuple, the first member of the tuple is the parsed string`
			`# The second member is whatever remains of the bencoded data so it can`
			`# be used to parse the next part of the data`


			`def _decode_int(data):`
			`end = 1`
			`# if py3, data[end] is going to be an int`
			`# if py2, data[end] will be a string`
			`if _py3:`
			`char = 0x65`
			`else:`
			`char = chr(0x65)`

			`while data[end] != char: # 'e'`
			`end = end + 1`
			`return (int(data[1:end]), data[end + 1:])`

			`# Function to parse a bencoded list`
			`# Arguments:`
			`# data bencoded data, must be guaranted to be the start of a list`
			`# Return Value:`
			`# Returns a tuple, the first member of the tuple is the parsed list`
			`# The second member is whatever remains of the bencoded data so it can`
			`# be used to parse the next part of the data`


			`def _decode_list(data):`
			`x = []`
			`overflow = data[1:]`
			`while True: # Loop over the data`
			`if _gettype(overflow[0]) == _TYPE_END: # - Break if we reach the end of the list`
			`return (x, overflow[1:]) # and return the list and overflow`

			`value, overflow = _decode(overflow) #`
			`if isinstance(value, bool) or overflow == '': # - if we have a parse error`
			`return (False, False) # Die with error`
			`else: # - Otherwise`
			`x.append(value) # add the value to the list`


			`# Function to parse a bencoded list`
			`# Arguments:`
			`# data bencoded data, must be guaranted to be the start of a list`
			`# Return Value:`
			`# Returns a tuple, the first member of the tuple is the parsed dictionary`
			`# The second member is whatever remains of the bencoded data so it can`
			`# be used to parse the next part of the data`
			`def _decode_dict(data):`
			`x = {}`
			`overflow = data[1:]`
			`while True: # Loop over the data`
			`if _gettype(overflow[0]) != _TYPE_STRING: # - If the key is not a string`
			`return (False, False) # Die with error`
			`key, overflow = _decode(overflow) #`
			`if key == False or overflow == '': # - If parse error`
			`return (False, False) # Die with error`
			`value, overflow = _decode(overflow) #`
			`if isinstance(value, bool) or overflow == '': # - If parse error`
			`print("Error parsing value")`
			`print(value)`
			`print(overflow)`
			`return (False, False) # Die with error`
			`else:`
			`# don't use bytes for the key`
			`key = key.decode()`
			`x[key] = value`
			`if _gettype(overflow[0]) == _TYPE_END:`
			`return (x, overflow[1:])`

			`# Arguments:`
			`# data bencoded data in bytes format`
			`# Return Values:`
			`# Returns a tuple, the first member is the parsed data, could be a string,`
			`# an integer, a list or a dictionary, or a combination of those`
			`# The second member is the leftover of parsing, if everything parses correctly this`
			`# should be an empty byte string`


			`def _decode(data):`
			`btype = _gettype(data[0])`
			`if btype == _TYPE_INT:`
			`return _decode_int(data)`
			`elif btype == _TYPE_STRING:`
			`return _decode_string(data)`
			`elif btype == _TYPE_LIST:`
			`return _decode_list(data)`
			`elif btype == _TYPE_DICTIONARY:`
			`return _decode_dict(data)`
			`else:`
			`return (False, False)`

			`# Function to decode bencoded data`
			`# Arguments:`
			`# data bencoded data, can be str or bytes`
			`# Return Values:`
			`# Returns the decoded data on success, this coud be bytes, int, dict or list`
			`# or a combinatin of those`
			`# If an error occurs the return value is False`


			`def decode(data):`
			`# if isinstance(data, str):`
			`# data = data.encode()`
			`decoded, overflow = _decode(data)`
			`return decoded`

			`# Args: data as integer`
			`# return: encoded byte string`


			`def _encode_int(data):`
			`return b'i' + str(data).encode() + b'e'`

			`# Args: data as string or bytes`
			`# Return: encoded byte string`


			`def _encode_string(data):`
			`return str(len(data)).encode() + b':' + data`

			`# Args: data as list`
			`# Return: Encoded byte string, false on error`


			`def _encode_list(data):`
			`elist = b'l'`
			`for item in data:`
			`eitem = encode(item)`
			`if eitem == False:`
			`return False`
			`elist += eitem`
			`return elist + b'e'`

			`# Args: data as dict`
			`# Return: encoded byte string, false on error`


			`def _encode_dict(data):`
			`edict = b'd'`
			`keys = []`
			`for key in data:`
			`if not isinstance(key, _VALID_STRING_TYPES) and not isinstance(key, bytes):`
			`return False`
			`keys.append(key)`
			`keys.sort()`
			`for key in keys:`
			`ekey = encode(key)`
			`eitem = encode(data[key])`
			`if ekey == False or eitem == False:`
			`return False`
			`edict += ekey + eitem`
			`return edict + b'e'`

			`# Function to encode a variable in bencoding`
			`# Arguments:`
			`# data Variable to be encoded, can be a list, dict, str, bytes, int or a combination of those`
			`# Return Values:`
			`# Returns the encoded data as a byte string when successful`
			`# If an error occurs the return value is False`


			`def encode(data):`
			`if isinstance(data, bool):`
			`return False`
			`elif isinstance(data, int):`
			`return _encode_int(data)`
			`elif isinstance(data, bytes):`
			`return _encode_string(data)`
			`elif isinstance(data, _VALID_STRING_TYPES):`
			`return _encode_string(data.encode())`
			`elif isinstance(data, list):`
			`return _encode_list(data)`
			`elif isinstance(data, dict):`
			`return _encode_dict(data)`
			`else:`
			`return False`