# Copyright (C) 2011 by clueless # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
#
# Version: 20111107
#
# Changelog
# ---------
# 2011-11-07 - Added support for Python2 (tested on 2.6)
# 2011-10-03 - Fixed: moved check for end of list at the top of the while loop
#              in _decode_list (in case the list is empty) (Chris Lucas)
#            - Converted dictionary keys to str
# 2011-04-24 - Changed date format to YYYY-MM-DD for versioning, bigger
#              integer denotes a newer version
#            - Fixed a bug that would treat False as an integral type but
#              encode it using the 'False' string, attempting to encode a
#              boolean now results in an error
#            - Fixed a bug where an integer value of 0 in a list or
#              dictionary resulted in a parse error while decoding
#
# 2011-04-03 - Original release

import sys

_py3 = sys.version_info[0] == 3

if _py3:
    _VALID_STRING_TYPES = (str,)
    _VALID_INT_TYPES = (int,)
else:
    _VALID_STRING_TYPES = (str, unicode)  # @UndefinedVariable
    _VALID_INT_TYPES = (int, long)  # @UndefinedVariable

_TYPE_INT = 1
_TYPE_STRING = 2
_TYPE_LIST = 3
_TYPE_DICTIONARY = 4
_TYPE_END = 5
_TYPE_INVALID = 6

# Maps the leading byte of a bencoded item to its type. Byte strings are not
# listed here because they start with any decimal digit (see _gettype).
_PREFIX_TYPES = {
    0x6C: _TYPE_LIST,        # 'l'
    0x64: _TYPE_DICTIONARY,  # 'd'
    0x69: _TYPE_INT,         # 'i'
    0x65: _TYPE_END,         # 'e'
}

# Value returned by every internal decoder to signal a parse error.
_PARSE_ERROR = (False, False)


def _gettype(char):
    """Determine the type of the next bencoded value/item.

    Arguments:
        char    First character of the data that is to be decoded; either an
                int (indexing bytes on Python 3) or a one-character string
                (indexing str on Python 2).

    Return value:
        One of the _TYPE_* constants describing the next value/item.
    """
    if not isinstance(char, int):
        char = ord(char)
    if 0x30 <= char <= 0x39:  # '0' .. '9' introduces a string length
        return _TYPE_STRING
    return _PREFIX_TYPES.get(char, _TYPE_INVALID)


def _decode_string(data):
    """Parse a byte string ("<length>:<payload>") from the bencoded data.

    Arguments:
        data    bencoded data, must start with the length of a string

    Return value:
        A tuple; the first member is the parsed string, the second member is
        whatever remains of the bencoded data so it can be used to parse the
        next part. (False, False) is returned when the ':' separator is
        missing, the length is not a number, or fewer bytes remain than the
        length announces.
    """
    # The first character is known to be a digit, so start looking at 1.
    colon = data.find(b':', 1)
    if colon < 0:
        return _PARSE_ERROR
    try:
        strlen = int(data[:colon])
    except ValueError:
        return _PARSE_ERROR
    start = colon + 1
    stop = start + strlen
    # Slicing would silently shorten a truncated payload, so check explicitly.
    if strlen < 0 or stop > len(data):
        return _PARSE_ERROR
    return (data[start:stop], data[stop:])


def _decode_int(data):
    """Parse an integer ("i<number>e") from the bencoded data.

    Arguments:
        data    bencoded data, must start with the 'i' of an integer

    Return value:
        A tuple; the first member is the parsed integer, the second member is
        whatever remains of the bencoded data so it can be used to parse the
        next part. (False, False) is returned when the terminating 'e' is
        missing or the text in between is not a number.
    """
    end = data.find(b'e', 1)
    if end < 0:
        return _PARSE_ERROR
    try:
        value = int(data[1:end])
    except ValueError:
        return _PARSE_ERROR
    return (value, data[end + 1:])


def _decode_list(data):
    """Parse a bencoded list ("l<items>e").

    Arguments:
        data    bencoded data, must be the start of a list

    Return value:
        A tuple; the first member is the parsed list, the second member is
        whatever remains of the bencoded data so it can be used to parse the
        next part. (False, False) is returned when an item fails to parse or
        the data ends before the closing 'e'.
    """
    items = []
    overflow = data[1:]
    while True:
        if not overflow:
            # Ran out of data before the closing 'e'.
            return _PARSE_ERROR
        if _gettype(overflow[0]) == _TYPE_END:
            return (items, overflow[1:])
        value, overflow = _decode(overflow)
        # Decoded values are never booleans, so a bool means a parse error.
        if isinstance(value, bool):
            return _PARSE_ERROR
        items.append(value)


def _decode_dict(data):
    """Parse a bencoded dictionary ("d<key><value>...e").

    Arguments:
        data    bencoded data, must be the start of a dictionary

    Return value:
        A tuple; the first member is the parsed dictionary (keys are decoded
        from UTF-8 to text, values are left as decoded), the second member is
        whatever remains of the bencoded data so it can be used to parse the
        next part. (False, False) is returned when a key is not a string, a
        key is not valid UTF-8, a key or value fails to parse, or the data
        ends before the closing 'e'.
    """
    result = {}
    overflow = data[1:]
    while True:
        if not overflow:
            # Ran out of data before the closing 'e'.
            return _PARSE_ERROR
        next_type = _gettype(overflow[0])
        # Checking for the end first lets the empty dictionary "de" parse.
        if next_type == _TYPE_END:
            return (result, overflow[1:])
        if next_type != _TYPE_STRING:
            return _PARSE_ERROR
        key, overflow = _decode_string(overflow)
        if key is False:
            return _PARSE_ERROR
        value, overflow = _decode(overflow)
        if isinstance(value, bool):
            return _PARSE_ERROR
        # Don't use bytes for the key.
        try:
            key = key.decode('utf-8')
        except UnicodeDecodeError:
            return _PARSE_ERROR
        result[key] = value


def _decode(data):
    """Parse the next bencoded value, whatever its type.

    Arguments:
        data    bencoded data in bytes format

    Return value:
        A tuple; the first member is the parsed data, which could be a
        string, an integer, a list or a dictionary, or a combination of
        those. The second member is the leftover of parsing; if everything
        parses correctly this should be an empty byte string.
        (False, False) is returned when the data is empty or malformed.
    """
    if not data:
        return _PARSE_ERROR
    btype = _gettype(data[0])
    if btype == _TYPE_INT:
        return _decode_int(data)
    if btype == _TYPE_STRING:
        return _decode_string(data)
    if btype == _TYPE_LIST:
        return _decode_list(data)
    if btype == _TYPE_DICTIONARY:
        return _decode_dict(data)
    return _PARSE_ERROR


def decode(data):
    """Decode bencoded data.

    Arguments:
        data    bencoded data, can be str or bytes; text is encoded to UTF-8
                bytes before parsing

    Return value:
        The decoded data on success; this could be bytes, int, dict or list,
        or a combination of those. Anything following the first complete
        value is ignored. If an error occurs the return value is False.
    """
    # On Python 2, str is bytes and must be left alone; only real text
    # (Python 3 str, Python 2 unicode) needs converting.
    if isinstance(data, _VALID_STRING_TYPES) and not isinstance(data, bytes):
        data = data.encode('utf-8')
    decoded, _leftover = _decode(data)
    return decoded


def _encode_int(data):
    """Encode an integer.

    Arguments:
        data    the integer to encode

    Return value:
        The encoded byte string ("i<number>e").
    """
    return b'i' + str(data).encode('ascii') + b'e'


def _encode_string(data):
    """Encode a byte string.

    Arguments:
        data    the bytes to encode

    Return value:
        The encoded byte string ("<length>:<payload>").
    """
    return str(len(data)).encode('ascii') + b':' + data


def _encode_list(data):
    """Encode a list.

    Arguments:
        data    the list to encode

    Return value:
        The encoded byte string ("l<items>e"), False if an item cannot be
        encoded.
    """
    parts = [b'l']
    for item in data:
        eitem = encode(item)
        if eitem is False:
            return False
        parts.append(eitem)
    parts.append(b'e')
    return b''.join(parts)


def _encode_dict(data):
    """Encode a dictionary.

    Arguments:
        data    the dict to encode; keys must be text or bytes

    Return value:
        The encoded byte string ("d<key><value>...e") with the entries
        ordered by their keys as raw bytes, False if a key is not a string
        or a value cannot be encoded.
    """
    pairs = []
    for key in data:
        if isinstance(key, bytes):
            raw_key = key
        elif isinstance(key, _VALID_STRING_TYPES):
            raw_key = key.encode('utf-8')
        else:
            return False
        pairs.append((raw_key, data[key]))
    # Sort on the encoded key only: text and bytes keys cannot be compared
    # with each other on Python 3, and values may not be comparable at all.
    pairs.sort(key=lambda pair: pair[0])

    parts = [b'd']
    for raw_key, value in pairs:
        evalue = encode(value)
        if evalue is False:
            return False
        parts.append(_encode_string(raw_key))
        parts.append(evalue)
    parts.append(b'e')
    return b''.join(parts)


def encode(data):
    """Encode a variable in bencoding.

    Arguments:
        data    variable to be encoded, can be a list, dict, str, bytes, int
                or a combination of those; text is encoded as UTF-8

    Return value:
        The encoded data as a byte string when successful. If an error
        occurs (for example a boolean or another unsupported type) the
        return value is False.
    """
    # bool is a subclass of int, so it has to be rejected before the int test.
    if isinstance(data, bool):
        return False
    if isinstance(data, _VALID_INT_TYPES):
        return _encode_int(data)
    if isinstance(data, bytes):
        return _encode_string(data)
    if isinstance(data, _VALID_STRING_TYPES):
        return _encode_string(data.encode('utf-8'))
    if isinstance(data, list):
        return _encode_list(data)
    if isinstance(data, dict):
        return _encode_dict(data)
    return False