mylar/lib/bencode.py

282 lines
8.9 KiB
Python

# Copyright (C) 2011 by clueless <clueless.nospam ! mail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# Version: 20111107
#
# Changelog
# ---------
# 2011-11-07 - Added support for Python2 (tested on 2.6)
# 2011-10-03 - Fixed: moved check for end of list at the top of the while loop
# in _decode_list (in case the list is empty) (Chris Lucas)
# - Converted dictionary keys to str
# 2011-04-24 - Changed date format to YYYY-MM-DD for versioning, bigger
# integer denotes a newer version
# - Fixed a bug that would treat False as an integral type but
# encode it using the 'False' string, attempting to encode a
# boolean now results in an error
# - Fixed a bug where an integer value of 0 in a list or
# dictionary resulted in a parse error while decoding
#
# 2011-04-03 - Original release
import sys
_py3 = sys.version_info[0] == 3
if _py3:
_VALID_STRING_TYPES = (str,)
else:
_VALID_STRING_TYPES = (str, unicode) # @UndefinedVariable
_TYPE_INT = 1
_TYPE_STRING = 2
_TYPE_LIST = 3
_TYPE_DICTIONARY = 4
_TYPE_END = 5
_TYPE_INVALID = 6
# Function to determine the type of he next value/item
# Arguments:
# char First character of the string that is to be decoded
# Return value:
# Returns an integer that describes what type the next value/item is
def _gettype(char):
if not isinstance(char, int):
char = ord(char)
if char == 0x6C: # 'l'
return _TYPE_LIST
elif char == 0x64: # 'd'
return _TYPE_DICTIONARY
elif char == 0x69: # 'i'
return _TYPE_INT
elif char == 0x65: # 'e'
return _TYPE_END
elif char >= 0x30 and char <= 0x39: # '0' '9'
return _TYPE_STRING
else:
return _TYPE_INVALID
# Function to parse a string from the bendcoded data
# Arguments:
# data bencoded data, must be guaranteed to be a string
# Return Value:
# Returns a tuple, the first member of the tuple is the parsed string
# The second member is whatever remains of the bencoded data so it can
# be used to parse the next part of the data
def _decode_string(data):
end = 1
# if py3, data[end] is going to be an int
# if py2, data[end] will be a string
if _py3:
char = 0x3A
else:
char = chr(0x3A)
while data[end] != char: # ':'
end = end + 1
strlen = int(data[:end])
return (data[end + 1:strlen + end + 1], data[strlen + end + 1:])
# Function to parse an integer from the bencoded data
# Arguments:
# data bencoded data, must be guaranteed to be an integer
# Return Value:
# Returns a tuple, the first member of the tuple is the parsed string
# The second member is whatever remains of the bencoded data so it can
# be used to parse the next part of the data
def _decode_int(data):
end = 1
# if py3, data[end] is going to be an int
# if py2, data[end] will be a string
if _py3:
char = 0x65
else:
char = chr(0x65)
while data[end] != char: # 'e'
end = end + 1
return (int(data[1:end]), data[end + 1:])
# Function to parse a bencoded list
# Arguments:
# data bencoded data, must be guaranted to be the start of a list
# Return Value:
# Returns a tuple, the first member of the tuple is the parsed list
# The second member is whatever remains of the bencoded data so it can
# be used to parse the next part of the data
def _decode_list(data):
x = []
overflow = data[1:]
while True: # Loop over the data
if _gettype(overflow[0]) == _TYPE_END: # - Break if we reach the end of the list
return (x, overflow[1:]) # and return the list and overflow
value, overflow = _decode(overflow) #
if isinstance(value, bool) or overflow == '': # - if we have a parse error
return (False, False) # Die with error
else: # - Otherwise
x.append(value) # add the value to the list
# Function to parse a bencoded list
# Arguments:
# data bencoded data, must be guaranted to be the start of a list
# Return Value:
# Returns a tuple, the first member of the tuple is the parsed dictionary
# The second member is whatever remains of the bencoded data so it can
# be used to parse the next part of the data
def _decode_dict(data):
x = {}
overflow = data[1:]
while True: # Loop over the data
if _gettype(overflow[0]) != _TYPE_STRING: # - If the key is not a string
return (False, False) # Die with error
key, overflow = _decode(overflow) #
if key == False or overflow == '': # - If parse error
return (False, False) # Die with error
value, overflow = _decode(overflow) #
if isinstance(value, bool) or overflow == '': # - If parse error
print("Error parsing value")
print(value)
print(overflow)
return (False, False) # Die with error
else:
# don't use bytes for the key
key = key.decode()
x[key] = value
if _gettype(overflow[0]) == _TYPE_END:
return (x, overflow[1:])
# Arguments:
# data bencoded data in bytes format
# Return Values:
# Returns a tuple, the first member is the parsed data, could be a string,
# an integer, a list or a dictionary, or a combination of those
# The second member is the leftover of parsing, if everything parses correctly this
# should be an empty byte string
def _decode(data):
btype = _gettype(data[0])
if btype == _TYPE_INT:
return _decode_int(data)
elif btype == _TYPE_STRING:
return _decode_string(data)
elif btype == _TYPE_LIST:
return _decode_list(data)
elif btype == _TYPE_DICTIONARY:
return _decode_dict(data)
else:
return (False, False)
# Function to decode bencoded data
# Arguments:
# data bencoded data, can be str or bytes
# Return Values:
# Returns the decoded data on success, this coud be bytes, int, dict or list
# or a combinatin of those
# If an error occurs the return value is False
def decode(data):
# if isinstance(data, str):
# data = data.encode()
decoded, overflow = _decode(data)
return decoded
# Args: data as integer
# return: encoded byte string
def _encode_int(data):
return b'i' + str(data).encode() + b'e'
# Args: data as string or bytes
# Return: encoded byte string
def _encode_string(data):
return str(len(data)).encode() + b':' + data
# Args: data as list
# Return: Encoded byte string, false on error
def _encode_list(data):
elist = b'l'
for item in data:
eitem = encode(item)
if eitem == False:
return False
elist += eitem
return elist + b'e'
# Args: data as dict
# Return: encoded byte string, false on error
def _encode_dict(data):
edict = b'd'
keys = []
for key in data:
if not isinstance(key, _VALID_STRING_TYPES) and not isinstance(key, bytes):
return False
keys.append(key)
keys.sort()
for key in keys:
ekey = encode(key)
eitem = encode(data[key])
if ekey == False or eitem == False:
return False
edict += ekey + eitem
return edict + b'e'
# Function to encode a variable in bencoding
# Arguments:
# data Variable to be encoded, can be a list, dict, str, bytes, int or a combination of those
# Return Values:
# Returns the encoded data as a byte string when successful
# If an error occurs the return value is False
def encode(data):
if isinstance(data, bool):
return False
elif isinstance(data, int):
return _encode_int(data)
elif isinstance(data, bytes):
return _encode_string(data)
elif isinstance(data, _VALID_STRING_TYPES):
return _encode_string(data.encode())
elif isinstance(data, list):
return _encode_list(data)
elif isinstance(data, dict):
return _encode_dict(data)
else:
return False