# Copyright (C) 2009 Raul Jimenez
# Released under GNU LGPL 2.1
# See LICENSE.txt for more information
logger = logging.getLogger('dht')


class LoggingException(Exception):
    """Base exception that reports itself to the 'dht' logger on creation.

    The record is emitted at INFO level from the constructor, so it is
    written as soon as the exception object is built.

    Arguments:
    msg -- text appended to the exception class in the log record.

    """

    def __init__(self, msg):
        # Hand the arguments to the logger instead of formatting eagerly:
        # the text is identical, but nothing is built when INFO records
        # are filtered out.
        logger.info('%s: %s', self.__class__, msg)
class EncodeError(LoggingException):
    """Signals that the encoder was given input it cannot bencode."""
class DecodeError(LoggingException):
    """Raised by decoder when invalid bencode input.

    Arguments:
    msg -- description of what is wrong with the input.
    bencoded -- the offending input; it is appended to the logged message.

    """

    def __init__(self, msg, bencoded):
        # %-formatting rather than str.join(): join() raises TypeError when
        # bencoded is not a string (None, for instance), which would mask
        # the DecodeError under construction.  For string input the
        # resulting text is exactly the same.
        LoggingException.__init__(
            self, '%s\nBencoded: %s' % (msg, bencoded))
class RecursionDepthError(DecodeError):
    """Raised when bencoded input is nested more deeply than allowed.

    The check keeps the recursive decoder from descending too far when
    the input, built by accident or with malicious intent, looks like
    'llllllllllllllllllllllllllllllllllll'.

    """
def encode(data):
    """Return the bencoded form of data as a string.

    The encode function for data is chosen by _get_encode_f and writes
    into an in-memory buffer whose contents are returned.

    """
    # NOTE(review): the 'def' line is reconstructed; the name is taken from
    # the matching decode() function -- confirm against the upstream file.
    output = cStringIO.StringIO()
    _get_encode_f(data)(data, output)
    return output.getvalue()
def decode(bencoded, max_depth=4):
    """Decode a complete bencoded string and return the decoded object.

    max_depth is handed to the decode functions, which decrease it by
    one for every level of nesting they enter.

    DecodeError is raised when the input is empty, when decoding fails
    with KeyError, IndexError or ValueError, or when characters remain
    after the first bencoded value.

    """
    if not bencoded:
        raise DecodeError('Empty bencoded string', bencoded)
    try:
        data, next_pos = _get_decode_f(bencoded, 0)(bencoded, 0, max_depth)
    except (KeyError, IndexError, ValueError):
        raise DecodeError('UNEXPECTED>>>>>>>>>>>>', bencoded)
    # Only reached after a successful decode: the handler above always raises.
    if next_pos != len(bencoded):
        raise DecodeError('Extra characters after valid bencode.', bencoded)
    return data
56 def _encode_str(data, output):
57 """Encode a string object
60 <string length encoded in base ten ASCII>:<string data>
63 output.write('%d:%s' % (len(data), data))
65 def _encode_int(data, output):
66 """Encode an integer (or long) object
69 i<integer encoded in base ten ASCII>e
72 output.write('i%de' % data)
def _encode_list(data, output):
    """Write the bencoded form of a list to output.

    What gets written is:
    l<bencoded element>...<bencoded element>e

    Each element is written by the encode function that _get_encode_f
    selects for it.

    """
    output.write('l')
    for element in data:
        _get_encode_f(element)(element, output)
    output.write('e')
def _encode_dict(data, output):
    """Encode a dict object

    The result format is:
    d<bencoded key><bencoded value>...<bencoded key><bencoded value>e
    Keys must be string and will be encoded in lexicographical order

    Raises EncodeError when a key is not a str.

    """
    # Validate every key before sorting or writing anything, so that a
    # non-string key is always reported as EncodeError (ordering keys of
    # mixed types can itself fail) and no partial output is produced.
    for key in data:
        # Exact type test, in line with the exact-type lookup that
        # _get_encode_f performs for values.
        if type(key) is not str:  # key must be a string
            raise EncodeError('Found a non-string key. Data: %r' % (data,))
    output.write('d')
    for key in sorted(data):
        _encode_fs[str](key, output)
        value = data[key]
        _get_encode_f(value)(value, output)
    output.write('e')
def _decode_str(bencoded, pos, _):
    """Decode the length-prefixed string that starts at pos.

    Returns a (string, next position) tuple.  The third argument is
    ignored; it is accepted so that all decode functions can be called
    with the same arguments.

    Raises DecodeError when the input ends before the declared length
    is reached.

    """
    str_len, str_begin = _get_int(bencoded, pos, ':')
    str_end = str_begin + str_len
    if str_end > len(bencoded):
        # Slicing past the end would silently return a shortened string
        # and a position beyond the input.
        raise DecodeError('String is shorter than its declared length.',
                          bencoded)
    return (bencoded[str_begin:str_end], str_end)
def _decode_int(bencoded, pos, _):
    """Decode the integer whose 'i' marker is at pos.

    Returns whatever _get_int returns for the text between the marker
    and the next 'e'.  The third argument is ignored.

    """
    first_digit = pos + 1  # step over the leading 'i'
    return _get_int(bencoded, first_digit, 'e')
def _decode_list(bencoded, pos, max_depth):
    """Decode the list whose 'l' marker is at pos.

    Returns a (list, next position) tuple.  Every element is decoded
    with max_depth reduced by one; RecursionDepthError is raised when
    no depth is left.

    """
    # NOTE(review): the guard condition is reconstructed -- confirm
    # against the upstream file.
    if max_depth == 0:
        raise RecursionDepthError('maximum recursion depth exceeded', bencoded)

    items = []
    cursor = pos + 1  # step over the opening 'l'
    while bencoded[cursor] != 'e':
        element_f = _get_decode_f(bencoded, cursor)
        item, cursor = element_f(bencoded, cursor, max_depth - 1)
        items.append(item)
    return items, cursor + 1  # step over the closing 'e'
def _decode_dict(bencoded, pos, max_depth):
    """Decode the dict whose 'd' marker is at pos.

    Returns a (dict, next position) tuple.  Keys and values are decoded
    with max_depth reduced by one.

    Raises RecursionDepthError when no depth is left, and DecodeError
    when a key is not a bencoded string.

    """
    # NOTE(review): the guard condition is reconstructed -- confirm
    # against the upstream file.
    if max_depth == 0:
        # DecodeError's constructor requires the bencoded input as well;
        # passing only the message fails with TypeError instead of
        # raising RecursionDepthError.
        raise RecursionDepthError('maximum recursion depth exceeded', bencoded)

    result = {}
    next_pos = pos + 1  # skip 'd'
    while bencoded[next_pos] != 'e':
        # Decode key
        decode_f = _get_decode_f(bencoded, next_pos)
        if decode_f is not _decode_str:
            raise DecodeError(
                'Keys must be string. Found: <%s>' % (bencoded[next_pos],),
                bencoded)
        key, next_pos = decode_f(bencoded, next_pos, max_depth - 1)
        # Decode value
        decode_f = _get_decode_f(bencoded, next_pos)
        value, next_pos = decode_f(bencoded, next_pos, max_depth - 1)
        result[key] = value
    return result, next_pos + 1  # skip 'e'
def _get_encode_f(value):
    """Return the encode function registered for the exact type of value.

    Raises EncodeError when _encode_fs has no entry for that type.

    """
    try:
        return _encode_fs[type(value)]
    except KeyError as e:
        # 'except ... as' and the call form of raise are accepted by both
        # Python 2.6+ and Python 3; the comma forms are Python 2 only.
        raise EncodeError('Invalid type: <%r>' % e)
def _get_int(bencoded, pos, char):
    """Parse the integer between pos and the next occurrence of char.

    Returns an (integer, next position) tuple, where the next position
    is the one just after char.

    Raises DecodeError when char does not occur at or after pos, or
    when the text before it is not an integer.

    """
    try:
        end = bencoded.index(char, pos)
    except ValueError:
        # Interpolate the delimiter: the placeholder used to be logged
        # verbatim as '%s'.
        raise DecodeError('Character %s not found.' % char, bencoded)
    try:
        result = int(bencoded[pos:end])
    except ValueError as e:
        raise DecodeError('Not an integer: %r' % e, bencoded)
    return (result, end + 1)  # +1 to skip final character
def _get_decode_f(bencoded, pos):
    """Return the decode function for the character found at pos.

    Raises DecodeError when _decode_fs has no entry for that character.

    """
    try:
        return _decode_fs[bencoded[pos]]
    except KeyError as e:
        # 'except ... as' is accepted by both Python 2.6+ and Python 3.
        raise DecodeError('Character in position %d raised %r' % (pos, e),
                          bencoded)
# Maps the exact type of a value to the function that encodes it.
# NOTE(review): only the str and tuple rows are visible in this copy; the
# int, long, list and dict rows are reconstructed from the encoder
# docstrings -- confirm against the upstream file.
_encode_fs = {
    str: _encode_str,
    int: _encode_int,
    long: _encode_int,
    tuple: _encode_list,
    list: _encode_list,
    dict: _encode_dict,
}
# Maps the first character of a bencoded value to its decode function.
# NOTE(review): only the 'i' row and the loop body are visible in this
# copy; the 'l' and 'd' rows and the loop header are reconstructed --
# confirm against the upstream file.
_decode_fs = {
    'i': _decode_int,
    'l': _decode_list,
    'd': _decode_dict,
}
# A bencoded string starts with its length, i.e. with a decimal digit.
for i in range(10):
    _decode_fs[str(i)] = _decode_str