#!/usr/bin/env python
#
# Parser for libtorrent-rasterbar verbose messages
# http://www.rasterbar.com/products/libtorrent/
#
# author: 2009, Adriana Draghici, adriana008@gmail.com
# updates: November 2009, Razvan Deaconescu, razvan.deaconescu@cs.pub.ro
#

import sys
from DatabaseWriter import DatabaseWriter
from DatabaseCommander import DatabaseCommander
import julian
import datetime
import time
import getopt
import re
import socket
import string
import os

# the names used by Tribler for the BitTorrent messages
# (ALLOWED_FAST uses the message id assigned by the Fast Extension, BEP 6;
# it is only looked up by libtorrent_parse_allowed_fast)
bt_msg_types = {"CHOKE": 0, "UNCHOKE": 1, "INTERESTED": 2,
        "NOT_INTERESTED": 3, "HAVE": 4, "BITFIELD": 5,
        "REQUEST": 6, "PIECE": 7, "CANCEL": 8, "DHT_PORT": 9,
        "ALLOWED_FAST": 17}

log_msg_dir = {"RECEIVE": 0, "SEND": 1}

DEBUG = False

LOG_YEAR = 2009

# separator sets used to cut a log line into timestamp / name / payload fields
_SPLIT_PLAIN = r"[<=>]+"
_SPLIT_BRACKETS = r"[\[\]<=>]+"
_SPLIT_FIELDS = r"[\[\]|<=>]+"


#
# convert string "Mon DD HH:MM:SS" to datetime
#
# The log carries no year, so LOG_YEAR is appended before parsing.
#
# @date_string - timestamp text taken from the start of a log line
#
# returns a datetime.datetime, or None (after printing a message) when the
# text is not a valid date
#

def string_to_timestamp(date_string):
    try:
        my_time = time.strptime("%s %s" % (date_string, LOG_YEAR),
                "%b %d %H:%M:%S %Y")
    except ValueError:
        print("Invalid date: %s" % date_string)
        return None

    # only the first six struct_time fields (year .. second) are date/time
    # values; the seventh one is the week day, not the microsecond
    return datetime.datetime(*my_time[:6])


#
# find out whether @line carries the @keyword message and in which direction
#
# The keyword has to be followed by white space or by the end of the line;
# lines reach the parsers already stripped, so messages without payload
# (CHOKE, UNCHOKE, ...) end right after the keyword.
#
# returns a log_msg_dir value, or None when the message is not on the line
#

def _match_direction(line, keyword):
    # RECEIVE is tested first, same as the order of the original checks
    for arrow, direction in (("<==", "RECEIVE"), ("==>", "SEND")):
        pattern = re.escape(arrow + " " + keyword) + r"(?:\s|$)"
        if re.search(pattern, line):
            return log_msg_dir[direction]
    return None


#
# convert a "name: value" field (value in hexadecimal) to integer
#

def _hex_field(text):
    return int(text.split(":")[1].strip(), 16)


#
# common part of all the message parsers
#
# @line - log file line
# @keyword - message name, as it appears in the log and in bt_msg_types
# @separators - regular expression used to split the line into fields
# @payload - optional function (parts, direction) returning the
#            (index, begin, length, port) tuple; all are 0 when missing
#
# returns (timestamp, direction, msg_type, index, begin, length, port) or
# None when the line does not carry the message or its date is invalid
#

def _parse_message(line, keyword, separators, payload=None):
    direction = _match_direction(line, keyword)
    if direction is None:
        return None

    if DEBUG:
        print("--- match %s" % keyword)

    msg_type = bt_msg_types[keyword]
    parts = re.split(separators, line)

    timestamp = string_to_timestamp(parts[0].strip())
    if timestamp is None:
        return None

    index, begin, length, port = 0, 0, 0, 0
    if payload is not None:
        index, begin, length, port = payload(parts, direction)

    return (timestamp, direction, msg_type, index, begin, length, port)


# payload of HAVE: "[ piece: 839]"
def _have_payload(parts, direction):
    return (_hex_field(parts[2]), 0, 0, 0)


# payload of REQUEST and PIECE: "[ piece: 6cc | s: 14000 | l: 4000 ]"
def _block_payload(parts, direction):
    return (_hex_field(parts[2]), _hex_field(parts[3]),
            _hex_field(parts[4]), 0)


# payload of ALLOWED_FAST: "[ 2098 ]"; the value goes in the port slot
def _allowed_fast_payload(parts, direction):
    return (0, 0, 0, int(parts[2].strip()))


# payload of DHT_PORT: decimal "[ 50200 ]" when sent, "name: hex value"
# when received
def _port_payload(parts, direction):
    if direction == log_msg_dir["RECEIVE"]:
        port = _hex_field(parts[2])
    else:
        port = int(parts[2].strip())
    return (0, 0, 0, port)


#
# parse choke line in libtorrent log file
#
# sample line
# Jan 08 22:39:50 <== CHOKE
#

def libtorrent_parse_choke(line):
    return _parse_message(line, "CHOKE", _SPLIT_PLAIN)


#
# parse unchoke line in libtorrent log file
#
# sample line
# Jan 08 22:40:00 <== UNCHOKE
#

def libtorrent_parse_unchoke(line):
    return _parse_message(line, "UNCHOKE", _SPLIT_PLAIN)


#
# parse interested line in libtorrent log file
#
# sample line
# Jan 08 22:20:48 ==> INTERESTED
#

def libtorrent_parse_interested(line):
    return _parse_message(line, "INTERESTED", _SPLIT_PLAIN)


#
# parse not interested line in libtorrent log file
#
# sample line
# Jan 08 22:39:49 ==> NOT_INTERESTED
#

def libtorrent_parse_not_interested(line):
    return _parse_message(line, "NOT_INTERESTED", _SPLIT_PLAIN)


#
# parse have line in libtorrent log file
#
# sample line
# Jan 08 22:20:48 <== HAVE [ piece: 839]
#

def libtorrent_parse_have(line):
    return _parse_message(line, "HAVE", _SPLIT_BRACKETS, _have_payload)


#
# parse bitfield line in libtorrent log file
#
# sample line
# Jan 08 22:20:48 ==> BITFIELD 00000...
#

def libtorrent_parse_bitfield(line):
    return _parse_message(line, "BITFIELD", _SPLIT_PLAIN)


#
# parse request line in libtorrent log file
#
# sample line
# Jan 08 22:39:50 <== REQUEST [ piece: 6cc | s: 14000 | l: 4000 ]
#

def libtorrent_parse_request(line):
    return _parse_message(line, "REQUEST", _SPLIT_FIELDS, _block_payload)


#
# parse piece line in libtorrent log file
#
# sample line
# Jan 08 22:39:50 ==> PIECE [ piece: 5c6 | s: 24000 | l: 4000 ]
#

def libtorrent_parse_piece(line):
    return _parse_message(line, "PIECE", _SPLIT_FIELDS, _block_payload)


#
# no cancel line in libtorrent log files
#

def libtorrent_parse_cancel(line):
    return None


#
# parse allowed fast line in libtorrent log file
# (not used by libtorrent_parse_log_line)
#
# sample line
# Jan 08 22:20:48 ==> ALLOWED_FAST [ 2098 ]
#

def libtorrent_parse_allowed_fast(line):
    return _parse_message(line, "ALLOWED_FAST", _SPLIT_BRACKETS,
            _allowed_fast_payload)


#
# parse DHT port line in libtorrent log file
#
# sample line
# Jan 08 22:20:48 ==> DHT_PORT [ 50200 ]
#

def libtorrent_parse_port(line):
    return _parse_message(line, "DHT_PORT", _SPLIT_BRACKETS, _port_payload)


#
# parse libtorrent-rasterbar log file line
#
# @line: libtorrent parse log file
#
# returns the tuple of the first parser recognizing the line, or None
#

def libtorrent_parse_log_line(line):
    parsers = (
        libtorrent_parse_choke,
        libtorrent_parse_unchoke,
        libtorrent_parse_interested,
        libtorrent_parse_not_interested,
        libtorrent_parse_have,
        libtorrent_parse_bitfield,
        libtorrent_parse_request,
        libtorrent_parse_piece,
        libtorrent_parse_cancel,
        libtorrent_parse_port,
    )

    for parser in parsers:
        result = parser(line)
        if result is not None:
            return result

    return None


#
# parse libtorrent-rasterbar log file
#
# @dbw - DatabaseWriter instance; when None the lines are parsed, but
#        nothing is stored
# @client_session_id - client session id in swarm
# @logfile - log file
#

def libtorrent_parse_log_file(dbw, client_session_id, logfile):

    if not os.path.exists(logfile):
        print("No such file: %s" % logfile)
        return

    basename = os.path.basename(logfile)

    # file name has to follow the ${IP_ADDRESS}_${PORT}.log syntax
    name_parts = basename.split("_")
    if len(name_parts) < 2:
        print("Invalid file name:  %s" % basename)
        return
    peer_ip = name_parts[0]

    port_parts = name_parts[1].split(".")
    if len(port_parts) < 2:
        print("Invalid file name:  %s" % basename)
        return
    str_peer_port = port_parts[0]
    extension = port_parts[1]

    try:
        socket.inet_aton(peer_ip)
    except socket.error:
        print("Invalid IP address: %s" % peer_ip)
        return

    try:
        peer_port = int(str_peer_port)
    except ValueError:
        # int() signals a non-numeric string through ValueError
        print("Invalid port: %s" % str_peer_port)
        return

    if extension != "log":
        print("Invalid file name:  %s" % basename)
        return

    try:
        with open(logfile, "r") as fin:
            for raw_line in fin:
                line = raw_line.strip()

                if DEBUG:
                    print("+++ %s" % line)

                result = libtorrent_parse_log_line(line)
                if result is None:
                    continue

                (timestamp, direction, msg_type, index, begin, length,
                        listen_port) = result
                if DEBUG:
                    print(result)

                if dbw is not None:
                    dbw.add_verbose_message_datetime(client_session_id,
                            timestamp, direction, peer_ip, peer_port,
                            msg_type, index, begin, length, listen_port)

    except IOError:
        print("Error processing file %s." % logfile)


def usage():
    print("Usage: python LogParser.py -i|--id id -f|--file log_file database")
    print("id:")
    print("\t--id")
    print("\t-i\t\tclient_session_id")
    print("\tlog_file:")
    print("\t--file")
    print("\t-f\t\tlog_file for libtorrent")
    print("\tdatabase\t\tSQLite database file")
    print("\t--help")
    print("\t-h\t\t\tprint this help screen")


#
# parse the log file given as first argument without storing anything
#

def main_just_parse():
    filename = sys.argv[1]
    client_session_id = 1
    libtorrent_parse_log_file(None, client_session_id, filename)


#
# return the last row produced by @cursor, or None when there is no row
#

def _last_row(cursor):
    last = None
    for last in cursor:
        pass
    return last


#
# command line entry point: check that the client session exists in the
# database, ask for confirmation and store the parsed messages
#

def main_with_DB():

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hi:f:", ["help",
            "id=", "file="])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    client_session_id = None
    filename = None
    database = None

    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif o in ("-i", "--id"):
            try:
                client_session_id = int(a)
            except ValueError:
                print("Error: invalid client session id: %s" % a)
                sys.exit(2)
        elif o in ("-f", "--file"):
            filename = a
        else:
            assert False, "unhandled option"

    if client_session_id is None:
        print("Error: no client session id.")
        sys.exit(2)

    if filename is None:
        print("Error: no log file.")
        sys.exit(2)

    # no database passed as argument
    if len(args) != 1:
        print("Error: no database file passed as argument.")
        sys.exit(2)
    database = args[0]

    dbc = DatabaseCommander(database)

    # check for client_session_id, swarm_id, btclient_id
    cursor = dbc.select_client_sessions_by_id(client_session_id)
    session_row = None
    if cursor is not None:
        session_row = _last_row(cursor)
    if session_row is None:
        print("Error: no client session id (%d) in database." % client_session_id)
        sys.exit(2)

    swarm_id = session_row[1]
    btclient_id = session_row[2]

    cursor = dbc.select_swarms(swarm_id)
    swarm_row = None
    if cursor is not None:
        swarm_row = _last_row(cursor)
    if swarm_row is None:
        print("Error: no swarm id (%d) in database." % swarm_id)
        sys.exit(2)

    cursor = dbc.select_btclients(btclient_id)
    btclient_row = None
    if cursor is not None:
        btclient_row = _last_row(cursor)
    if btclient_row is None:
        print("Error: no client id (%d) in database." % btclient_id)
        sys.exit(2)

    print("Client session row is: ")
    print("    %s" % (session_row,))
    print("Swarm row is: ")
    print("    %s" % (swarm_row,))
    print("Client row is: ")
    print("    %s" % (btclient_row,))

    # the prompt stays on the same line as the answer
    sys.stdout.write("\nContinue parsing on file %s? (y/n) " % filename)
    sys.stdout.flush()
    try:
        ans = sys.stdin.readline().strip()
        if ans != "y":
            sys.exit(0)
    except IOError:
        print("Error reading standard input.")
        sys.exit(2)
    print("")

    # parse log file
    dbw = DatabaseWriter(database)
    libtorrent_parse_log_file(dbw, client_session_id, filename)


if __name__ == "__main__":
    sys.exit(main_with_DB())
    #sys.exit(main_just_parse())