fully functional libtorrent log parser
author Razvan Deaconescu <razvan.deaconescu@cs.pub.ro>
Sat, 21 Nov 2009 19:10:30 +0000 (21:10 +0200)
committer Razvan Deaconescu <razvan.deaconescu@cs.pub.ro>
Sat, 21 Nov 2009 19:11:39 +0000 (21:11 +0200)
log-parser/libtorrent/LogParser.py

index 96ba9b9..a162e29 100644 (file)
@@ -1,6 +1,570 @@
 #!/usr/bin/env python
 
+#
+# Parser for libtorrent-rasterbar verbose messages
+#      http://www.rasterbar.com/products/libtorrent/
+#
+# author: 2009, Adriana Draghici, adriana008@gmail.com
+# updates: November 2009, Razvan Deaconescu, razvan.deaconescu@cs.pub.ro
+#
+
 import sys
 from DatabaseWriter import DatabaseWriter
+from DatabaseCommander import DatabaseCommander
+import julian
+import datetime
+import time
+import getopt
+import re
+import socket
+import string
+import os
+
+# the names used by Tribler for the BitTorrent messages 
+bt_msg_types = {"CHOKE": 0, "UNCHOKE": 1, "INTERESTED": 2,
+        "NOT_INTERESTED": 3, "HAVE": 4, "BITFIELD": 5,
+        "REQUEST": 6, "PIECE": 7, "CANCEL": 8, "DHT_PORT": 9}
+
+log_msg_dir = {"RECEIVE": 0, "SEND": 1}
+
+DEBUG = False
+
+LOG_YEAR=2009
+
+#
+# convert string "Mon DD HH:MM:SS" to datetime
+#
+
+def string_to_timestamp(date_string):
+    try:
+        my_time = time.strptime(date_string + " %s" % (LOG_YEAR), "%b %d %H:%M:%S %Y")
+        my_date = datetime.datetime(my_time[0], my_time[1], my_time[2], my_time[3], my_time[4], my_time[5], my_time[6])
+    except ValueError:
+        print "Invalid date:", date_string
+
+    return my_date
+
+#
+# parse choke line in libtorrent log file
+#
+# sample line
+#    Jan 08 22:39:50 <== CHOKE
+#
+
+def libtorrent_parse_choke(line):
+    if string.find(line, "<== CHOKE ") != -1:
+        direction = log_msg_dir["RECEIVE"]
+    elif string.find(line, "==> CHOKE ") != -1:
+        direction = log_msg_dir["SEND"]
+    else:
+        return None
+
+    if DEBUG == True:
+        print "--- match CHOKE"
+
+    msg_type = bt_msg_types["CHOKE"]
+    parts = re.split("[<=>]+", line)
+
+    timestamp = string_to_timestamp(parts[0].strip())
+    index = 0
+    begin = 0
+    length = 0
+    port = 0
+
+    return (timestamp, direction, msg_type, index, begin, length, port)
+
+#
+# parse unchoke line in libtorrent log file
+#
+# sample line
+#    Jan 08 22:40:00 <== UNCHOKE
+
+def libtorrent_parse_unchoke(line):
+    if string.find(line, "<== UNCHOKE ") != -1:
+        direction = log_msg_dir["RECEIVE"]
+    elif string.find(line, "==> UNCHOKE ") != -1:
+        direction = log_msg_dir["SEND"]
+    else:
+        return None
+
+    if DEBUG == True:
+        print "--- match UNCHOKE"
+
+    msg_type = bt_msg_types["UNCHOKE"]
+    parts = re.split("[<=>]+", line)
+
+    timestamp = string_to_timestamp(parts[0].strip())
+    index = 0
+    begin = 0
+    length = 0
+    port = 0
+
+    return (timestamp, direction, msg_type, index, begin, length, port)
+
+#
+# parse interested line in libtorrent log file
+#
+# sample line
+#    Jan 08 22:20:48 ==> INTERESTED
+#
+
+def libtorrent_parse_interested(line):
+    if string.find(line, "<== INTERESTED ") != -1:
+        direction = log_msg_dir["RECEIVE"]
+    elif string.find(line, "==> INTERESTED ") != -1:
+        direction = log_msg_dir["SEND"]
+    else:
+        return None
+
+    if DEBUG == True:
+        print "--- match INTERESTED"
+
+    msg_type = bt_msg_types["INTERESTED"]
+    parts = re.split("[<=>]+", line)
+
+    timestamp = string_to_timestamp(parts[0].strip())
+    index = 0
+    begin = 0
+    length = 0
+    port = 0
+
+    return (timestamp, direction, msg_type, index, begin, length, port)
+
+#
+# parse not interested line in libtorrent log file
+#
+# sample line
+#    Jan 08 22:39:49 ==> NOT_INTERESTED
+#
+
+def libtorrent_parse_not_interested(line):
+    if string.find(line, "<== NOT_INTERESTED ") != -1:
+        direction = log_msg_dir["RECEIVE"]
+    elif string.find(line, "==> NOT_INTERESTED ") != -1:
+        direction = log_msg_dir["SEND"]
+    else:
+        return None
+
+    if DEBUG == True:
+        print "--- match NOT_INTERESTED"
+
+    msg_type = bt_msg_types["NOT_INTERESTED"]
+    parts = re.split("[<=>]+", line)
+
+    timestamp = string_to_timestamp(parts[0].strip())
+    index = 0
+    begin = 0
+    length = 0
+    port = 0
+
+    return (timestamp, direction, msg_type, index, begin, length, port)
+
+#
+# parse have line in libtorrent log file
+#
+# sample line
+#    Jan 08 22:20:48 <== HAVE    [ piece: 839]
+#
+
+def libtorrent_parse_have(line):
+    if string.find(line, "<== HAVE ") != -1:
+        direction = log_msg_dir["RECEIVE"]
+    elif string.find(line, "==> HAVE ") != -1:
+        direction = log_msg_dir["SEND"]
+    else:
+        return None
+
+    if DEBUG == True:
+        print "--- match HAVE"
+
+    msg_type = bt_msg_types["HAVE"]
+    parts = re.split("[\[\]<=>]+", line)
+
+    timestamp = string_to_timestamp(parts[0].strip())
+    index = int("0x" + re.split(":", parts[2].strip())[1].strip(), 16)
+    begin = 0
+    length = 0
+    port = 0
+
+    return (timestamp, direction, msg_type, index, begin, length, port)
+
+#
+# parse bitfield line in libtorrent log file
+#
+# sample line
+#    Jan 08 22:20:48 ==> BITFIELD 00000...
+#
+
+def libtorrent_parse_bitfield(line):
+    if string.find(line, "<== BITFIELD ") != -1:
+        direction = log_msg_dir["RECEIVE"]
+    elif string.find(line, "==> BITFIELD ") != -1:
+        direction = log_msg_dir["SEND"]
+    else:
+        return None
+
+    if DEBUG == True:
+        print "--- match BITFIELD"
+
+    msg_type = bt_msg_types["BITFIELD"]
+    parts = re.split("[<=>]+", line)
+
+    timestamp = string_to_timestamp(parts[0].strip())
+    index = 0
+    begin = 0
+    length = 0
+    port = 0
+
+    return (timestamp, direction, msg_type, index, begin, length, port)
+
+#
+# parse request line in libtorrent log file
+#
+# sample line
+#    Jan 08 22:39:50 <== REQUEST [ piece: 6cc | s: 14000 | l: 4000 ]
+#
+
+def libtorrent_parse_request(line):
+    if string.find(line, "<== REQUEST ") != -1:
+        direction = log_msg_dir["RECEIVE"]
+    elif string.find(line, "==> REQUEST ") != -1:
+        direction = log_msg_dir["SEND"]
+    else:
+        return None
+
+    if DEBUG == True:
+        print "--- match REQUEST"
+
+    msg_type = bt_msg_types["REQUEST"]
+    parts = re.split("[\[\]|<=>]+", line)
+
+    timestamp = string_to_timestamp(parts[0].strip())
+    index = int("0x" + re.split(":", parts[2])[1].strip(), 16)
+    begin = int("0x" + re.split(":", parts[3])[1].strip(), 16)
+    length = int("0x" + re.split(":", parts[4])[1].strip(), 16)
+
+    port = 0
+
+    return (timestamp, direction, msg_type, index, begin, length, port)
+
+#
+# parse piece line in libtorrent log file
+#
+# sample line
+#    Jan 08 22:39:50 ==> PIECE   [ piece: 5c6 | s: 24000 | l: 4000 ]
+#
+
+def libtorrent_parse_piece(line):
+    if string.find(line, "<== PIECE ") != -1:
+        direction = log_msg_dir["RECEIVE"]
+    elif string.find(line, "==> PIECE ") != -1:
+        direction = log_msg_dir["SEND"]
+    else:
+        return None
+
+    if DEBUG == True:
+        print "--- match PIECE"
+
+    msg_type = bt_msg_types["PIECE"]
+    parts = re.split("[\[\]|<=>]+", line)
+
+    timestamp = string_to_timestamp(parts[0].strip())
+    index = int("0x" + re.split(":", parts[2])[1].strip(), 16)
+    begin = int("0x" + re.split(":", parts[3])[1].strip(), 16)
+    length = int("0x" + re.split(":", parts[4])[1].strip(), 16)
+    port = 0
+
+    return (timestamp, direction, msg_type, index, begin, length, port)
+
+#
+# no cancel line in libtorrent log files
+#
+
+def libtorrent_parse_cancel(line):
+    return None
+
+#
+# parse allowed fast line in libtorrent log file
+#
+# sample line
+#    Jan 08 22:20:48 ==> ALLOWED_FAST [ 2098 ]
+#
+
+def libtorrent_parse_allowed_fast(line):
+    if string.find(line, "<== ALLOWED_FAST ") != -1:
+        direction = log_msg_dir["RECEIVE"]
+    elif string.find(line, "==> ALLOWED_FAST ") != -1:
+        direction = log_msg_dir["SEND"]
+    else:
+        return None
+
+    if DEBUG == True:
+        print "--- match ALLOWED_FAST"
+
+    msg_type = bt_msg_types["ALLOWED_FAST"]
+    parts = re.split("[\[\]<=>]+", line)
+
+    timestamp = string_to_timestamp(parts[0].strip())
+    index = 0
+    begin = 0
+    length = 0
+    port = int(parts[2].strip())
+
+    return (timestamp, direction, msg_type, index, begin, length, port)
+
+#
+# parse DHT port line in libtorrent log file
+#
+# sample line
+#    Jan 08 22:20:48 ==> DHT_PORT [ 50200 ]
+#
+
+def libtorrent_parse_port(line):
+    if string.find(line, "<== DHT_PORT ") != -1:
+        direction = log_msg_dir["RECEIVE"]
+    elif string.find(line, "==> DHT_PORT ") != -1:
+        direction = log_msg_dir["SEND"]
+    else:
+        return None
+
+    if DEBUG == True:
+        print "--- match DHT_PORT"
+
+    msg_type = bt_msg_types["DHT_PORT"]
+    parts = re.split("[\[\]<=>]+", line)
+
+    timestamp = string_to_timestamp(parts[0].strip())
+    index = 0
+    begin = 0
+    length = 0
+    if direction == log_msg_dir["RECEIVE"]:
+        port = int("0x" + re.split(":", parts[2])[1].strip(), 16)
+    else:
+        port = int(parts[2].strip())
+
+    return (timestamp, direction, msg_type, index, begin, length, port)
+
+
+#
+# parse libtorrent-rasterbar log file line
+#
+# @line: a single line read from a libtorrent log file
+#
+
+def libtorrent_parse_log_line(line):
+
+    result = libtorrent_parse_choke(line)
+    if result != None:
+        return result
+
+    result = libtorrent_parse_unchoke(line)
+    if result != None:
+        return result
+
+    result = libtorrent_parse_interested(line)
+    if result != None:
+        return result
+
+    result = libtorrent_parse_not_interested(line)
+    if result != None:
+        return result
+
+    result = libtorrent_parse_have(line)
+    if result != None:
+        return result
+
+    result = libtorrent_parse_bitfield(line)
+    if result != None:
+        return result
+
+    result = libtorrent_parse_request(line)
+    if result != None:
+        return result
+
+    result = libtorrent_parse_piece(line)
+    if result != None:
+        return result
+
+    result = libtorrent_parse_cancel(line)
+    if result != None:
+        return result
+
+    result = libtorrent_parse_port(line)
+    if result != None:
+        return result
+
+
+#
+# parse libtorrent-rasterbar log file
+#
+# @dbw - DatabaseWriter instance
+# @client_session_id - client session id in swarm
+# @logfile - log file
+#
+
+def libtorrent_parse_log_file(dbw, client_session_id, logfile):
+
+    if os.path.exists(logfile) == False:
+        print "No such file:", logfile
+
+    basename = os.path.basename(logfile)
+
+    # file name has to follow the ${IP_ADDRESS}_${PORT}.log syntax
+    tmp_parts = re.split("_", basename)
+    peer_ip = tmp_parts[0]
+    tmp_parts2 = re.split("\.", tmp_parts[1])
+    str_peer_port = tmp_parts2[0]
+    extension = tmp_parts2[1]
+
+    try:
+        socket.inet_aton(peer_ip)
+    except socket.error:
+        print "Invalid IP address:", peer_ip
+        return
+
+    try:
+        peer_port = int(str_peer_port)
+    except TypeError:
+        print "Invalid port:", str_peer_port
+        return
+
+    if extension != "log":
+        print "Invalid file name: ", basename
+        return
+
+    try:
+        fin = open(logfile, "r")
+        while 1:
+            line = fin.readline()
+            if not line:
+                break
+
+            line = line.strip()
+
+            if DEBUG == True:
+                print "+++", line
+
+            result = libtorrent_parse_log_line(line)
+            if result == None:
+                continue
+
+            (timestamp, direction, msg_type, index, begin, length, listen_port) = result
+            if DEBUG == True:
+                print result 
+
+            dbw.add_verbose_message_datetime(client_session_id, timestamp,
+                    direction, peer_ip, peer_port, msg_type,
+                    index, begin,length, listen_port)
+
+    except IOError:
+        print "Error processing file %s." %logfile
+
+def usage():
+    print "Usage: python StatusParser.py -i|--id id -f|--file log_file database"
+    print "id:"
+    print "\t--id"
+    print "\t-i\t\tclient_session_id"
+    print "\tstatus_file:"
+    print "\t--file"
+    print "\t-f\t\tstatus_file for tribler"
+    print "\tdatabase\t\tSQLite database file"
+    print "\t--help"
+    print "\t-h\t\t\tprint this help screen"
+
+
+def main_just_parse():
+    filename = sys.argv[1]
+    client_session_id = 1
+    tribler_parse_status_file(None, 1, filename)
+
+
+def main_with_DB():
+
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "hi:f:", ["help",
+            "id=", "file="])
+    except getopt.GetoptError, err:
+        print str(err)
+        usage()
+        sys.exit(2)
+
+    client_session_id = None
+    filename = None
+    database = None
+
+    for o, a in opts:
+        if o in ("-h", "--help"):
+            usage()
+            sys.exit(0)
+        elif o in ("-i", "--id"):
+            client_session_id = int(a)
+        elif o in ("-f", "--file"):
+            filename = a
+        else:
+            assert False, "unhandled option"
+
+    if client_session_id == None:
+        print "Error: no client session id."
+        sys.exit(2)
+
+    if filename == None:
+        print "Error: no status file."
+        sys.exit(2)
+
+    # no database passed as argument
+    if len(args) != 1:
+        print "Error: no database file passed as argument."
+        sys.exit(2)
+    database = args[0]
+
+    dbc = DatabaseCommander(database)
+
+    # check for client_session_id, swarm_id, btclient_id
+    cursor = dbc.select_client_sessions_by_id(client_session_id)
+    if cursor == None:
+        print "Error: no client session id (%d) in database." % client_session_id
+        sys.exit(2)
+    for session_row in cursor:
+        pass
+
+    swarm_id = session_row[1]
+    btclient_id = session_row[2]
+
+    cursor = dbc.select_swarms(swarm_id)
+    if cursor == None:
+        print "Error: no swarm id (%d) in database." % swarm_id
+        sys.exit(2)
+    for swarm_row in cursor:
+        pass
+
+    cursor = dbc.select_btclients(btclient_id)
+    if cursor == None:
+        print "Error: no client id (%d) in database." % btclient_id
+        sys.exit(2)
+    for btclient_row in cursor:
+        pass
+
+    print "Client session row is: "
+    print "    ", session_row
+    print "Swarm row is: "
+    print "    ", swarm_row
+    print "Client row is: "
+    print "    ", btclient_row
+    print "\nContinue parsing on file %s? (y/n) " % filename,
+    try:
+        ans = sys.stdin.readline().strip()
+        if ans != "y":
+            sys.exit(0)
+    except IOError:
+        print "Error reading standard input."
+        sys.exit(2)
+    print ""
+
+    # parse log file
+    dbw = DatabaseWriter(database)
+    libtorrent_parse_log_file(dbw, client_session_id, filename)
+
 
-# TODO
+if __name__ == "__main__":
+    sys.exit(main_with_DB())
+    #sys.exit(main_just_parse())