ppf: TriblerStatusParser, parses test files without writing to DB.
author: Adriana Draghici <adriana008@gmail.com>
Thu, 22 Apr 2010 14:54:38 +0000 (14:54 +0000)
committer: Adriana Draghici <adriana008@gmail.com>
Thu, 22 Apr 2010 14:54:38 +0000 (14:54 +0000)
ppf/log-parser/generic/TriblerStatusParser.py [new file with mode: 0644]
ppf/log-samples/tribler/status_sample.out [moved from ppf/log-samples/tribler/verbose_sample2.log with 51% similarity]

diff --git a/ppf/log-parser/generic/TriblerStatusParser.py b/ppf/log-parser/generic/TriblerStatusParser.py
new file mode 100644 (file)
index 0000000..3e1749d
--- /dev/null
@@ -0,0 +1,306 @@
+#!/usr/bin/env python
+
+import sys
+import getopt
+import re
+#from DatabaseWriter import DatabaseWriter
+#from DatabaseCommander import DatabaseCommander
+from GenericStatusParser import GenericStatusParser
+import julian
+import datetime
+
+import logging
+
+
class TriblerStatusParser(GenericStatusParser):
    """
    Parser class used for parsing Tribler BitTorrent log messages.
    @author Adriana Draghici <adriana.draghici@cti.pub.ro>
    """

    # NOTE(review): class-level attribute, so the mapping is shared by all
    # TriblerStatusParser instances -- confirm only one parser is used at
    # a time, otherwise sizes from different logs mix.
    files_sizes = {} # dictionary: key - filename, value - filesize
    # configure logging (change to logging.ERROR when no DEBUG required)
    # NOTE(review): runs at class-definition time, i.e. configures the root
    # logger as a side effect of importing this module.
    logging.basicConfig(level=logging.DEBUG)
    def __init__(self):
        pass
+
+    def is_status_line(self, line):
+        """  Check if status line. All status messages contain a 
+             status string (e.g. DLSTATUS_DOWNLOADING). 
+             @return boolean
+        """
+        if line.find("DLSTATUS_DOWNLOADING") > -1 or line.find("DLSTATUS_SEEDING") > -1:
+            return True
+        return False
+
+    def is_single_download_line(self, line):
+        """ Tribler's SingleDownload lines contain information about the
+            torrent file.
+            @return boolean
+        """ 
+        if line.find("SingleDownload") == -1:
+            return False
+        return True
+    
+    def get_file_size(self, line):
+        """ Parse a line with this format: 
+            SingleDownload: save_as( u'<filename>' <size_in_bytes> '<download_folder>' <is_dir> )
+            Saves the file name and size. If the line does not correspond to this format, it does nothing.
+        """
+        index = -1
+        parts = []
+        if line.find("save_as") != -1:
+            parts = line.split("'")
+            self.files_sizes[parts[1]] =  int(parts[2]) # saves the filename and its size in bytes
+    
    def canon_num_peers(self, non_canon_value):
        """ Canonicalize a peer-count token, e.g. "2" -> 2.
            @return integer """
        return int(non_canon_value)
+
    def canon_dht(self, non_canon_value):
        """ Canonicalize a DHT peer-count token.
            NOTE(review): never called from parse_status_line, which leaves
            dht fixed at 0 -- confirm whether Tribler logs a dht field.
            @return integer  """
        return int(non_canon_value)
+
+    def canon_download_speed(self, non_canon_value):
+        """@return integer, eg. 119.51kb/s -> 119 """
+        return int(float(non_canon_value.strip("KB/s")))
+
+    def canon_upload_speed(self, non_canon_value):
+        """@return integer, eg. 12119.51kb/s -> 12119 """
+        return int(float(non_canon_value.strip("KB/s")))
+
+    def canon_download_size(self, non_canon_value, filename):
+        """@return integer, eg.  25% -> 25*file_size/100"""
+        return int(float(non_canon_value.strip("%")) * self.files_sizes[filename] / 100)
+
    def canon_upload_size(self, non_canon_value):
        # Not implemented: Tribler status lines carry no uploaded-bytes
        # field, so this canonicalizer is a stub returning None.
        pass
+   
+    def canon_eta(self, non_canon_value):
+        """@return integer, eg. 26.456787 -> 26 (seconds)"""
+        if non_canon_value != 'None':
+            return int(float(non_canon_value))
+        return None
+
+    def parse_timestamp(self, date, time):
+        """ Get date and timestamp and transform it into datetime format.
+            Format: dd-mm-yyyy hh:mm:ss
+            @return datetime object
+        """
+        
+        date_array = date.split("-");
+        time_array = time.split(":");
+        if len(date_array) != 3 or len(time_array) != 3:
+            return None
+        
+        timestamp = datetime.datetime(int(date_array[2]), int(date_array[1]), int(date_array[0]), #year, month, day
+                                        int(time_array[0]), int(time_array[1]), int(time_array[2])) #hour, min, sec
+        return timestamp
+
    # return list of required 
    def parse_status_line(self, line):
        # Parse one Tribler status line into canonical values.
        # Returns the tuple (timestamp, num_peers, dht, download_speed,
        # upload_speed, download_size, upload_size, eta), or None when the
        # line does not split into exactly 14 whitespace-separated tokens.
        #
        # sample tribler status line
        # 03-Nov-2009 12:18:55   aqua.mpeg DLSTATUS_DOWNLOADING 29.84% None up     0.00KB/s down  4414.39KB/s eta 12 peers 2
        #
        num_peers = 0
        dht = 0                  # never parsed below; always returned as 0
        download_speed = 0
        upload_speed = 0
        download_size = 0
        upload_size = 0          # never parsed below; always returned as 0
        eta = 0
        filename = ""
        timestamp = None
        # NOTE(review): the pattern "\ *" can match the empty string; under
        # Python 2 re.split skips empty matches, so this splits on runs of
        # spaces, but the behaviour changed in Python >= 3.7 -- confirm the
        # target interpreter before porting.
        string_array = re.split("\ *", line)
        
        logging.debug("string_array is: " + str(string_array))
        if len(string_array) != 14:
            logging.error("Invalid line format!")
            return None

        # get timestamp and transform it in datetime format
        timestamp= self.parse_timestamp(string_array[0], string_array[1])
        
        filename = string_array[2]
        
        i = 3 
        # Walk the remaining tokens as keyword/value pairs; tokens that do
        # not start a known pair are skipped one at a time.
        while i < len(string_array): #string_array:
            if string_array[i] == "peers":
                num_peers = self.canon_num_peers(string_array[i+1])
                i = i + 2
                continue
            if string_array[i] == "down":
                download_speed = self.canon_download_speed(string_array[i+1])
                i = i + 2
                continue
            if string_array[i] == "up":
                upload_speed = self.canon_upload_speed(string_array[i+1])
                i = i + 2
                continue
            # the token right after the DLSTATUS_* state is the completion
            # percentage, converted to bytes via the recorded file size
            if string_array[i] == "DLSTATUS_DOWNLOADING" or string_array[i] == "DLSTATUS_SEEDING":
                download_size = self.canon_download_size(string_array[i+1], filename)
                i = i + 2
                continue
            if string_array[i] == "eta":
                eta = self.canon_eta(string_array[i+1])
                i = i + 2
                continue
            i = i + 1
        return (timestamp, num_peers, dht, download_speed, upload_speed, download_size, upload_size, eta)
+
+    def parse_status_file(self, client_session_id, session_start, filename, callback_func, callback_arg = None):
+        try:
+            fin = open(filename, "r")
+            while 1:
+                line = fin.readline()
+                if not line:
+                    break
+
+                line = line.strip()
+                if self.is_single_download_line(line) == True:
+                    self.get_file_size(line)
+
+                if self.is_status_line(line) == False:
+                    continue
+
+                (message_time, num_peers, dht, download_speed, upload_speed, download_size, upload_size, eta_seconds) = self.parse_status_line(line)
+                print "lista", (message_time, num_peers, dht, download_speed, upload_speed, download_size, upload_size, eta_seconds)
+                logging.debug("(%s, %d, %d,%d kb/s, %d kb/s, %d bytes, %d bytes)" % (message_time, num_peers, eta_seconds, 
+                                                                                    download_speed, upload_speed, 
+                                                                                    download_size, upload_size))
+                if callback_arg == None:
+                    """callback_func(client_session_id, message_time,
+                            num_peers, dht,
+                            download_speed, upload_speed,
+                            download_size, upload_size,
+                            eta_seconds)
+                    """
+                    pass
+                else:
+                    callback_func(callback_arg, client_session_id, message_time,
+                            num_peers, dht,
+                            download_speed, upload_speed,
+                            download_size, upload_size,
+                            eta_seconds)
+
+        except IOError:
+            logger.error("Error processing file %s." %filename)
+
def db_write(dbw, client_session_id, message_time,
                            num_peers, dht,
                            download_speed, upload_speed,
                            download_size, upload_size,
                            eta_seconds):
    # Stub callback with the callback_func signature expected by
    # TriblerStatusParser.parse_status_file (callback_arg first);
    # intentionally writes nothing to the database.
    pass
+
def usage():
    """Print command-line usage information for this script to stdout."""
    print "Usage: python TriblerStatusParser.py -i|--id id status_file"
    print "id:"
    print "\t--id"
    print "\t-i\t\tclient_session_id"
    print "\tstatus_file:"
    print "\t--file"
    print "\t-f\t\tstatus_file for tribler"
    print "\tdatabase\t\tSQLite database file"
    print "\t--help"
    print "\t-h\t\t\tprint this help screen"
+
def main():
    """Command-line entry point: parse options, look up the client
    session, swarm and client rows in the SQLite database, ask for
    confirmation, then parse the status file writing through db_write.

    NOTE(review): DatabaseCommander and DatabaseWriter are used below but
    their imports are commented out at the top of the file, so running
    main() raises NameError -- confirm whether main() is intentionally
    disabled in favour of main_just_parse().
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hi:f:", ["help",
            "id=", "file="])
    except getopt.GetoptError, err:
        print str(err)
        usage()
        sys.exit(2)

    client_session_id = None
    filename = None
    database = None

    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif o in ("-i", "--id"):
            client_session_id = int(a)
        elif o in ("-f", "--file"):
            filename = a
        else:
            assert False, "unhandled option"

    if client_session_id == None:
        print "Error: no client session id."
        sys.exit(2)

    if filename == None:
        print "Error: no status file."
        sys.exit(2)

    # no database passed as argument
    if len(args) != 1:
        print "Error: no database file passed as argument."
        sys.exit(2)
    database = args[0]

    dbc = DatabaseCommander(database)

    # check for client_session_id, swarm_id, btclient_id
    cursor = dbc.select_client_sessions_by_id(client_session_id)
    if cursor == None:
        print "Error: no client session id (%d) in database." % client_session_id
        sys.exit(2)
    # exhaust the cursor; session_row ends up holding the last row
    for session_row in cursor:
        pass

    swarm_id = session_row[1]
    btclient_id = session_row[2]

    cursor = dbc.select_swarms(swarm_id)
    if cursor == None:
        print "Error: no swarm id (%d) in database." % swarm_id
        sys.exit(2)
    for swarm_row in cursor:
        pass

    cursor = dbc.select_btclients(btclient_id)
    if cursor == None:
        print "Error: no client id (%d) in database." % btclient_id
        sys.exit(2)
    for btclient_row in cursor:
        pass

    print "Client session row is: "
    print "    ", session_row
    print "Swarm row is: "
    print "    ", swarm_row
    print "Client row is: "
    print "    ", btclient_row

    # interactive confirmation before parsing starts
    print "\nContinue parsing on file %s? (y/n) " % filename,
    try:
        ans = sys.stdin.readline().strip()
        if ans != "y":
            sys.exit(0)
    except IOError:
        print "Error reading standard input."
        sys.exit(2)
    print ""

    # column 11 of the session row holds the session start as a julian date
    session_start = julian.julianToDatetime(session_row[11])

    # parse status file
    dbw = DatabaseWriter(database)
    sp = TriblerStatusParser()
    sp.parse_status_file(client_session_id, session_start, filename, db_write, dbw)
+
+
def main_just_parse():
    """Parse the status file named by the first CLI argument without
    touching the database; the client session id is hard-coded to 1 and
    no callback is installed (parse-only mode)."""
    status_file = sys.argv[1]
    client_session_id = 1
    parser = TriblerStatusParser()
    parser.parse_status_file(client_session_id, None, status_file, None, None)
+
+if __name__ == "__main__":
+    sys.exit(main_just_parse())
similarity index 51%
rename from ppf/log-samples/tribler/verbose_sample2.log
rename to ppf/log-samples/tribler/status_sample.out
index ed2e986..fa9e2f7 100644 (file)
@@ -1,68 +1,23 @@
 03-11-2009 12:18:50   SingleDownload: save_as( u'aqua.mpeg' 94918656 '../p2p-dld/tribler/' False )
 03-11-2009 12:18:50   SingleDownload: perform_hashcheck()
 03-11-2009 12:18:50   SingleDownload: hashcheck_done()
-03-11-2009 12:18:50   connecter: Disabling overlay
-03-11-2009 12:18:50   connecter: Disabling overlay
-03-11-2009 12:18:50   Connecter: EXTEND: my dict {'Tr_LIVE_v1': 254, 'ut_pex': 1}
 03-11-2009 12:18:50   aqua.mpeg DLSTATUS_HASHCHECKING  0.00% None up     0.00KB/s down     0.00KB/s eta 0.0 peers 0
-03-11-2009 12:18:51   choker: _rechoke: checkinternalbias 0
-03-11-2009 12:18:51   choker: _rechoke: NORMAL UNCHOKE []
-choker: _rechoke: G2G UNCHOKE []
-03-11-2009 12:18:51   choker: _rechoke: checkinternalbias 0
-03-11-2009 12:18:51   choker: _rechoke: NORMAL UNCHOKE []
-choker: _rechoke: G2G UNCHOKE []
 03-11-2009 12:18:52   aqua.mpeg DLSTATUS_DOWNLOADING  6.85% None up     0.00KB/s down  2113.12KB/s eta 26.4850236947 peers 2
 03-11-2009 12:18:53   aqua.mpeg DLSTATUS_DOWNLOADING 14.53% None up     0.00KB/s down  3283.14KB/s eta 17.5126024978 peers 2
 03-11-2009 12:18:54   aqua.mpeg DLSTATUS_DOWNLOADING 22.37% None up     0.00KB/s down  3964.17KB/s eta 14.2589440652 peers 2
-03-11-2009 12:18:55   choker: _rechoke: checkinternalbias 0
-03-11-2009 12:18:55   choker: _rechoke: NORMAL UNCHOKE []
-choker: _rechoke: G2G UNCHOKE []
 03-11-2009 12:18:55   aqua.mpeg DLSTATUS_DOWNLOADING 29.84% None up     0.00KB/s down  4414.39KB/s eta 12.0159059199 peers 2
 03-11-2009 12:18:56   aqua.mpeg DLSTATUS_DOWNLOADING 37.37% None up     0.00KB/s down  4680.87KB/s eta 10.9181039961 peers 2
 03-11-2009 12:18:57   aqua.mpeg DLSTATUS_DOWNLOADING 44.07% None up     0.00KB/s down  4842.35KB/s eta 9.91602307878 peers 2
 03-11-2009 12:18:58   aqua.mpeg DLSTATUS_DOWNLOADING 51.31% None up     0.00KB/s down  4970.70KB/s eta 7.88249513703 peers 2
 03-11-2009 12:18:59   aqua.mpeg DLSTATUS_DOWNLOADING 57.88% None up     0.00KB/s down  5060.50KB/s eta 6.88155028421 peers 2
-03-11-2009 12:19:00   choker: _rechoke: checkinternalbias 0
-03-11-2009 12:19:00   choker: _rechoke: NORMAL UNCHOKE []
-choker: _rechoke: G2G UNCHOKE []
 03-11-2009 12:19:00   aqua.mpeg DLSTATUS_DOWNLOADING 64.60% None up     0.00KB/s down  5140.12KB/s eta 5.86957618791 peers 2
 03-11-2009 12:19:01   aqua.mpeg DLSTATUS_DOWNLOADING 71.95% None up     0.00KB/s down  5220.68KB/s eta 4.78167411882 peers 2
 03-11-2009 12:19:02   aqua.mpeg DLSTATUS_DOWNLOADING 79.18% None up     0.00KB/s down  5309.69KB/s eta 3.76926920014 peers 2
 03-11-2009 12:19:03   aqua.mpeg DLSTATUS_DOWNLOADING 86.62% None up     0.00KB/s down  5371.01KB/s eta 2.68195411759 peers 2
 03-11-2009 12:19:04   aqua.mpeg DLSTATUS_DOWNLOADING 93.89% None up     0.00KB/s down  5440.21KB/s eta 1.67072531777 peers 2
-03-11-2009 12:19:05   choker: _rechoke: checkinternalbias 0
-03-11-2009 12:19:05   choker: _rechoke: NORMAL UNCHOKE []
-choker: _rechoke: G2G UNCHOKE []
-03-11-2009 12:19:05   SingleDownload::finishedfunc called: Download is complete *******************************
-bartercast: no overlay bridge found
-bartercast: no overlay bridge found
 03-11-2009 12:19:06   aqua.mpeg DLSTATUS_SEEDING 100.00% None up     0.00KB/s down     0.00KB/s eta 0 peers 0
-03-11-2009 12:19:06   choker: _rechoke: checkinternalbias 0
-03-11-2009 12:19:06   choker: _rechoke: NORMAL UNCHOKE []
-choker: _rechoke: G2G UNCHOKE []
-03-11-2009 12:19:06   choker: _rechoke: checkinternalbias 0
-03-11-2009 12:19:06   choker: _rechoke: NORMAL UNCHOKE []
-choker: _rechoke: G2G UNCHOKE []
-bartercast: no overlay bridge found
-bartercast: no overlay bridge found
 03-11-2009 12:19:07   aqua.mpeg DLSTATUS_SEEDING 100.00% None up     0.00KB/s down     0.00KB/s eta 0 peers 0
 03-11-2009 12:19:09   aqua.mpeg DLSTATUS_SEEDING 100.00% None up     0.00KB/s down     0.00KB/s eta 0 peers 0
-03-11-2009 12:19:10   choker: _rechoke: checkinternalbias 0
-03-11-2009 12:19:10   choker: _rechoke: NORMAL UNCHOKE []
-choker: _rechoke: G2G UNCHOKE []
 03-11-2009 12:19:10   aqua.mpeg DLSTATUS_SEEDING 100.00% None up     0.00KB/s down     0.00KB/s eta 0 peers 0
 03-11-2009 12:19:12   aqua.mpeg DLSTATUS_SEEDING 100.00% None up     0.00KB/s down     0.00KB/s eta 0 peers 0
 03-11-2009 12:19:13   aqua.mpeg DLSTATUS_SEEDING 100.00% None up     0.00KB/s down     0.00KB/s eta 0 peers 0
-Traceback (most recent call last):
-  File "Tribler/Tools/cmdlinedl.py", line 128, in main
-    time.sleep(sys.maxint/2048)
-KeyboardInterrupt
-03-11-2009 12:19:13   Session: checkpoint_shutdown
-03-11-2009 12:19:14   SingleDownload: shutdown
-03-11-2009 12:19:14   tlm: Number of threads still running 6
-03-11-2009 12:19:14   tlm: Thread still running MainThread daemon False
-03-11-2009 12:19:14   tlm: Thread still running OverlayThread-4 daemon True
-03-11-2009 12:19:14   tlm: Thread still running NetworkThread-3 daemon True
-03-11-2009 12:19:14   tlm: Thread still running UPnPThread-5 daemon True
-03-11-2009 12:19:14   tlm: Thread still running TrackerRerequestAThread-10 daemon True
-03-11-2009 12:19:14   tlm: Thread still running TrackerRerequestBThread-11 daemon True