2 # see LICENSE.txt for license information
# Append the directory two levels above the running script's own directory
# (dirname of sys.argv[0], then '..', '..') to sys.path. The same execpath
# value is later handed to Category.getInstance() in testFilter().
# NOTE(review): presumably this is what lets the Utility/BaseLib imports
# below resolve when the script is run from inside the source tree -- confirm.
5 execpath = os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])), '..', '..')
6 sys.path.append(execpath)
8 from Utility.utility import getMetainfo
9 from BaseLib.Category.Category import Category
13 def testFilter(catfilename, torrentpath):
# Run the category filter over every labelled torrent and report how its
# 'xxx' verdict compares with the stored label.
#
#   catfilename -- label file, passed straight to readCategorisationFile()
#   torrentpath -- directory onto which each labelled torrent file name is
#                  joined before the torrent is loaded with getMetainfo()
#
# Output is printed; the closing summary reports 'good' as total - fn - fp.
#
# NOTE(review): this view of the function has gaps. The opening `if` of the
# comparison chain, any updates to total/porn/fn/fp, and the statement that
# opens the triple-quoted summary text are not visible here -- check the
# complete file before changing the logic.
14 readCategorisationFile(catfilename)
15 #print 'Install_dir is %s' % execpath
16 c = Category.getInstance(execpath, None)
17 total = porn = fn = fp = 0
# tdict maps torrent file name -> bool label; readCategorisationFile() stores
# bool(int(field 1)) under field 0 of each tab-separated line.
18 for tfilename,isporn in tdict.items():
19 torrent = getMetainfo(os.path.join(torrentpath,tfilename))
20 name = torrent['info']['name']
21 cat = c.calculateCategory(torrent, name)
# The filter's verdict is positive only when the category is exactly ['xxx'].
22 fporn = (cat == ['xxx'])
27 print (isporn, fporn), 'good', name
# Label is positive but the filter did not flag the torrent.
29 elif isporn and not fporn:
31 print 'FALSE NEGATIVE'
32 showTorrent(os.path.join(torrentpath,tfilename))
# The filter flagged the torrent but the label is negative.
33 elif not isporn and fporn:
35 print 'FALSE POSITIVE'
36 showTorrent(os.path.join(torrentpath,tfilename))
39 Total torrents: %(total)d
40 XXX torrents: %(porn)d
41 Correct filtered: %(good)d
42 False negatives: %(fn)d
43 False positives: %(fp)d
44 """ % {'total':total, 'porn':porn, 'fn':fn,'fp':fp,'good':total-fn-fp}
46 def readCategorisationFile(filename):
# Load the label file `filename` into tdict (read by testFilter()).
#
# Each line is split on a tab: field 0 becomes the key and field 1 is parsed
# as an integer and stored as a bool. This matches the '%s\t%d\n' records
# written by saveTorrent().
#
# NOTE(review): lines are missing from this view -- where tdict is created,
# the loop header over `lines`, whether the file is closed, and whatever
# guards the final message (presumably an except clause for a failed open).
# Confirm against the complete file.
# file() is the Python 2 spelling of open().
50 f = file(filename, 'r')
51 lines = f.read().splitlines()
54 parts = line.split('\t')
55 tdict[parts[0]] = bool(int(parts[1]))
58 print 'No file %s found, starting with empty file' % filename
60 def getTorrentData(path, max_num=-1):
# Collect the paths of the '.torrent' files found directly in `path`
# (os.listdir, so no recursion into subdirectories).
#
#   path    -- directory to scan
#   max_num -- defaults to -1; its use is not visible in this view
#
# NOTE(review): the initialisation of `torrents` and `i`, the condition on
# the progress message, any handling of max_num and the return statement are
# not visible here. createTorrentDataSet() iterates over the result, so the
# function presumably returns the `torrents` list -- confirm.
63 for fname in os.listdir(path):
64 if fname.endswith('.torrent'):
65 torrents.append(os.path.join(path,fname))
# Progress message; the condition that triggers it is not visible here.
67 print 'Loaded: %d torrents' % i
71 print 'Loaded %d torrents' % len(torrents)
74 def showTorrent(path):
# Print a human-readable summary of the torrent file at `path`: one line per
# contained file with its size, then the torrent name and the tracker URL,
# framed by dashed separator lines.
#
# NOTE(review): the definitions of files_list and __size_change, and the
# construct that chooses between the multi-file and single-file branches,
# are not visible in this view. The 'kb' label suggests __size_change is a
# bytes-to-kilobytes divisor -- confirm against the complete file.
# os.path.join() with a single argument returns that argument unchanged.
75 torrent = getMetainfo(os.path.join(path))
76 name = torrent['info']['name']
77 print '------------------------------'
82 # the multi-files mode
83 for ifiles in torrent['info']["files"]:
# Only the last path component of each entry is shown.
84 files_list.append((ifiles['path'][-1], ifiles['length'] / float(__size_change)))
# Single-file case: the torrent name doubles as the file name.
87 files_list.append((torrent['info']["name"],torrent['info']['length'] / float(__size_change)))
88 for fname, fsize in files_list:
# %d truncates the float size to a whole number.
89 print'\t\t%s\t%d kb' % (fname, fsize)
90 print 'Torrent name: %s' % name
91 print '\ttracker:%s' % torrent['announce']
92 print '------------------------------'
94 def createTorrentDataSet(filename, torrentpath):
# Interactively label the torrents under `torrentpath`, appending one record
# per answered torrent to the label file `filename`.
#
#   filename    -- label file; existing entries are loaded via initSaveFile()
#                  and new ones are appended through saveTorrent()
#   torrentpath -- directory scanned by getTorrentData()
#
# NOTE(review): several lines are missing from this view -- what happens for
# an already-labelled torrent (presumably it is skipped), how `ans` is
# initialised, how the 'q' answer is handled, and whether f_out is closed.
# Confirm against the complete file.
95 initSaveFile(filename)
# Append mode, so records from earlier sessions are kept.
96 f_out = file(filename, 'a')
97 torrents = getTorrentData(torrentpath)
98 for torrent in torrents:
# tset holds the file names already present in the label file.
99 if os.path.split(torrent)[-1] in tset: # already done
103 while ans not in ['q', 'y','n']:
104 print 'Is this torrent porn? (y/n/q)'
# [:-1] drops the last character of the line read (the newline for a normal
# line of input).
# NOTE(review): at end-of-input readline() returns '', so unless a line not
# visible here handles that case this loop never receives a valid answer.
105 ans = sys.stdin.readline()[:-1].lower()
109 saveTorrent(f_out, torrent, (ans=='y'))
112 def saveTorrent(f_out, torrent, boolean):
# Write one label record for `torrent` to the open file object `f_out`.
#
#   f_out   -- file object to write to
#   torrent -- path of the torrent; only its last path component is stored
#   boolean -- the label, written as 0 or 1
#
# Record format is '<file name>\t<0|1>\n', the same tab-separated layout that
# readCategorisationFile() and initSaveFile() split on.
#
# NOTE(review): other lines of this function are not visible in this view.
115 tfilename = os.path.split(torrent)[-1]
117 f_out.write('%s\t%d\n' % (tfilename, int(boolean)))
121 def initSaveFile(filename):
# Fill tset with the torrent file names already recorded in `filename`
# (the first tab-separated field of each line). createTorrentDataSet()
# checks tset to recognise torrents that are already done.
#
# NOTE(review): lines are missing from this view -- where tset is created,
# the loop header over `lines`, whether the file is closed, and whatever
# guards the final message (presumably an except clause for a failed open).
# Confirm against the complete file.
# file() is the Python 2 spelling of open().
125 f = file(filename, 'r')
126 lines = f.read().splitlines()
128 tset.add(line.split('\t')[0])
131 print 'No file %s found, starting with empty file' % filename
# Command-line dispatch: expects exactly three arguments after the program
# name -- a mode ('categorise' or 'test'), the torrent directory and the
# label data file.
# NOTE(review): the header of the function that receives `args`, whatever
# follows the usage messages (presumably an exit), and the body of the
# __main__ guard are not visible in this view -- confirm against the
# complete file.
136 if len(args) != 4 or args[1] not in ['categorise', 'test']:
137 print 'Usage 1: %s categorise [torrent-dir] [torrent-data-file]' % args[0]
138 print 'Usage 2: %s test [torrent-dir] [torrent-data-file]' % args[0]
# Both callees take (data file, torrent dir), the reverse of the command-line
# order, hence args[3] before args[2].
140 if args[1] == 'categorise':
141 createTorrentDataSet(args[3], args[2])
142 elif args[1] == 'test':
143 testFilter(args[3], args[2])
147 if __name__ == '__main__':