# written by Jelle Roozenburg
# see LICENSE.txt for license information
class KeywordSearch(object):
    """Tribler keyword search over items that carry a 'name' field.

    Features:
      1. Candidate items are selected by substring match on the 'name'
         field (simpleSearch(); search() uses its default 'AND' mode, so
         every keyword has to occur in the name).
      2. The sorting of the results (done in search()) is based on:
         a) the number of matching keywords,
         b) the length of the matching keywords,
         c) whether a keyword matched a whole word (a search for 'cat'
            ranks the word 'cat' above 'category').
      3. Searching is case insensitive.
    """

    def search(self, haystack, needles, haystackismatching=False):
        """Return the items of haystack that match needles, best match first.

        haystack -- sequence of dicts, each holding a string under 'name'
        needles -- iterable of keyword strings; case and surrounding
            whitespace are ignored and blank keywords are dropped
        haystackismatching -- when true, haystack is taken as already
            filtered and the simpleSearch() pass is skipped

        For every keyword an item earns twice the keyword length per
        whole-word occurrence in its name, plus the keyword length once if
        the keyword occurs anywhere in the name.  Items with equal scores
        keep their haystack order.
        """
        # NOTE(review): the diagnostics below are written on every call, as
        # in the visible source; confirm whether they belong behind a debug
        # flag.  sys.stderr.write() emits the same bytes as the Python 2
        # "print >>sys.stderr" form and also runs on Python 3.
        # Wrapping in a 1-tuple keeps the formatting valid for tuple needles.
        sys.stderr.write('kws: unprocessed keywords: %s\n' % (needles,))

        # Plain keywords drive substring matching and scoring; the escaped
        # copies are only used to build regular expressions.  Feeding escaped
        # keywords to substring matching would make e.g. '3.10' unfindable.
        words = [w for w in (n.strip().lower() for n in needles) if w]
        patterns = self.unRegExpifySearchwords(words)
        sys.stderr.write('kws: Searching for %s in %d items\n'
                         % (repr(patterns), len(haystack)))

        if haystackismatching:
            searchspace = haystack
        else:
            searchspace = self.simpleSearch(haystack, words)
            sys.stderr.write('kws: Found %s items using simple search\n'
                             % len(searchspace))

        # One whole-word matcher per keyword, compiled once up front.
        wbsearch = [re.compile(r'\b%s\b' % pattern) for pattern in patterns]

        results = []
        for item in searchspace:
            title = item['name'].lower()
            score = 0
            for word, whole_word in zip(words, wbsearch):
                score += len(whole_word.findall(title)) * 2 * len(word)
                if word in title:
                    score += len(word)
            results.append((score, item))

        # Sort on the score alone: comparing whole (score, item) tuples falls
        # back to comparing the item dicts on ties, which fails on Python 3.
        results.sort(key=lambda entry: entry[0], reverse=True)
        sys.stderr.write('kws: Found %d items eventually\n' % len(results))
        return [item for _, item in results]

    def unRegExpifySearchwords(self, needles):
        """Return needles normalised and escaped for use inside a regexp.

        Each needle is stripped and lower-cased, blank needles are skipped,
        and every regexp metacharacter is prefixed with a backslash.
        """
        replaceRegExpChars = re.compile(r'([\\*.+?|()\[\]{}^$])')
        new_needles = []
        for needle in needles:
            needle = needle.strip().lower()
            if not needle:
                continue
            new_needles.append(replaceRegExpChars.sub(r'\\\1', needle))
        return new_needles

    def simpleSearch(self, haystack, needles, searchtype='AND'):
        """Return the items of haystack whose 'name' matches needles.

        Can do both OR and AND search:
          'AND' -- keep items whose lower-cased name contains every needle
                   as a plain substring
          'OR'  -- keep items whose lower-cased name matches at least one
                   needle; the needles are joined into a single regular
                   expression, so they must already be regexp-safe
        Needles are compared with the lower-cased name and are not
        lower-cased here.  Any other searchtype yields an empty list.
        """
        if searchtype == 'OR':
            searchRegexp = re.compile('|'.join(needles))
            return [item for item in haystack
                    if searchRegexp.search(item['name'].lower()) is not None]
        if searchtype == 'AND':
            hits = []
            for item in haystack:
                title = item['name'].lower()
                if all(needle in title for needle in needles):
                    hits.append(item)
            return hits
        return []
# Ad-hoc smoke test: ranks a small sample haystack for the keywords 'fedora'
# and '1' via KeywordSearch().search() and prints the result.
# NOTE(review): this fragment looks truncated in this view -- the list literal
# is never closed, and neither an enclosing definition nor the body of the
# __main__ guard is visible. Restore the missing lines before relying on it.
96 data = [{'name':'Fedoras 3.10'},
97 {'name':'Fedora 2.10'},
98 {'name':'Movie 3.10'},
100 {'name':'movie_theater.avi'}
102 words = ['fedora', '1']
103 #print KeywordSearch().simpleSearch(data, words)
104 print KeywordSearch().search(data, words)
105 if __name__ == '__main__':