1 # Written by Maarten Clemens, Jelle Roozenburg
2 # see LICENSE.txt for license information
4 #http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance
6 def editDist(str1,str2, maxlength=14):
7 # If fast is set: only calculate titles with same #fast initial chars
8 if not str1 or not str2: # protect against empty strings
11 str1 = str1[:maxlength].lower()
12 str2 = str2[:maxlength].lower()
17 d = [range(lenStr2+1)]
20 for i in range(lenStr1):
22 for j in range(lenStr2):
23 penalty = 1./max(i+1,j+1)
25 if str1[i] == str2[j]:
29 deletion = d[i][j+1] + penalty
30 insertion = row[j] + penalty
31 substitution = d[i][j] + cost
32 row.append(min(deletion,insertion,substitution))
33 (deletion,insertion,substitution)
34 if i>0 and j>0 and str1[i] == str2[j-1] and str1[i-1] == str2[j]:
35 row[j+1] = min(row[j+1], d[i-1][j-1]+cost) # transposition
39 ##maxi = max(lenStr1,lenStr2) # for penalty = 1
40 maxi = sum([1./j for j in range(max(lenStr1,lenStr2)+1)[1:]])
41 return 1.*d[lenStr1][lenStr2]/ maxi
44 if __name__ == '__main__':
48 print editDist(str1, str2)
51 ## d,e = EditDist('mamamstein','levenstein')