1 | static int leven_limited(S left, S right, int threshold) { |
2 | if (--threshold < 0) ret 0; |
3 | |
4 | int n = left.length(); // length of left |
5 | int m = right.length(); // length of right |
6 | |
7 | // if one string is empty, the edit distance is necessarily the length |
8 | // of the other |
9 | if (n == 0) { |
10 | return m <= threshold ? m : threshold+1; |
11 | } else if (m == 0) { |
12 | return n <= threshold ? n : threshold+1; |
13 | } |
14 | |
15 | if (n > m) { |
16 | // swap the two strings to consume less memory |
17 | final S tmp = left; |
18 | left = right; |
19 | right = tmp; |
20 | n = m; |
21 | m = right.length(); |
22 | } |
23 | |
24 | int[] p = new int[n + 1]; // 'previous' cost array, horizontally |
25 | int[] d = new int[n + 1]; // cost array, horizontally |
26 | int[] tempD; // placeholder to assist in swapping p and d |
27 | |
28 | // fill in starting table values |
29 | final int boundary = Math.min(n, threshold) + 1; |
30 | for (int i = 0; i < boundary; i++) { |
31 | p[i] = i; |
32 | } |
33 | // these fills ensure that the value above the rightmost entry of our |
34 | // stripe will be ignored in following loop iterations |
35 | Arrays.fill(p, boundary, p.length, Integer.MAX_VALUE); |
36 | Arrays.fill(d, Integer.MAX_VALUE); |
37 | |
38 | // iterates through t - USE LONG to fix JVM SAFE POINTS. |
39 | for (long _j = 1; _j <= m; _j++) { |
40 | int j = (int) _j; |
41 | final char rightJ = right.charAt(j - 1); // jth character of right |
42 | d[0] = j; |
43 | |
44 | // compute stripe indices, constrain to array size |
45 | final int min = Math.max(1, j - threshold); |
46 | final int max = j > Integer.MAX_VALUE - threshold ? n : Math.min( |
47 | n, j + threshold); |
48 | |
49 | // the stripe may lead off of the table if s and t are of different |
50 | // sizes |
51 | if (min > max) { |
52 | return threshold+1; |
53 | } |
54 | |
55 | // ignore entry left of leftmost |
56 | if (min > 1) { |
57 | d[min - 1] = Integer.MAX_VALUE; |
58 | } |
59 | |
60 | // iterates through [min, max] in s |
61 | for (int i = min; i <= max; i++) { |
62 | if (left.charAt(i - 1) == rightJ) { |
63 | // diagonally left and up |
64 | d[i] = p[i - 1]; |
65 | } else { |
66 | // 1 + minimum of cell to the left, to the top, diagonally |
67 | // left and up |
68 | d[i] = 1 + Math.min(Math.min(d[i - 1], p[i]), p[i - 1]); |
69 | } |
70 | } |
71 | |
72 | // copy current distance counts to 'previous row' distance counts |
73 | tempD = p; |
74 | p = d; |
75 | d = tempD; |
76 | } |
77 | |
78 | // if p[n] is greater than the threshold, there's no guarantee on it |
79 | // being the correct |
80 | // distance |
81 | if (p[n] <= threshold) { |
82 | return p[n]; |
83 | } |
84 | return threshold+1; |
85 | } |
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1007596 |
Snippet name: | leven_limited - limited Levenshtein distance. returns threshold+1 if distance > threshold |
Eternal ID of this version: | #1007596/7 |
Text MD5: | 150a5f904262dffe602d47114dcf0e51 |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2018-09-23 14:03:46 |
Source code size: | 2625 bytes / 85 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 500 / 536 |
Version history: | 6 change(s) |
Referenced in: | [show references] |