1 | static int leven_limitedIC(S left, S right, int threshold) { |
2 | if (--threshold < 0) ret 0; |
3 | |
4 | int n = l(left); // length of left |
5 | int m = l(right); // length of right |
6 | |
7 | // if one string is empty, the edit distance is necessarily the length |
8 | // of the other |
9 | if (n == 0) |
10 | ret m <= threshold ? m : threshold+1; |
11 | if (m == 0) |
12 | ret n <= threshold ? n : threshold+1; |
13 | |
14 | // if length difference is > threshold, just bail out |
15 | if (abs(m-n) > threshold) ret threshold+1; |
16 | |
17 | if (n > m) { |
18 | // swap the two strings to consume less memory |
19 | final S tmp = left; |
20 | left = right; |
21 | right = tmp; |
22 | n = m; |
23 | m = right.length(); |
24 | } |
25 | |
26 | int[] p = new int[n + 1]; // 'previous' cost array, horizontally |
27 | int[] d = new int[n + 1]; // cost array, horizontally |
28 | int[] tempD; // placeholder to assist in swapping p and d |
29 | |
30 | // fill in starting table values |
31 | final int boundary = Math.min(n, threshold) + 1; |
32 | for (int i = 0; i < boundary; i++) { |
33 | p[i] = i; |
34 | } |
35 | // these fills ensure that the value above the rightmost entry of our |
36 | // stripe will be ignored in following loop iterations |
37 | Arrays.fill(p, boundary, p.length, Integer.MAX_VALUE); |
38 | Arrays.fill(d, Integer.MAX_VALUE); |
39 | |
40 | // iterates through t - USE LONG to fix JVM SAFE POINTS. |
41 | for (long _j = 1; _j <= m; _j++) { |
42 | int j = (int) _j; |
43 | final char rightJ = right.charAt(j - 1); // jth character of right |
44 | d[0] = j; |
45 | |
46 | // compute stripe indices, constrain to array size |
47 | final int min = Math.max(1, j - threshold); |
48 | final int max = j > Integer.MAX_VALUE - threshold ? n : Math.min( |
49 | n, j + threshold); |
50 | |
51 | // the stripe may lead off of the table if s and t are of different |
52 | // sizes |
53 | if (min > max) { |
54 | return threshold+1; |
55 | } |
56 | |
57 | // ignore entry left of leftmost |
58 | if (min > 1) { |
59 | d[min - 1] = Integer.MAX_VALUE; |
60 | } |
61 | |
62 | // iterates through [min, max] in s |
63 | for (int i = min; i <= max; i++) { |
64 | if (equalsIgnoreCase(left.charAt(i - 1), rightJ)) { |
65 | // diagonally left and up |
66 | d[i] = p[i - 1]; |
67 | } else { |
68 | // 1 + minimum of cell to the left, to the top, diagonally |
69 | // left and up |
70 | d[i] = 1 + Math.min(Math.min(d[i - 1], p[i]), p[i - 1]); |
71 | } |
72 | } |
73 | |
74 | // copy current distance counts to 'previous row' distance counts |
75 | tempD = p; |
76 | p = d; |
77 | d = tempD; |
78 | } |
79 | |
80 | // if p[n] is greater than the threshold, there's no guarantee on it |
81 | // being the correct |
82 | // distance |
83 | if (p[n] <= threshold) { |
84 | return p[n]; |
85 | } |
86 | return threshold+1; |
87 | } |
Began life as a copy of #1007596
download show line numbers debug dex old transpilations
Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1008051 |
Snippet name: | leven_limitedIC - limited Levenshtein distance, ignoring case |
Eternal ID of this version: | #1008051/4 |
Text MD5: | c334db1122f818c4d91fdcaf23447d72 |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2019-08-13 14:58:34 |
Source code size: | 2715 bytes / 87 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 597 / 626 |
Version history: | 3 change(s) |
Referenced in: | [show references] |