Libraryless. Click here for Pure Java version (512L/5K/15K).
1 | !636 |
2 | !quicknew |
3 | !688 // buf.isEmpty |
4 | !standard functions |
5 | |
// Base type of every predictor in this experiment.
abstract class Predictor {
  // Optional hook: lets a predictor absorb reference text before predicting.
  // The default does nothing, so subclasses only override it when needed.
  public void preload(String s) {
  }

  // Returns the predicted continuation of s.
  // The driver in this file passes the number of characters remaining in the
  // input as chars; it treats a null result as "" and truncates longer results.
  public abstract String predict(String s, int chars);
}
10 | |
11 | main { |
12 | static new (Hash)Set<String> debug; |
13 | |
14 | psvm { |
15 | String input = "abcdefghijklmnopqrstuvwxyz"; |
16 | new List<String> preloads; |
17 | |
18 | for (int i = 0; i < args.length; i++) { |
19 | String arg = args[i]; |
20 | if (arg.equals("debug")) { |
21 | String s = args[++i]; |
22 | debug.add(s); |
23 | debugOn(s); |
24 | } else if (isSnippetID(arg)) |
25 | input = loadSnippet(arg); |
26 | else if (arg.equals("preload")) { |
27 | String s = args[++i]; |
28 | preloads.add(loadSnippet(s)); |
29 | } |
30 | } |
31 | |
32 | input = input.replace("\r", ""); |
33 | |
34 | new List<Predictor> predictors; |
35 | add(predictors, new PLin, new PWords, new PIndent); |
36 | |
37 | for (Predictor p : predictors) |
38 | for (String s : preloads) |
39 | p.preload(s); |
40 | |
41 | new Collector globalCollector; |
42 | int skips = 0, totalSkip = 0; |
43 | new StringBuilder compacted; |
44 | |
45 | for (int splitPoint = 0; splitPoint < input.length(); splitPoint++) { |
46 | String before = input.substring(0, splitPoint); |
47 | String rest = input.substring(splitPoint); |
48 | new Collector collector; |
49 | for (Predictor p : predictors) { |
50 | boolean doDebug = debug.contains(p.getClass().getName().replaceAll("^main\\$", "")); |
51 | int chars = rest.length(); |
52 | String prediction = ""; |
53 | try { |
54 | prediction = p.predict(before, chars); |
55 | if (doDebug) |
56 | System.out.println("Actual prediction: " + prediction); |
57 | } catch (Throwable e) { |
58 | // silent exception |
59 | } |
60 | |
61 | // convenience result fixing for the predictors |
62 | if (prediction == null) prediction = ""; |
63 | if (prediction.length() > chars) |
64 | prediction = prediction.substring(0, chars); |
65 | |
66 | String actual = rest.length() > chars ? rest.substring(0, chars) : rest; |
67 | int improvement = getScore2(prediction, actual); |
68 | collector.add(p, prediction, improvement, splitPoint); |
69 | |
70 | if (doDebug) { |
71 | String expected = actual.substring(0, Math.min(prediction.length()+5, rest.length())); |
72 | System.out.println(splitPoint + "*" + improvement + " " + structure(p) + " -> " + quote(prediction) + " vs " + quote(expected) + " (error " + (actual.length()-improvement) + " of " + rest.length() + ")"); |
73 | } |
74 | |
75 | globalCollector.add(p, prediction, improvement, splitPoint); |
76 | } |
77 | |
78 | int skip = 0; |
79 | if (collector.bestScore > 0) { |
80 | System.out.println(splitPoint + " " + collector.bestScore + " " + structure(collector.winner)); |
81 | |
82 | skip = commonPrefix(rest, collector.winnerResult).length(); |
83 | } |
84 | |
85 | if (skip != 0) { |
86 | System.out.println("Skipping " + skip + ": " + quote(collector.winnerResult.substring(0, skip))); |
87 | splitPoint += skip-1; |
88 | totalSkip += skip; |
89 | ++skips; |
90 | compacted.append('*'); |
91 | } else |
92 | compacted.append(input.charAt(splitPoint)); |
93 | } |
94 | |
95 | System.out.println("\n" + compacted + "\n\n"); |
96 | |
97 | System.out.println("Highest score seen: " + globalCollector.bestScore + " by " + structure(globalCollector.winner) + " at " + globalCollector.splitPoint); |
98 | System.out.println("Total characters skipped: " + totalSkip + "/" + input.length() + " (skips: " + skips + ")"); |
99 | } |
100 | |
101 | static class Collector { |
102 | Predictor winner; |
103 | String winnerResult; |
104 | long bestScore = -1; |
105 | int splitPoint; |
106 | |
107 | void add(Predictor p, String result, long score, int splitPoint) { |
108 | if (winner == null || score > bestScore) { |
109 | winner = p; |
110 | winnerResult = result; |
111 | bestScore = score; |
112 | this.splitPoint = splitPoint; |
113 | } |
114 | } |
115 | } |
116 | |
117 | !include #1000388 // "leven" function (Levenshtein distance) |
118 | |
119 | /*static class P0 extends Predictor { |
120 | public String predict(String s, int chars) { |
121 | return ""; |
122 | } |
123 | } |
124 | |
125 | static class P1 extends Predictor { |
126 | public String predict(String s, int chars) { |
127 | return s; |
128 | } |
129 | }*/ |
130 | |
131 | static class PLin extends Predictor { |
132 | public String predict(String s, int chars) { |
133 | if (s.length() < 2) return ""; |
134 | char a = s.charAt(s.length()-2); |
135 | char b = s.charAt(s.length()-1); |
136 | int step = (char) (((int) b) - (int) a); |
137 | new StringBuilder buf; |
138 | for (int i = 0; i < chars; i++) { |
139 | char c = (char) (((int) a) + step*(i+2)); |
140 | buf.append(c); |
141 | } |
142 | return buf.toString(); |
143 | } |
144 | } |
145 | |
146 | static <C> void add(Collection<C> c, C... objects) { |
147 | for (C x : objects) c.add(x); |
148 | } |
149 | |
150 | static String structure(Object o) { |
151 | String name = o.getClass().getName(); |
152 | |
153 | new StringBuilder buf; |
154 | |
155 | if (o instanceof Collection) { |
156 | for (Object x : (Collection) o) { |
157 | if (!buf.isEmpty()) buf.append(", "); |
158 | buf.append(structure(x)); |
159 | } |
160 | return "{" + buf + "}"; |
161 | } |
162 | |
163 | // Need more cases? This should cover all library classes... |
164 | if (name.startsWith("java.") || name.startsWith("javax.")) |
165 | return String.valueOf(o); |
166 | |
167 | String shortName = o.getClass().getName().replaceAll("^main\\$", ""); |
168 | |
169 | // TODO: go to superclasses too |
170 | Field[] fields = o.getClass().getDeclaredFields(); |
171 | for (Field field : fields) { |
172 | if ((field.getModifiers() & Modifier.STATIC) != 0) |
173 | continue; |
174 | Object value; |
175 | try { |
176 | value = field.get(o); |
177 | } catch (Exception e) { |
178 | value = "?"; |
179 | } |
180 | |
181 | String fieldName = field.getName(); |
182 | |
183 | // special case for PWords - show only number of preloaded words |
184 | if (shortName.equals("PWords") && field.getName().equals("preloaded")) |
185 | value = ((Collection) value).size(); |
186 | |
187 | if (!buf.isEmpty()) buf.append(", "); |
188 | buf.append(fieldName + "=" + structure(value)); |
189 | } |
190 | String s = shortName; |
191 | if (!buf.isEmpty()) |
192 | s += "(" + buf + ")"; |
193 | return s; |
194 | } |
195 | |
196 | static int getScore1(String prediction, String rest) { |
197 | int error = leven(prediction, rest); |
198 | return rest.length()-error; |
199 | } |
200 | |
201 | static int getScore2(String prediction, String rest) { |
202 | return commonPrefix(prediction, rest).length(); |
203 | } |
204 | |
205 | static class PWords extends Predictor { |
206 | static boolean debug; |
207 | new (Tree)Set<String> preloaded; |
208 | |
209 | public void preload(String s) { |
210 | preloaded.addAll(findWords(s)); |
211 | } |
212 | |
213 | public String predict(String s, int chars) { |
214 | Set<String> words = findWords(s); |
215 | words.addAll(preloaded); |
216 | String word = match("\\w+$", s); |
217 | if (word == null) return ""; |
218 | if (debug) |
219 | System.out.println("Looking for: " + word); |
220 | for (String w : words) |
221 | if (w.startsWith(word)) { |
222 | String pred = w.substring(word.length()); |
223 | if (debug) |
224 | System.out.println("PWords: predicted " + quote(pred) + " for " + quote(word) + " based on word " + quote(w)); |
225 | return pred; |
226 | } |
227 | return ""; |
228 | } |
229 | |
230 | boolean firstTime = true; |
231 | Set<String> findWords(String s) { |
232 | Set<String> words = new TreeSet<String>( |
233 | Collections.reverseOrder()); // important so partial matches come later |
234 | words.addAll(matchAll("\\w+", s)); |
235 | if (debug && firstTime && s.length() >= 100) { |
236 | firstTime = false; |
237 | //System.out.println("Words found: " + structure(words)); |
238 | } |
239 | return words; |
240 | } |
241 | } |
242 | |
243 | static String match(String pattern, String text) { |
244 | List<String> matches = new ArrayList<String>(); |
245 | Matcher matcher = Pattern.compile(pattern).matcher(text); |
246 | return matcher.find() ? matcher.group() : null; |
247 | } |
248 | |
249 | static class PIndent extends Predictor { |
250 | public String predict(String s, int chars) { |
251 | int i = s.lastIndexOf('\n'); |
252 | int j = i+1; |
253 | while (j < s.length() && (s.charAt(j) == ' ' || s.charAt(j) == '\t')) |
254 | ++j; |
255 | return "\n" + s.substring(i, j); |
256 | } |
257 | } |
258 | } |
download show line numbers debug dex old transpilations
Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #705 |
Snippet name: | An experiment with predictors |
Eternal ID of this version: | #705/1 |
Text MD5: | 8059a7beeef1b00168b04cfdf9dae892 |
Transpilation MD5: | dc3c4fae9e9a22d11ea51f435459ad77 |
Author: | stefan |
Category: | |
Type: | JavaX source code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2015-08-03 00:38:34 |
Source code size: | 8213 bytes / 258 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 603 / 697 |
Referenced in: | [show references] |