Libraryless. Click here for Pure Java version (2400L/19K/57K).
1 | !747 |
2 | |
3 | abstract class P { |
4 | abstract void write(S tok); |
5 | abstract S read(); |
6 | } |
7 | |
8 | !include #1000774 // CircularFifoBuffer |
9 | |
10 | m { |
11 | static int numSnippets = 2000; |
12 | static boolean showGUI = true; |
13 | static int maxCharsGUI = 500000; |
14 | |
15 | static Collector collector; |
16 | static L<S> tok; |
17 | static Set<int> predicted; |
18 | |
// Combines several predictors: every written token is passed to all of them,
// and a prediction is taken from the first one (in list order) that has one.
static class Chain extends P {
  new L<P> list;

  *() {}
  *(L<P> *list) {}
  // Copy into an ArrayList so that add() keeps working: if asList(a) is a
  // fixed-size array view (as java.util.Arrays.asList is), adding to it
  // would throw UnsupportedOperationException.
  *(P... a) { list = new ArrayList<P>(asList(a)); }

  // Appends another predictor; it is consulted after all existing ones.
  void add(P p) { list.add(p); }

  // Feeds the token to every predictor in the chain.
  void write(S tok) {
    for (P p : list)
      p.write(tok);
  }

  // Returns the first non-null prediction in list order, or null if no
  // predictor has one.
  S read() {
    for (P p : list) {
      S s = p.read();
      if (s != null) return s;
    }
    return null;
  }
}
41 | |
42 | static class Pairs extends P { |
43 | new Map<S,S> map; |
44 | S last; |
45 | |
46 | void write(S tok) { |
47 | if (last != null) |
48 | map.put(last, tok); |
49 | last = tok; |
50 | } |
51 | |
52 | S read() { |
53 | return last == null ? null : map.get(last); |
54 | } |
55 | } |
56 | |
// Predicts from the n preceding tokens: for every run of n consecutive
// tokens it remembers which token followed that run most recently.
static class Tuples extends P {
  Map<L<S>,S> map = new HashMap<L<S>,S>();
  int n;
  CircularFifoBuffer<S> buf;

  // n = number of preceding tokens used as context.
  *(int *n) {
    buf = new CircularFifoBuffer<S>(n);
  }

  // Once n tokens of context exist, stores "context -> tok"; then pushes
  // tok into the context buffer.
  void write(S tok) {
    if (buf.size() == n)
      map.put(key(), tok);
    buf.add(tok);
  }

  // Returns the remembered follower of the current context, or null if
  // fewer than n tokens were written or the context is unknown.
  S read() {
    return buf.size() == n ? map.get(key()) : null;
  }

  // Snapshot of the buffered tokens used as the map key. Uses asList()
  // (as NewX does) instead of getBackingStore(): the backing store is
  // presumably the raw ring storage, whose element order would depend on
  // the current write position, so the same token run would yield
  // different keys - TODO confirm against CircularFifoBuffer (#1000774).
  // The copy keeps the key stable if asList() returns a live view.
  L<S> key() {
    return new ArrayList<S>(buf.asList());
  }
}
78 | |
79 | // Bla x = new Bla |
80 | static class NewX extends P { |
81 | CircularFifoBuffer<S> buf = new CircularFifoBuffer<S>(4); |
82 | |
83 | void write(S tok) { buf.add(tok); } |
84 | |
85 | S read() { |
86 | if (buf.size() == 4) { |
87 | L<S> l = buf.asList(); |
88 | if (l.get(2).equals("=") && l.get(3).equals("new")) |
89 | return l.get(0); |
90 | } |
91 | return null; |
92 | } |
93 | } |
94 | |
// Entry point: builds the corpus, scores a series of predictors on it and
// optionally shows the best one's hits in a window.
p {
  tok = makeCorpusJavaTok(numSnippets);
  print("Tokens in corpus: " + tok.size());

  collector = new Collector;

  // Single predictors.
  test(new Pairs);
  test(new Tuples(2));
  test(new Tuples(3));
  test(new Tuples(4));

  // Chains: the longest context is consulted first, shorter ones serve as fallbacks.
  test(new Chain(new Tuples(2), new Pairs));
  test(new Chain(new Tuples(4), new Tuples(3), new Tuples(2), new Pairs));
  test(new Chain(new NewX, new Tuples(4), new Tuples(3), new Tuples(2), new Pairs));

  // Display the hit set that the collector saved for the winner.
  if (showGUI && collector.winner != null) {
    predicted = collector.predicted;
    showColoredText();
  }
}
113 | |
114 | // test a predictor |
115 | static void test(P p) { |
116 | predicted = new TreeSet<int>(); |
117 | int points = 0, total = 0; |
118 | for (int i = 1; i < tok.size(); i += 2) { |
119 | S t = tok.get(i); |
120 | S x = p.read(); |
121 | boolean correct = t.equals(x); |
122 | total += t.length(); |
123 | if (correct) { |
124 | predicted.add(i); |
125 | points += t.length(); |
126 | } |
127 | p.write(t); |
128 | } |
129 | double score = points*100.0/total; |
130 | collector.add(p, score); |
131 | } |
132 | |
133 | static void showColoredText() ctex { |
134 | JFrame jf = new JFrame("Predicted = green"); |
135 | Container cp = jf.getContentPane(); |
136 | |
137 | JTextPane pane = new JTextPane(); |
138 | //pane.setFont(loadFont("#1000993", 24)); |
139 | Document doc = pane.getStyledDocument(); |
140 | |
141 | int i = tok.size(), len = 0; |
142 | while (len <= maxCharsGUI && i > 0) { |
143 | --i; |
144 | len += tok.get(i).length(); |
145 | } |
146 | |
147 | for (; i < tok.size(); i++) { |
148 | if (tok.get(i).length() == 0) continue; |
149 | boolean green = predicted.contains(i); |
150 | SimpleAttributeSet set = new SimpleAttributeSet(); |
151 | StyleConstants.setForeground(set, green ? Color.green : Color.gray); |
152 | doc.insertString(doc.getLength(), tok.get(i), set); |
153 | } |
154 | |
155 | JScrollPane scrollPane = new JScrollPane(pane); |
156 | cp.add(scrollPane, BorderLayout.CENTER); |
157 | |
158 | jf.setBounds(100, 100, 600, 600); |
159 | jf.setVisible(true); |
160 | } |
161 | |
162 | !include #1000989 // SnippetDB |
163 | |
164 | static L<S> makeCorpusJavaTok(int numSnippets) { |
165 | SnippetDB db = new SnippetDB("#1000673"); |
166 | List<List<S>> rows = db.rowsOrderedBy("sn_created"); |
167 | new L<S> tok; |
168 | for (int i = 0; i < Math.min(rows.size(), numSnippets); i++) { |
169 | new StringBuilder buf; |
170 | S id = db.getField(rows.get(i), "sn_id"); |
171 | S title = db.getField(rows.get(i), "sn_title"); |
172 | S text = db.getField(rows.get(i), "sn_text"); |
173 | buf.append("\n== ID: " + id); |
174 | buf.append("\n== Title: " + title); |
175 | buf.append("\n==\n"); |
176 | buf.append(text).append("\n"); |
177 | if (tok.size() != 0) tok.remove(tok.size()-1); |
178 | tok.addAll(javaTok(buf.toString())); |
179 | ++i; |
180 | } |
181 | return tok; |
182 | } |
183 | |
184 | static class Collector { |
185 | P winner; |
186 | double bestScore = -1; |
187 | Set<int> predicted; |
188 | |
189 | void add(P p, double score) { |
190 | if (winner == null || score > bestScore) { |
191 | winner = p; |
192 | bestScore = score; |
193 | print("New best score: " + formatDouble(score, 2) + "% (" + shorten(structure(p), 100) + ")"); |
194 | this.predicted = main.predicted; |
195 | } |
196 | } |
197 | } |
198 | } |
Began life as a copy of #1000991
download show line numbers debug dex old transpilations
Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1000995 |
Snippet name: | Token prediction, multiple predictors |
Eternal ID of this version: | #1000995/1 |
Text MD5: | 11bef061c74da579d77f57eacd842ca3 |
Transpilation MD5: | 2850bc9c2d210916bf4ecb2bf53c196d |
Author: | stefan |
Category: | |
Type: | JavaX source code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2015-09-14 21:36:10 |
Source code size: | 4998 bytes / 198 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 731 / 714 |
Referenced in: | [show references] |