Libraryless. Click here for Pure Java version (2400L/19K/57K).
!747 abstract class P { abstract void write(S tok); abstract S read(); } !include #1000774 // CircularFifoBuffer m { static int numSnippets = 2000; static boolean showGUI = true; static int maxCharsGUI = 500000; static Collector collector; static L<S> tok; static Set<int> predicted; static class Chain extends P { new L<P> list; *() {} *(L<P> *list) {} *(P... a) { list = asList(a); } void add(P p) { list.add(p); } void write(S tok) { for (P p : list) p.write(tok); } S read() { for (P p : list) { S s = p.read(); if (s != null) return s; } return null; } } static class Pairs extends P { new Map<S,S> map; S last; void write(S tok) { if (last != null) map.put(last, tok); last = tok; } S read() { return last == null ? null : map.get(last); } } static class Tuples extends P { Map<L<S>,S> map = new HashMap<L<S>,S>(); int n; CircularFifoBuffer<S> buf; *(int *n) { buf = new CircularFifoBuffer<S>(n); } void write(S tok) { if (buf.size() == n) map.put(new ArrayList<S>(buf.getBackingStore()), tok); buf.add(tok); } S read() { if (buf.size() == n) return map.get(new ArrayList<S>(buf.getBackingStore())); return null; } } // Bla x = new Bla static class NewX extends P { CircularFifoBuffer<S> buf = new CircularFifoBuffer<S>(4); void write(S tok) { buf.add(tok); } S read() { if (buf.size() == 4) { L<S> l = buf.asList(); if (l.get(2).equals("=") && l.get(3).equals("new")) return l.get(0); } return null; } } p { tok = makeCorpusJavaTok(numSnippets); print("Tokens in corpus: " + tok.size()); collector = new Collector; test(new Pairs); test(new Tuples(2)); test(new Tuples(3)); test(new Tuples(4)); test(new Chain(new Tuples(2), new Pairs)); test(new Chain(new Tuples(4), new Tuples(3), new Tuples(2), new Pairs)); test(new Chain(new NewX, new Tuples(4), new Tuples(3), new Tuples(2), new Pairs)); if (collector.winner != null && showGUI) { predicted = collector.predicted; showColoredText(); } } // test a predictor static void test(P p) { predicted = new TreeSet<int>(); int points = 0, total = 0; for (int i = 1; i < tok.size(); i += 2) { S t = tok.get(i); S x = p.read(); boolean correct = t.equals(x); total += t.length(); if (correct) { predicted.add(i); points += t.length(); } p.write(t); } double score = points*100.0/total; collector.add(p, score); } static void showColoredText() ctex { JFrame jf = new JFrame("Predicted = green"); Container cp = jf.getContentPane(); JTextPane pane = new JTextPane(); //pane.setFont(loadFont("#1000993", 24)); Document doc = pane.getStyledDocument(); int i = tok.size(), len = 0; while (len <= maxCharsGUI && i > 0) { --i; len += tok.get(i).length(); } for (; i < tok.size(); i++) { if (tok.get(i).length() == 0) continue; boolean green = predicted.contains(i); SimpleAttributeSet set = new SimpleAttributeSet(); StyleConstants.setForeground(set, green ? Color.green : Color.gray); doc.insertString(doc.getLength(), tok.get(i), set); } JScrollPane scrollPane = new JScrollPane(pane); cp.add(scrollPane, BorderLayout.CENTER); jf.setBounds(100, 100, 600, 600); jf.setVisible(true); } !include #1000989 // SnippetDB static L<S> makeCorpusJavaTok(int numSnippets) { SnippetDB db = new SnippetDB("#1000673"); List<List<S>> rows = db.rowsOrderedBy("sn_created"); new L<S> tok; for (int i = 0; i < Math.min(rows.size(), numSnippets); i++) { new StringBuilder buf; S id = db.getField(rows.get(i), "sn_id"); S title = db.getField(rows.get(i), "sn_title"); S text = db.getField(rows.get(i), "sn_text"); buf.append("\n== ID: " + id); buf.append("\n== Title: " + title); buf.append("\n==\n"); buf.append(text).append("\n"); if (tok.size() != 0) tok.remove(tok.size()-1); tok.addAll(javaTok(buf.toString())); ++i; } return tok; } static class Collector { P winner; double bestScore = -1; Set<int> predicted; void add(P p, double score) { if (winner == null || score > bestScore) { winner = p; bestScore = score; print("New best score: " + formatDouble(score, 2) + "% (" + shorten(structure(p), 100) + ")"); this.predicted = main.predicted; } } } }
Began life as a copy of #1000991
download show line numbers debug dex old transpilations
Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1000995 |
Snippet name: | Token prediction, multiple predictors |
Eternal ID of this version: | #1000995/1 |
Text MD5: | 11bef061c74da579d77f57eacd842ca3 |
Transpilation MD5: | 2850bc9c2d210916bf4ecb2bf53c196d |
Author: | stefan |
Category: | |
Type: | JavaX source code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2015-09-14 21:36:10 |
Source code size: | 4998 bytes / 198 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 732 / 714 |
Referenced in: | #1001000 - Token prediction, multiple predictors (improving architecture) #3000189 - Answer for stefanreich(>> t bla) #3000190 - Answer for stefanreich(>> t 20 questions) #3000382 - Answer for ferdie (>> t = 1, f = 0) #3000383 - Answer for funkoverflow (>> t=1, f=0 okay) |