Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

198
LINES

< > BotCompany Repo | #1000995 // Token prediction, multiple predictors

JavaX source code [tags: use-pretranspiled] - run with: x30.jar

Libraryless. Click here for Pure Java version (2400L/19K/57K).

1  
!747
2  
3  
// Base type of every token predictor in this file.
// Usage pattern (see test()): call read() to get a guess for the upcoming
// token, then call write() with the token that actually occurred.
abstract class P {
4  
  // Feeds the token that actually occurred to the predictor.
  abstract void write(S tok);
5  
  // Returns the predicted next token, or null if the predictor has no guess.
  abstract S read();
6  
}
7  
8  
!include #1000774 // CircularFifoBuffer
9  
10  
m {
11  
  // Upper bound on the number of snippet rows passed to makeCorpusJavaTok().
  static int numSnippets = 2000;
12  
  // When true, main opens the colored-text window for the winning predictor.
  static boolean showGUI = true;
13  
  // Character budget for the tail of the corpus rendered by showColoredText().
  static int maxCharsGUI = 500000;
14  
  
15  
  // Keeps track of the best-scoring predictor across all test() runs.
  static Collector collector;
16  
  // The corpus as a token list; test() evaluates the tokens at odd indexes.
  static L<S> tok;
17  
  // Indexes into tok that were predicted correctly: reset by every test() run,
  // and finally set by main to the winner's set before the GUI is shown.
  static Set<int> predicted;
18  
  
19  
  // Combines several predictors: every token is written to all of them, and
  // the first one (in list order) that yields a non-null prediction wins.
  static class Chain extends P {
    new L<P> list;

    *() {}
    *(L<P> *list) {}

    // Copy into a fresh ArrayList so that add() keeps working even if
    // asList() hands back a fixed-size view of the varargs array.
    *(P... a) { list = new ArrayList<P>(asList(a)); }

    // Appends a predictor with the lowest priority so far.
    void add(P p) { list.add(p); }

    // Forwards the actual token to every member predictor.
    void write(S tok) {
      for (P member : list)
        member.write(tok);
    }

    // Asks the members in order; returns the first non-null prediction,
    // or null when no member has a guess.
    S read() {
      for (P member : list) {
        S guess = member.read();
        if (guess != null) return guess;
      }
      return null;
    }
  }
41  
    
42  
  static class Pairs extends P {
43  
    new Map<S,S> map;
44  
    S last;
45  
46  
    void write(S tok) {
47  
      if (last != null)
48  
        map.put(last, tok);
49  
      last = tok;
50  
    }
51  
52  
    S read() {
53  
      return last == null ? null : map.get(last);
54  
    }
55  
  }
56  
57  
  // N-gram predictor: remembers which token most recently followed each
  // run of n consecutive tokens and predicts that follower again.
  static class Tuples extends P {
    Map<L<S>,S> map = new HashMap<L<S>,S>();
    int n;                       // context length (set by the constructor parameter)
    CircularFifoBuffer<S> buf;   // holds the last n tokens written

    *(int *n) {
      buf = new CircularFifoBuffer<S>(n);
    }

    // Once n tokens of context are available, records tok as the follower
    // of the current context; then shifts tok into the context.
    void write(S tok) {
      if (buf.size() == n)
        map.put(context(), tok);
      buf.add(tok);
    }

    // Returns the follower recorded for the current context, or null when
    // the context is not yet full or has not been seen before.
    S read() {
      if (buf.size() == n)
        return map.get(context());
      return null;
    }

    // Snapshot of the current context, used as the map key.
    // Uses asList(), the same accessor NewX relies on for oldest-first
    // order, instead of getBackingStore(). List keys compare element by
    // element, so they must not depend on where the ring buffer's write
    // position happens to be.
    // NOTE(review): CircularFifoBuffer is included from another snippet and
    // not visible here - confirm asList() returns the logical order.
    private L<S> context() {
      return new ArrayList<S>(buf.asList());
    }
  }
78  
  
79  
  // Bla x = new Bla
80  
  static class NewX extends P {
81  
    CircularFifoBuffer<S> buf = new CircularFifoBuffer<S>(4);
82  
    
83  
    void write(S tok) { buf.add(tok); }
84  
85  
    S read() {
86  
      if (buf.size() == 4) {
87  
        L<S> l = buf.asList();
88  
        if (l.get(2).equals("=") && l.get(3).equals("new"))
89  
          return l.get(0);
90  
      }
91  
      return null;
92  
    }
93  
  }
94  
  
95  
  // Entry point: builds the corpus, scores a series of predictors against
  // it and, if enabled, visualizes the hits of the best one.
  p {
    tok = makeCorpusJavaTok(numSnippets);
    print("Tokens in corpus: " + tok.size());

    collector = new Collector;

    // Stand-alone predictors
    test(new Pairs);
    test(new Tuples(2));
    test(new Tuples(3));
    test(new Tuples(4));

    // Chains: longest context first, falling back to shorter ones
    test(new Chain(new Tuples(2), new Pairs));
    test(new Chain(new Tuples(4), new Tuples(3), new Tuples(2), new Pairs));
    test(new Chain(new NewX, new Tuples(4), new Tuples(3), new Tuples(2), new Pairs));

    if (showGUI && collector.winner != null) {
      // Display the hit set recorded for the winner, not the one left
      // behind by the most recent test() run.
      predicted = collector.predicted;
      showColoredText();
    }
  }
113  
  
114  
  // test a predictor
115  
  static void test(P p) {
116  
    predicted = new TreeSet<int>();
117  
    int points = 0, total = 0;
118  
    for (int i = 1; i < tok.size(); i += 2) {
119  
      S t = tok.get(i);
120  
      S x = p.read();
121  
      boolean correct = t.equals(x);
122  
      total += t.length();
123  
      if (correct) {
124  
        predicted.add(i);
125  
        points += t.length();
126  
      }
127  
      p.write(t);
128  
    }
129  
    double score = points*100.0/total;
130  
    collector.add(p, score);
131  
  }
132  
  
133  
  // Opens a window showing the tail of the corpus (roughly the last
  // maxCharsGUI characters), with correctly predicted tokens in green and
  // all other tokens in gray. Empty tokens are skipped.
  static void showColoredText() ctex {
    JFrame jf = new JFrame("Predicted = green");
    Container cp = jf.getContentPane();

    JTextPane pane = new JTextPane();
    //pane.setFont(loadFont("#1000993", 24));
    Document doc = pane.getStyledDocument();

    // Only two colors are ever used, so build both attribute sets once
    // instead of allocating a new one for every token.
    SimpleAttributeSet hitStyle = new SimpleAttributeSet();
    StyleConstants.setForeground(hitStyle, Color.green);
    SimpleAttributeSet missStyle = new SimpleAttributeSet();
    StyleConstants.setForeground(missStyle, Color.gray);

    // Walk backwards from the end of the corpus until the character budget
    // is exceeded (or the beginning is reached) to find the first token shown.
    int start = tok.size(), len = 0;
    while (len <= maxCharsGUI && start > 0) {
      --start;
      len += tok.get(start).length();
    }

    for (int i = start; i < tok.size(); i++) {
      S t = tok.get(i);
      if (t.length() == 0) continue;
      doc.insertString(doc.getLength(), t, predicted.contains(i) ? hitStyle : missStyle);
    }

    JScrollPane scrollPane = new JScrollPane(pane);
    cp.add(scrollPane, BorderLayout.CENTER);

    jf.setBounds(100, 100, 600, 600);
    jf.setVisible(true);
  }
161  
  
162  
  !include #1000989 // SnippetDB
163  
  
164  
  // Builds the token corpus from the first numSnippets rows (at most) of
  // snippet database #1000673, ordered by "sn_created".
  //
  // Each snippet is rendered as a small header (ID, title) followed by its
  // text, tokenized with javaTok, and appended to one long token list.
  static L<S> makeCorpusJavaTok(int numSnippets) {
    SnippetDB db = new SnippetDB("#1000673");
    List<List<S>> rows = db.rowsOrderedBy("sn_created");
    int limit = Math.min(rows.size(), numSnippets);
    new L<S> tok;
    // The loop header alone advances i. An extra ++i in the body used to
    // skip every second row, so only about half of the requested snippets
    // were used.
    for (int i = 0; i < limit; i++) {
      List<S> row = rows.get(i);
      S id = db.getField(row, "sn_id");
      S title = db.getField(row, "sn_title");
      S text = db.getField(row, "sn_text");
      new StringBuilder buf;
      buf.append("\n== ID: " + id);
      buf.append("\n== Title: " + title);
      buf.append("\n==\n");
      buf.append(text).append("\n");
      // Drop the last token of the previous snippet before appending.
      // NOTE(review): presumably javaTok output starts and ends with a
      // non-code token, so this keeps code tokens on odd indexes (which is
      // what test() iterates over) - confirm against javaTok.
      if (tok.size() != 0) tok.remove(tok.size()-1);
      tok.addAll(javaTok(buf.toString()));
    }
    return tok;
  }
183  
  
184  
  static class Collector {
185  
    P winner;
186  
    double bestScore = -1;
187  
    Set<int> predicted;
188  
189  
    void add(P p, double score) {
190  
      if (winner == null || score > bestScore) {
191  
        winner = p;
192  
        bestScore = score;
193  
        print("New best score: " + formatDouble(score, 2) + "% (" + shorten(structure(p), 100) + ")");
194  
        this.predicted = main.predicted;
195  
      }
196  
    }
197  
  }
198  
}

Author comment

Began life as a copy of #1000991

download  show line numbers  debug dex  old transpilations   

Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1000995
Snippet name: Token prediction, multiple predictors
Eternal ID of this version: #1000995/1
Text MD5: 11bef061c74da579d77f57eacd842ca3
Transpilation MD5: 2850bc9c2d210916bf4ecb2bf53c196d
Author: stefan
Category:
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-09-14 21:36:10
Source code size: 4998 bytes / 198 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 652 / 613
Referenced in: [show references]