Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

221
LINES

< > BotCompany Repo | #1001000 // Token prediction, multiple predictors (improving architecture)

JavaX source code [tags: use-pretranspiled] - run with: x30.jar

Libraryless. Click here for Pure Java version (2289L/18K/55K).

1  
!747
2  
3  
// Base type of all token predictors in this program.
abstract class P {
4  
  // Takes the token history seen so far and returns the predicted next
  // token, or null when the predictor has no prediction.
  abstract S read(L<S> tok);
5  
  // Returns a fresh predictor based on this one.
  abstract P derive(); // clone & reset counter for actual use
6  
}
7  
8  
m {
9  
  // ID handed to the SnippetDB constructor in makeCorpusJavaTok.
  static S corpusID = "#1001006";
10  
  // Upper bound on the number of corpus rows read by makeCorpusJavaTok.
  static int numSnippets = 3000;
11  
  // Whether the colored-text window is opened after learning.
  static boolean showGUI = true;
12  
  // Character budget for the text shown in the window; the text is taken
  // from the end of the corpus.
  static int maxCharsGUI = 500000;
13  
  
14  
  // Records the best-scoring predictor; created in the main program.
  static Collector collector;
15  
  // The tokenized corpus.
  static L<S> tok;
16  
  // Indices into tok of the tokens that were predicted correctly.
  static Set<int> predicted;
17  
  
18  
  static class Chain extends P {
19  
    new L<P> list;
20  
    
21  
    *() {}
22  
    *(L<P> *list) {}
23  
    *(P... a) { list = asList(a); }
24  
    
25  
    void add(P p) { list.add(p); }
26  
    
27  
    S read(L<S> tok) {
28  
      for (P p : list) {
29  
        S s = p.read(tok);
30  
        if (s != null) return s;
31  
      }
32  
      return null;
33  
    }
34  
    
35  
    P derive() {
36  
      new Chain c;
37  
      for (P p : list)
38  
        c.add(p.derive());
39  
      return c;
40  
    }
41  
  }
42  
    
43  
  static class Tuples extends P {
44  
    Map<L<S>,S> map = new HashMap<L<S>,S>();
45  
    int n, seen;
46  
47  
    *(int *n) {
48  
    }
49  
    
50  
    S read(L<S> tok) {
51  
      while (tok.size() > seen) {
52  
        ++seen;
53  
        if (seen > n)
54  
          map.put(new ArrayList<S>(tok.subList(seen-n-1, seen-1)), tok.get(seen-1));
55  
      }
56  
      
57  
      if (tok.size() >= n)
58  
        return map.get(new ArrayList<S>(tok.subList(tok.size()-n, tok.size())));
59  
        
60  
      return null;
61  
    }
62  
    
63  
    // slow...
64  
    P oldDerive() {
65  
      Tuples t = new Tuples(n);
66  
      t.map.putAll(map);
67  
      // t.seen == 0 which is ok
68  
      return t;
69  
    }
70  
    
71  
    // fast!
72  
    P derive() {
73  
      Tuples t = new Tuples(n);
74  
      t.map = new DerivedHashMap<L<S>,S>(map);
75  
      return t;
76  
    }
77  
  }
78  
  
79  
  static class DerivedHashMap<A, B> extends AbstractMap<A, B> {
80  
    Map<A, B> base;
81  
    new HashMap<A, B> additions;
82  
    
83  
    *(Map<A, B> *base) {}
84  
    
85  
    public B get(Object key) {
86  
      B b = additions.get(key);
87  
      if (b != null) return b;
88  
      return base.get(key);
89  
    }
90  
    
91  
    public B put(A key, B value) {
92  
      return additions.put(key, value);
93  
    }
94  
    
95  
    public Set<Map.Entry<A,B>> entrySet() {
96  
      throw fail();
97  
    }
98  
  }
99  
  
100  
  // TODO: Put NewX back in
101  
  
102  
  p {
103  
    tok = makeCorpusJavaTok(numSnippets);
104  
    print("Tokens in corpus: " + tok.size());
105  
    
106  
    print("Learning...");
107  
    collector = new Collector;
108  
    /*test(new Tuples(1));
109  
    test(new Tuples(2));
110  
    test(new Tuples(3));
111  
    test(new Tuples(4));
112  
    test(new Chain(new Tuples(2), new Tuples(1)));*/
113  
    test(new Chain(new Tuples(4), new Tuples(3), new Tuples(2), new Tuples(1)));
114  
115  
    print("Learning done.");
116  
    if (collector.winner != null && showGUI) {
117  
      predicted = collector.predicted;
118  
      showColoredText();
119  
    }
120  
  }
121  
  
122  
  // test a predictor
123  
  /**
   * Scores predictor p against the corpus and reports the result to the
   * collector.
   *
   * Walks the corpus tokens at odd indices (1, 3, 5, ...), asks p for a
   * prediction from the history of previously visited tokens, and compares
   * it with the actual token. The score is the percentage of characters
   * (token lengths) that were predicted correctly. Indices of correctly
   * predicted tokens are stored in the static set "predicted".
   *
   * NOTE(review): odd indices are presumably the code tokens of javaTok's
   * alternating whitespace/code layout - confirm.
   */
  static void test(P p) {
    predicted = new TreeSet<int>();
    int points = 0, total = 0, lastPercent = 0;
    new L<S> history;
    int size = tok.size();
    for (int i = 1; i < size; i += 2) {
      S t = tok.get(i);
      // Predict before the actual token is revealed to the predictor.
      S x = p.read(history);
      total += t.length();
      if (t.equals(x)) {
        predicted.add(i);
        points += t.length();
      }
      history.add(t);

      // NOTE(review): roundUpTo is defined outside this file; if it rounds
      // up to the next multiple of 10, "10% done" appears once 1% is
      // processed - confirm this is intended.
      int percent = roundUpTo(10, (int) (i*100L/size));
      if (percent > lastPercent) {
        print("Learning " + percent + "% done.");
        lastPercent = percent;
      }
    }

    // With no tokens to score, 0*100.0/0 would be NaN and the collector
    // would announce a "NaN%" winner; report a score of 0 instead.
    double score = total == 0 ? 0 : points*100.0/total;
    collector.add(p, score);
  }
146  
  
147  
  /**
   * Opens a window showing the tail of the corpus, with correctly predicted
   * tokens in green and all other tokens in gray.
   *
   * The displayed range starts at the latest token index from which the
   * accumulated token length exceeds maxCharsGUI (or at 0 if the whole
   * corpus fits).
   */
  static void showColoredText() ctex {
    JFrame frame = new JFrame("Predicted = green");
    Container content = frame.getContentPane();

    JTextPane pane = new JTextPane();
    //pane.setFont(loadFont("#1000993", 24));
    Document doc = pane.getStyledDocument();

    // Walk backwards from the end until the character budget is used up.
    int start = tok.size();
    int chars = 0;
    while (chars <= maxCharsGUI && start > 0) {
      start--;
      chars += tok.get(start).length();
    }

    // One attribute set per color, shared by all tokens of that color.
    SimpleAttributeSet hit = new SimpleAttributeSet();
    StyleConstants.setForeground(hit, Color.green);
    SimpleAttributeSet miss = new SimpleAttributeSet();
    StyleConstants.setForeground(miss, Color.gray);

    for (int idx = start; idx < tok.size(); idx++) {
      S text = tok.get(idx);
      if (text.length() == 0)
        continue;
      SimpleAttributeSet style = predicted.contains(idx) ? hit : miss;
      doc.insertString(doc.getLength(), text, style);
    }

    content.add(new JScrollPane(pane), BorderLayout.CENTER);

    frame.setBounds(100, 100, 600, 600);
    frame.setVisible(true);
  }
175  
  
176  
  !include #1000989 // SnippetDB
177  
  
178  
  /**
   * Builds the token corpus from the snippet database identified by
   * corpusID.
   *
   * Rows are read in "sn_created" order; the first
   * min(rows.size(), numSnippets) rows are used. For each row a small header
   * (ID and title) plus the snippet text is tokenized with javaTok and
   * appended to the result. All tokens are interned before returning.
   *
   * @param numSnippets maximum number of snippets to include
   * @return the concatenated, interned token list
   */
  static L<S> makeCorpusJavaTok(int numSnippets) {
    SnippetDB db = new SnippetDB(corpusID);
    List<List<S>> rows = db.rowsOrderedBy("sn_created");
    int limit = Math.min(rows.size(), numSnippets);
    new L<S> tok;
    // i is advanced by the loop header only, so every row below the limit
    // is included. An additional increment inside the body would skip every
    // second snippet.
    for (int i = 0; i < limit; i++) {
      List<S> row = rows.get(i);
      S id = db.getField(row, "sn_id");
      S title = db.getField(row, "sn_title");
      S text = db.getField(row, "sn_text");

      new StringBuilder buf;
      buf.append("\n== ID: ").append(id);
      buf.append("\n== Title: ").append(title);
      buf.append("\n==\n");
      buf.append(text).append("\n");

      // Drop the last token of the previous snippet before appending.
      // NOTE(review): presumably javaTok output starts and ends with a
      // whitespace token, so this keeps the whitespace/code alternation
      // intact across snippets - confirm.
      if (tok.size() != 0) tok.remove(tok.size()-1);
      tok.addAll(javaTok(buf.toString()));
    }
    return internAll(tok);
  }
197  
  
198  
  static L<S> internAll(L<S> tok) {
199  
    new L<S> l;
200  
    for (S t : tok)
201  
      l.add(t.intern());
202  
    return l;
203  
  }
204  
  
205  
  static class Collector {
206  
    P winner;
207  
    double bestScore = -1;
208  
    Set<int> predicted;
209  
210  
    void add(P p, double score) {
211  
      if (winner == null || score > bestScore) {
212  
        winner = p;
213  
        bestScore = score;
214  
        //S name = shorten(structure(p), 100);
215  
        S name = p.getClass().getName();
216  
        print("New best score: " + formatDouble(score, 2) + "% (" + name + ")");
217  
        this.predicted = main.predicted;
218  
      }
219  
    }
220  
  }
221  
}

Author comment

Began life as a copy of #1000995

download  show line numbers  debug dex  old transpilations   

Travelled to 16 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, jtubtzbbkimh, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1001000
Snippet name: Token prediction, multiple predictors (improving architecture)
Eternal ID of this version: #1001000/1
Text MD5: 69216550a151d4f135d39f4547654aec
Transpilation MD5: 5fd815aa086d3473101aa689540506e8
Author: stefan
Category:
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-09-15 20:55:52
Source code size: 5669 bytes / 221 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 740 / 802
Referenced in: [show references]