Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

198
LINES

< > BotCompany Repo | #1000995 // Token prediction, multiple predictors

JavaX source code [tags: use-pretranspiled] - run with: x30.jar

Libraryless. Click here for Pure Java version (2400L/19K/57K).

!747

// Base type of all token predictors in this snippet.
// A predictor is driven one token at a time: read() is asked for a guess,
// then write() is given the token that actually occurred.
abstract class P {
  // Feeds the next actual token of the stream to the predictor.
  abstract void write(S tok);
  // Returns the predicted next token, or null when there is no prediction.
  abstract S read();
}

!include #1000774 // CircularFifoBuffer

m {
  // Passed to makeCorpusJavaTok as the limit on snippet rows.
  static int numSnippets = 2000;
  // When true, the winning predictor's hits are shown in a window.
  static boolean showGUI = true;
  // Character budget used by showColoredText to pick how much of the corpus tail to display.
  static int maxCharsGUI = 500000;
  
  // Tracks the best-scoring predictor across all test() runs.
  static Collector collector;
  // The tokenized corpus produced by makeCorpusJavaTok.
  static L<S> tok;
  // Token indices predicted correctly; reset at the start of each test() run
  // and set to the winner's set before the GUI is shown.
  static Set<int> predicted;
  
  // Predictor that combines several predictors.
  // Every written token is forwarded to all members; read() returns the first
  // non-null prediction in list order, so earlier members take priority.
  static class Chain extends P {
    new L<P> list;
    
    *() {}
    *(L<P> *list) {}
    
    // Copies the arguments into a growable list. Without the copy, the list
    // could be a fixed-size view of the caller's array (that is what
    // java.util.Arrays.asList returns), in which case add() would throw
    // UnsupportedOperationException.
    *(P... a) { list = new ArrayList<P>(asList(a)); }
    
    // Appends a predictor; it is consulted after all existing members.
    void add(P p) { list.add(p); }
    
    // Forwards the token to every member so all of them keep learning.
    void write(S tok) {
      for (P p : list)
        p.write(tok);
    }
    
    // Returns the first member prediction that is not null, or null if
    // no member has a prediction.
    S read() {
      for (P p : list) {
        S s = p.read();
        if (s != null) return s;
      }
      return null;
    }
  }
    
  // Predictor that remembers, for each token, the token that most recently
  // followed it, and predicts that follower again.
  static class Pairs extends P {
    new Map<S,S> map;
    S last;

    // Records tok as the follower of the previously written token (if any),
    // then makes tok the new "previous" token.
    void write(S tok) {
      S previous = last;
      last = tok;
      if (previous != null)
        map.put(previous, tok);
    }

    // Returns the recorded follower of the last written token, or null when
    // nothing has been written yet or no follower is known.
    S read() {
      if (last == null)
        return null;
      return map.get(last);
    }
  }

  // Predictor keyed on the last n tokens: remembers which token most recently
  // followed each n-token sequence and predicts that token again.
  static class Tuples extends P {
    Map<L<S>,S> map = new HashMap<L<S>,S>();
    int n;
    CircularFifoBuffer<S> buf;

    // n = number of preceding tokens that form the lookup key.
    *(int *n) {
      buf = new CircularFifoBuffer<S>(n);
    }
    
    // Snapshot of the last n tokens, or null while fewer than n were written.
    // Uses buf.asList(), the accessor NewX relies on for oldest-first order,
    // so that the same token sequence always yields the same key.
    // NOTE(review): the previous code used buf.getBackingStore(), which
    // presumably exposes the ring's physical storage and would make keys
    // depend on the rotation offset - confirm against CircularFifoBuffer.
    private L<S> context() {
      if (buf.size() != n)
        return null;
      return new ArrayList<S>(buf.asList());
    }
    
    // Learns "current context -> tok" (once the buffer is full), then shifts
    // tok into the context window.
    void write(S tok) {
      L<S> key = context();
      if (key != null)
        map.put(key, tok);
      buf.add(tok);
    }

    // Returns the token last seen after the current context, or null if the
    // buffer is not full yet or the context is unknown.
    S read() {
      L<S> key = context();
      return key == null ? null : map.get(key);
    }
  }
  
  // Hand-written rule for the pattern "Bla x = new Bla": once the last four
  // tokens end in "=" "new", predict that the first of the four is repeated.
  static class NewX extends P {
    CircularFifoBuffer<S> buf = new CircularFifoBuffer<S>(4);
    
    // Only keeps the four most recent tokens; nothing is learned.
    void write(S tok) { buf.add(tok); }

    // Returns the first token of the 4-token window when the window ends in
    // "=" followed by "new"; null otherwise (including a partly filled window).
    S read() {
      if (buf.size() != 4)
        return null;
      L<S> window = buf.asList();
      boolean endsWithAssignNew = window.get(2).equals("=") && window.get(3).equals("new");
      return endsWithAssignNew ? window.get(0) : null;
    }
  }
  
  // Entry point: builds the corpus, scores each predictor configuration and,
  // if enabled, visualizes the hits of the best one.
  p {
    tok = makeCorpusJavaTok(numSnippets);
    print("Tokens in corpus: " + tok.size());
    
    collector = new Collector;

    // single predictors
    test(new Pairs);
    for (int size = 2; size <= 4; size++)
      test(new Tuples(size));

    // chains: longest context first, falling back to shorter ones
    test(new Chain(new Tuples(2), new Pairs));
    test(new Chain(new Tuples(4), new Tuples(3), new Tuples(2), new Pairs));
    test(new Chain(new NewX, new Tuples(4), new Tuples(3), new Tuples(2), new Pairs));

    // show the winner's hits (the global set currently holds the last run's)
    if (showGUI && collector.winner != null) {
      predicted = collector.predicted;
      showColoredText();
    }
  }
  
  // Scores a predictor on the corpus and reports the result to the collector.
  // Steps through tok at odd indices; before each token is written to the
  // predictor, its guess is compared with the actual token. The score is the
  // percentage of characters (token lengths) that were guessed correctly.
  // Side effect: replaces the global `predicted` set with this run's hits.
  static void test(P p) {
    predicted = new TreeSet<int>();
    int hitChars = 0, allChars = 0;
    int idx = 1;
    while (idx < tok.size()) {
      S actual = tok.get(idx);
      S guess = p.read();
      int weight = actual.length();
      allChars += weight;
      if (actual.equals(guess)) {
        predicted.add(idx);
        hitChars += weight;
      }
      // learn only after guessing, so the predictor never sees the answer first
      p.write(actual);
      idx += 2;
    }
    collector.add(p, hitChars*100.0/allChars);
  }
  
  // Opens a window showing the tail of the corpus. Tokens whose index is in
  // `predicted` are drawn green, all other tokens gray; empty tokens are skipped.
  static void showColoredText() ctex {
    JFrame jf = new JFrame("Predicted = green");

    JTextPane pane = new JTextPane();
    //pane.setFont(loadFont("#1000993", 24));
    Document doc = pane.getStyledDocument();

    // Walk backwards from the end until the character budget is exceeded
    // (or the start of the corpus is reached); display starts at that index.
    int start = tok.size(), chars = 0;
    while (start > 0 && chars <= maxCharsGUI) {
      start--;
      chars += tok.get(start).length();
    }

    // One attribute set per color, shared by all tokens of that color.
    SimpleAttributeSet hitStyle = new SimpleAttributeSet();
    StyleConstants.setForeground(hitStyle, Color.green);
    SimpleAttributeSet missStyle = new SimpleAttributeSet();
    StyleConstants.setForeground(missStyle, Color.gray);

    for (int idx = start; idx < tok.size(); idx++) {
      S text = tok.get(idx);
      if (text.length() != 0)
        doc.insertString(doc.getLength(), text, predicted.contains(idx) ? hitStyle : missStyle);
    }

    jf.getContentPane().add(new JScrollPane(pane), BorderLayout.CENTER);
    jf.setBounds(100, 100, 600, 600);
    jf.setVisible(true);
  }
  
  !include #1000989 // SnippetDB
  
  // Builds the token corpus from snippet database #1000673.
  // Reads the rows ordered by "sn_created", takes at most numSnippets of them,
  // renders each as a small header (ID, title) followed by its text, tokenizes
  // that with javaTok and concatenates the results into one token list.
  static L<S> makeCorpusJavaTok(int numSnippets) {
    SnippetDB db = new SnippetDB("#1000673");
    List<List<S>> rows = db.rowsOrderedBy("sn_created");
    int limit = Math.min(rows.size(), numSnippets);
    new L<S> tok;
    // The loop header is the only place i advances; an extra ++i in the body
    // used to skip every other row.
    for (int i = 0; i < limit; i++) {
      List<S> row = rows.get(i);
      new StringBuilder buf;
      S id = db.getField(row, "sn_id");
      S title = db.getField(row, "sn_title");
      S text = db.getField(row, "sn_text");
      buf.append("\n== ID: " + id);
      buf.append("\n== Title: " + title);
      buf.append("\n==\n");
      buf.append(text).append("\n");
      // Drop the last token collected so far before appending the next batch.
      // Presumably javaTok output starts and ends with a whitespace token, so
      // this keeps code tokens on odd indices (test() reads odd indices) -
      // TODO confirm against javaTok.
      if (tok.size() != 0) tok.remove(tok.size()-1);
      tok.addAll(javaTok(buf.toString()));
    }
    return tok;
  }
  
  // Remembers the best-scoring predictor together with the set of token
  // indices it predicted correctly.
  static class Collector {
    P winner;
    double bestScore = -1;
    Set<int> predicted;

    // Registers a finished test run. The run becomes the new winner if it is
    // the first one or its score is strictly higher than the best so far; in
    // that case the score is printed and main.predicted is captured.
    void add(P p, double score) {
      boolean improved = winner == null || score > bestScore;
      if (!improved)
        return;
      winner = p;
      bestScore = score;
      S description = shorten(structure(p), 100);
      print("New best score: " + formatDouble(score, 2) + "% (" + description + ")");
      this.predicted = main.predicted;
    }
  }
}

Author comment

Began life as a copy of #1000991

download  show line numbers  debug dex  old transpilations   

Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1000995
Snippet name: Token prediction, multiple predictors
Eternal ID of this version: #1000995/1
Text MD5: 11bef061c74da579d77f57eacd842ca3
Transpilation MD5: 2850bc9c2d210916bf4ecb2bf53c196d
Author: stefan
Category:
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-09-14 21:36:10
Source code size: 4998 bytes / 198 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 649 / 610
Referenced in: [show references]