Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

258
LINES

< > BotCompany Repo | #705 // An experiment with predictors

JavaX source code [tags: use-pretranspiled] - run with: x30.jar

Libraryless. Click here for Pure Java version (512L/5K/15K).

1  
!636
2  
!quicknew
3  
!688 // buf.isEmpty
4  
!standard functions
5  
6  
/**
 * Base type for the prediction strategies compared by this experiment.
 */
abstract class Predictor {

  /**
   * Guesses the text that follows {@code s}.
   *
   * @param s     the text seen so far
   * @param chars how many characters the caller is interested in
   * @return the predicted continuation
   */
  public abstract String predict(String s, int chars);

  /**
   * Offers additional text to the predictor ahead of any predictions.
   * The default implementation does nothing.
   *
   * @param s the text to preload
   */
  public void preload(String s) {
  }
}
10  
11  
main {
12  
  static new (Hash)Set<String> debug;
13  
  
14  
  psvm {
15  
    String input = "abcdefghijklmnopqrstuvwxyz";
16  
    new List<String> preloads;
17  
    
18  
    for (int i = 0; i < args.length; i++) {
19  
      String arg = args[i];
20  
      if (arg.equals("debug")) {
21  
        String s = args[++i];
22  
        debug.add(s);
23  
        debugOn(s);
24  
      } else if (isSnippetID(arg))
25  
        input = loadSnippet(arg);
26  
      else if (arg.equals("preload")) {
27  
        String s = args[++i];
28  
        preloads.add(loadSnippet(s));
29  
      }
30  
    }
31  
 
32  
    input = input.replace("\r", "");   
33  
    
34  
    new List<Predictor> predictors;
35  
    add(predictors, new PLin, new PWords, new PIndent);
36  
    
37  
    for (Predictor p : predictors)
38  
      for (String s : preloads)
39  
        p.preload(s);
40  
    
41  
    new Collector globalCollector;
42  
    int skips = 0, totalSkip = 0;
43  
    new StringBuilder compacted;
44  
45  
    for (int splitPoint = 0; splitPoint < input.length(); splitPoint++) {
46  
      String before = input.substring(0, splitPoint);
47  
      String rest = input.substring(splitPoint);
48  
      new Collector collector;
49  
      for (Predictor p : predictors) {
50  
        boolean doDebug = debug.contains(p.getClass().getName().replaceAll("^main\\$", ""));
51  
        int chars = rest.length();
52  
        String prediction = "";
53  
        try {
54  
          prediction = p.predict(before, chars);
55  
          if (doDebug)
56  
            System.out.println("Actual prediction: " + prediction);
57  
        } catch (Throwable e) {
58  
          // silent exception
59  
        }
60  
          
61  
        // convenience result fixing for the predictors
62  
        if (prediction == null) prediction = "";
63  
        if (prediction.length() > chars)
64  
          prediction = prediction.substring(0, chars);
65  
          
66  
        String actual = rest.length() > chars ? rest.substring(0, chars) : rest;
67  
        int improvement = getScore2(prediction, actual);
68  
        collector.add(p, prediction, improvement, splitPoint);
69  
        
70  
        if (doDebug) {
71  
          String expected = actual.substring(0, Math.min(prediction.length()+5, rest.length()));
72  
          System.out.println(splitPoint + "*" + improvement + " " + structure(p) + " -> " + quote(prediction) + " vs " + quote(expected) + " (error " + (actual.length()-improvement) + " of " + rest.length() + ")");
73  
        }
74  
        
75  
        globalCollector.add(p, prediction, improvement, splitPoint);
76  
      }
77  
      
78  
      int skip = 0;
79  
      if (collector.bestScore > 0) {
80  
        System.out.println(splitPoint + " " + collector.bestScore + " " + structure(collector.winner));
81  
        
82  
        skip = commonPrefix(rest, collector.winnerResult).length();
83  
      }
84  
       
85  
      if (skip != 0) {
86  
        System.out.println("Skipping " + skip + ": " + quote(collector.winnerResult.substring(0, skip)));
87  
        splitPoint += skip-1;
88  
        totalSkip += skip;
89  
        ++skips;
90  
        compacted.append('*');
91  
      } else
92  
        compacted.append(input.charAt(splitPoint));
93  
    }
94  
    
95  
    System.out.println("\n" + compacted + "\n\n");
96  
    
97  
    System.out.println("Highest score seen: " + globalCollector.bestScore + " by " + structure(globalCollector.winner) + " at " + globalCollector.splitPoint);
98  
    System.out.println("Total characters skipped: " + totalSkip + "/" + input.length() + " (skips: " + skips + ")");
99  
  }
100  
  
101  
  static class Collector {
102  
    Predictor winner;
103  
    String winnerResult;
104  
    long bestScore = -1;
105  
    int splitPoint;
106  
    
107  
    void add(Predictor p, String result, long score, int splitPoint) {
108  
      if (winner == null || score > bestScore) {
109  
        winner = p;
110  
        winnerResult = result;
111  
        bestScore = score;
112  
        this.splitPoint = splitPoint;
113  
      }
114  
    }
115  
  }
116  
  
117  
  !include #1000388 // "leven" function (Levenshtein distance)
118  
  
119  
  /*static class P0 extends Predictor {
120  
    public String predict(String s, int chars) {
121  
      return "";
122  
    }
123  
  }
124  
  
125  
  static class P1 extends Predictor {
126  
    public String predict(String s, int chars) {
127  
      return s;
128  
    }
129  
  }*/
130  
  
131  
  static class PLin extends Predictor {
132  
    public String predict(String s, int chars) {
133  
      if (s.length() < 2) return "";
134  
      char a = s.charAt(s.length()-2);
135  
      char b = s.charAt(s.length()-1);
136  
      int step = (char) (((int) b) - (int) a);
137  
      new StringBuilder buf;
138  
      for (int i = 0; i < chars; i++) {
139  
        char c = (char) (((int) a) + step*(i+2));
140  
        buf.append(c);
141  
      }
142  
      return buf.toString();
143  
    }
144  
  }
145  
  
146  
  static <C> void add(Collection<C> c, C... objects) {
147  
    for (C x : objects) c.add(x);
148  
  }
149  
  
150  
  static String structure(Object o) {
151  
    String name = o.getClass().getName();
152  
    
153  
    new StringBuilder buf;
154  
    
155  
    if (o instanceof Collection) {
156  
      for (Object x : (Collection) o) {
157  
        if (!buf.isEmpty()) buf.append(", ");
158  
        buf.append(structure(x));
159  
      }
160  
      return "{" + buf + "}";
161  
    }
162  
    
163  
    // Need more cases? This should cover all library classes...
164  
    if (name.startsWith("java.") || name.startsWith("javax."))
165  
      return String.valueOf(o);
166  
      
167  
    String shortName = o.getClass().getName().replaceAll("^main\\$", "");
168  
169  
    // TODO: go to superclasses too
170  
    Field[] fields = o.getClass().getDeclaredFields();
171  
    for (Field field : fields) {
172  
      if ((field.getModifiers() & Modifier.STATIC) != 0)
173  
        continue;
174  
      Object value;
175  
      try {
176  
        value = field.get(o);
177  
      } catch (Exception e) {
178  
        value = "?";
179  
      }
180  
      
181  
      String fieldName = field.getName();
182  
      
183  
      // special case for PWords - show only number of preloaded words
184  
      if (shortName.equals("PWords") && field.getName().equals("preloaded"))
185  
        value = ((Collection) value).size();
186  
187  
      if (!buf.isEmpty()) buf.append(", ");
188  
      buf.append(fieldName + "=" + structure(value));
189  
    }
190  
    String s = shortName;
191  
    if (!buf.isEmpty())
192  
      s += "(" + buf + ")";
193  
    return s;
194  
  }
195  
  
196  
  static int getScore1(String prediction, String rest) {
197  
    int error = leven(prediction, rest);
198  
    return rest.length()-error;
199  
  }
200  
  
201  
  static int getScore2(String prediction, String rest) {
202  
    return commonPrefix(prediction, rest).length();
203  
  }
204  
  
205  
  /**
   * Predicts the completion of the word currently being typed, using the
   * words already seen in the input plus any preloaded words.
   */
  static class PWords extends Predictor {
    static boolean debug;
    new (Tree)Set<String> preloaded;

    // Cleared by findWords the first time it sees at least 100 characters
    // while debugging.
    boolean firstTime = true;

    /** Adds every word found in the given text to the preloaded words. */
    public void preload(String s) {
      preloaded.addAll(findWords(s));
    }

    /**
     * Takes the word characters at the very end of the input and returns
     * the remainder of the first known word that starts with them, or ""
     * if the input does not end in a word or no known word matches.
     */
    public String predict(String s, int chars) {
      Set<String> candidates = findWords(s);
      candidates.addAll(preloaded);

      String prefix = match("\\w+$", s);
      if (prefix == null)
        return "";

      if (debug)
        System.out.println("Looking for: " + prefix);

      for (String candidate : candidates) {
        if (!candidate.startsWith(prefix))
          continue;
        String pred = candidate.substring(prefix.length());
        if (debug)
          System.out.println("PWords: predicted " + quote(pred) + " for " + quote(word(prefix)) + " based on word " + quote(candidate));
        return pred;
      }
      return "";
    }

    // Identity helper; keeps the debug line above readable.
    private static String word(String w) {
      return w;
    }

    /** Collects all words of the text into a set sorted in reverse order. */
    Set<String> findWords(String s) {
      // important so partial matches come later
      Set<String> found = new TreeSet<String>(Collections.reverseOrder());
      found.addAll(matchAll("\\w+", s));

      boolean firstLongInput = debug && firstTime && s.length() >= 100;
      if (firstLongInput) {
        firstTime = false;
        //System.out.println("Words found: " + structure(words));
      }
      return found;
    }
  }
242  
243  
  static String match(String pattern, String text) {
244  
    List<String> matches = new ArrayList<String>();
245  
    Matcher matcher = Pattern.compile(pattern).matcher(text);
246  
    return matcher.find() ? matcher.group() : null;
247  
  }
248  
  
249  
  static class PIndent extends Predictor {
250  
    public String predict(String s, int chars) {
251  
      int i = s.lastIndexOf('\n');
252  
      int j = i+1;
253  
      while (j < s.length() && (s.charAt(j) == ' ' || s.charAt(j) == '\t'))
254  
        ++j;
255  
      return "\n" + s.substring(i, j);
256  
    }
257  
  }
258  
}

download  show line numbers  debug dex  old transpilations   

Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #705
Snippet name: An experiment with predictors
Eternal ID of this version: #705/1
Text MD5: 8059a7beeef1b00168b04cfdf9dae892
Transpilation MD5: dc3c4fae9e9a22d11ea51f435459ad77
Author: stefan
Category:
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-08-03 00:38:34
Source code size: 8213 bytes / 258 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 600 / 692
Referenced in: [show references]