Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

258
LINES

< > BotCompany Repo | #705 // An experiment with predictors

JavaX source code [tags: use-pretranspiled] - run with: x30.jar

Libraryless. Click here for Pure Java version (512L/5K/15K).

!636
!quicknew
!688 // buf.isEmpty
!standard functions

/**
 * Base type for all predictors: given the text seen so far, a predictor
 * guesses how the text continues.
 */
abstract class Predictor {
  /**
   * Optional hook for feeding extra material to the predictor before
   * predicting. The default implementation ignores it.
   */
  public void preload(String s) {
  }

  /**
   * Guesses the continuation of {@code s}.
   *
   * @param s     the text seen so far
   * @param chars number of characters the caller is interested in
   * @return the predicted continuation
   */
  public abstract String predict(String s, int chars);
}

main {
  // Names given via "debug <name>" arguments. A predictor whose class name
  // (with the leading "main$" stripped) is in this set gets verbose output.
  static new (Hash)Set<String> debug;
  
  // Program entry point. Recognized arguments:
  //   debug <name>   - verbose output for the predictor with that class name
  //                    (<name> is also passed to debugOn)
  //   preload <id>   - load that snippet and hand it to every predictor's preload()
  //   <snippet id>   - use that snippet's text as input instead of the alphabet
  //
  // Walks through the input; at each position every predictor guesses the rest
  // of the input and is scored by the length of the correctly guessed prefix
  // (getScore2). The best guess is skipped over and shown as '*' in the
  // compacted output; unguessed characters are copied verbatim.
  psvm {
    String input = "abcdefghijklmnopqrstuvwxyz";
    new List<String> preloads;
    
    for (int i = 0; i < args.length; i++) {
      String arg = args[i];
      if (arg.equals("debug")) {
        // fail with a readable message instead of running past the end of args
        if (i+1 >= args.length)
          throw new IllegalArgumentException("Missing predictor name after 'debug'");
        String s = args[++i];
        debug.add(s);
        debugOn(s);
      } else if (isSnippetID(arg))
        input = loadSnippet(arg);
      else if (arg.equals("preload")) {
        if (i+1 >= args.length)
          throw new IllegalArgumentException("Missing snippet ID after 'preload'");
        String s = args[++i];
        preloads.add(loadSnippet(s));
      }
    }
 
    // normalize line endings
    input = input.replace("\r", "");   
    
    new List<Predictor> predictors;
    add(predictors, new PLin, new PWords, new PIndent);
    
    for (Predictor p : predictors)
      for (String s : preloads)
        p.preload(s);
    
    new Collector globalCollector; // best prediction over the whole run
    int skips = 0, totalSkip = 0;
    new StringBuilder compacted;

    for (int splitPoint = 0; splitPoint < input.length(); splitPoint++) {
      String before = input.substring(0, splitPoint);
      String rest = input.substring(splitPoint);
      new Collector collector; // best prediction at this split point
      for (Predictor p : predictors) {
        boolean doDebug = debug.contains(p.getClass().getName().replaceAll("^main\\$", ""));
        int chars = rest.length();
        String prediction = "";
        try {
          prediction = p.predict(before, chars);
          if (doDebug)
            System.out.println("Actual prediction: " + prediction);
        } catch (Throwable e) {
          // Best effort: a failing predictor just contributes an empty
          // prediction. Report the failure only when debugging this predictor.
          if (doDebug)
            System.out.println("Prediction failed: " + e);
        }
          
        // convenience result fixing for the predictors
        if (prediction == null) prediction = "";
        if (prediction.length() > chars)
          prediction = prediction.substring(0, chars);
          
        // chars == rest.length(), so the whole rest is what we compare against
        String actual = rest;
        int improvement = getScore2(prediction, actual);
        collector.add(p, prediction, improvement, splitPoint);
        
        if (doDebug) {
          // show a little more of the real text than was predicted
          String expected = actual.substring(0, Math.min(prediction.length()+5, rest.length()));
          System.out.println(splitPoint + "*" + improvement + " " + structure(p) + " -> " + quote(prediction) + " vs " + quote(expected) + " (error " + (actual.length()-improvement) + " of " + rest.length() + ")");
        }
        
        globalCollector.add(p, prediction, improvement, splitPoint);
      }
      
      int skip = 0;
      if (collector.bestScore > 0) {
        System.out.println(splitPoint + " " + collector.bestScore + " " + structure(collector.winner));
        
        skip = commonPrefix(rest, collector.winnerResult).length();
      }
       
      if (skip != 0) {
        System.out.println("Skipping " + skip + ": " + quote(collector.winnerResult.substring(0, skip)));
        splitPoint += skip-1; // the loop's own ++ supplies the remaining step
        totalSkip += skip;
        ++skips;
        compacted.append('*');
      } else
        compacted.append(input.charAt(splitPoint));
    }
    
    System.out.println("\n" + compacted + "\n\n");
    
    System.out.println("Highest score seen: " + globalCollector.bestScore + " by " + structure(globalCollector.winner) + " at " + globalCollector.splitPoint);
    System.out.println("Total characters skipped: " + totalSkip + "/" + input.length() + " (skips: " + skips + ")");
  }
  
  // Remembers the best-scoring prediction it has been offered, together with
  // the predictor that made it and the split point it was made at.
  static class Collector {
    Predictor winner;
    String winnerResult;
    long bestScore = -1;
    int splitPoint;
    
    // Offers a candidate. It replaces the current winner if there is none yet
    // or if its score is strictly higher (ties keep the earlier candidate).
    void add(Predictor p, String result, long score, int splitPoint) {
      boolean haveWinner = winner != null;
      if (haveWinner && score <= bestScore)
        return;
      
      this.winner = p;
      this.winnerResult = result;
      this.bestScore = score;
      this.splitPoint = splitPoint;
    }
  }
  
  !include #1000388 // "leven" function (Levenshtein distance)
  
  /*static class P0 extends Predictor {
    public String predict(String s, int chars) {
      return "";
    }
  }
  
  static class P1 extends Predictor {
    public String predict(String s, int chars) {
      return s;
    }
  }*/
  
  static class PLin extends Predictor {
    public String predict(String s, int chars) {
      if (s.length() < 2) return "";
      char a = s.charAt(s.length()-2);
      char b = s.charAt(s.length()-1);
      int step = (char) (((int) b) - (int) a);
      new StringBuilder buf;
      for (int i = 0; i < chars; i++) {
        char c = (char) (((int) a) + step*(i+2));
        buf.append(c);
      }
      return buf.toString();
    }
  }
  
  // Appends all the given objects to the collection, in argument order.
  static <C> void add(Collection<C> c, C... objects) {
    Collections.addAll(c, objects);
  }
  
  static String structure(Object o) {
    String name = o.getClass().getName();
    
    new StringBuilder buf;
    
    if (o instanceof Collection) {
      for (Object x : (Collection) o) {
        if (!buf.isEmpty()) buf.append(", ");
        buf.append(structure(x));
      }
      return "{" + buf + "}";
    }
    
    // Need more cases? This should cover all library classes...
    if (name.startsWith("java.") || name.startsWith("javax."))
      return String.valueOf(o);
      
    String shortName = o.getClass().getName().replaceAll("^main\\$", "");

    // TODO: go to superclasses too
    Field[] fields = o.getClass().getDeclaredFields();
    for (Field field : fields) {
      if ((field.getModifiers() & Modifier.STATIC) != 0)
        continue;
      Object value;
      try {
        value = field.get(o);
      } catch (Exception e) {
        value = "?";
      }
      
      String fieldName = field.getName();
      
      // special case for PWords - show only number of preloaded words
      if (shortName.equals("PWords") && field.getName().equals("preloaded"))
        value = ((Collection) value).size();

      if (!buf.isEmpty()) buf.append(", ");
      buf.append(fieldName + "=" + structure(value));
    }
    String s = shortName;
    if (!buf.isEmpty())
      s += "(" + buf + ")";
    return s;
  }
  
  // Score based on edit distance: length of the actual text minus the
  // distance (as computed by leven) between prediction and actual text.
  static int getScore1(String prediction, String rest) {
    int distance = leven(prediction, rest);
    int available = rest.length();
    return available - distance;
  }
  
  // Score = length of the common prefix of prediction and actual text.
  static int getScore2(String prediction, String rest) {
    String shared = commonPrefix(prediction, rest);
    return shared.length();
  }
  
  // Word completion: if the text ends in a partial word, predicts the rest of
  // a known word starting with it. Known words are those found in the text so
  // far plus those found in preloaded texts.
  static class PWords extends Predictor {
    static boolean debug;
    new (Tree)Set<String> preloaded;
    
    // Adds all words found in s to the preloaded vocabulary.
    public void preload(String s) {
      preloaded.addAll(findWords(s));
    }
    
    // Returns the remainder of the first known word (in reverse order, so
    // longer words come before their own prefixes) that starts with the word
    // at the very end of s; "" if s does not end in a word or nothing matches.
    public String predict(String s, int chars) {
      Set<String> words = findWords(s);
      words.addAll(preloaded);
      // \z = absolute end of input. A plain $ would also match just before a
      // trailing line terminator and pick up the previous line's last word.
      String word = match("\\w+\\z", s);
      if (word == null) return "";
      if (debug)
        System.out.println("Looking for: " + word);
      for (String w : words)
        if (w.startsWith(word)) {
          String pred = w.substring(word.length());
          if (debug)
            System.out.println("PWords: predicted " + quote(pred) + " for " + quote(word) + " based on word " + quote(w));
          return pred;
        }
      return "";
    }
    
    boolean firstTime = true;
    
    // Collects all \w+ runs in s into a set sorted in reverse order.
    Set<String> findWords(String s) {
      Set<String> words = new TreeSet<String>(
        Collections.reverseOrder()); // important so partial matches come later
      words.addAll(matchAll("\\w+", s));
      if (debug && firstTime && s.length() >= 100) {
        firstTime = false;
        //System.out.println("Words found: " + structure(words));
      }
      return words;
    }
  }

  // Returns the first substring of text that matches the regular expression
  // pattern, or null if there is no match.
  static String match(String pattern, String text) {
    Matcher matcher = Pattern.compile(pattern).matcher(text);
    return matcher.find() ? matcher.group() : null;
  }
  
  // Indentation prediction: guesses that the text continues with a line break
  // followed by the same leading spaces/tabs as the current (last) line.
  static class PIndent extends Predictor {
    public String predict(String s, int chars) {
      // First character of the current line; 0 when s contains no newline
      // (lastIndexOf returns -1), so the first line is handled as well.
      int lineStart = s.lastIndexOf('\n') + 1;
      int j = lineStart;
      while (j < s.length() && (s.charAt(j) == ' ' || s.charAt(j) == '\t'))
        ++j;
      // Take only the indentation itself, not the newline preceding it,
      // so the prediction contains exactly one line break.
      return "\n" + s.substring(lineStart, j);
    }
  }
}

download  show line numbers  debug dex  old transpilations   

Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #705
Snippet name: An experiment with predictors
Eternal ID of this version: #705/1
Text MD5: 8059a7beeef1b00168b04cfdf9dae892
Transpilation MD5: dc3c4fae9e9a22d11ea51f435459ad77
Author: stefan
Category:
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-08-03 00:38:34
Source code size: 8213 bytes / 258 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 657 / 775
Referenced in: [show references]