Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

57
LINES

< > BotCompany Repo | #1028547 - EnglishDateParser, attempt 2 [dev.]

JavaX fragment (include)

// Work-in-progress parser for English date expressions. It defines state
// classes for a ProbabilisticMachine2 and a parse() entry point that seeds
// the machine with an initial state.
// NOTE(review): this fragment is unfinished and does not look compilable as
// written (empty non-void method, locals used outside their scope, dangling
// statements, surplus closing braces). See the notes on each member.
sclass EnglishDateParser > DateStructures {
  bool assumeFuture = true; // e.g. for "tuesday" [not used]
  ProbabilisticMachine2 pm; // machine instance; assigned in parse()
  LS tok; // tokenized input; assigned from javaTok(s) in parse()
  
  // State type holding a parsed token range together with its interpretation.
  // NOTE(review): nothing in this fragment instantiates it yet.
  class CompletedParse extends ProbabilisticMachine2.State {
    IntRange tokenRange; // what did we parse
    SomeDate interpretation; // what do we think it means
  }
  
  
  // State that, judging by its name, is meant to parse a year starting at
  // token index iTok — TODO confirm; iTok is not read inside this record.
  record ParseYear(int iTok) extends ProbabilisticMachine2.State {
    int minTokensToConsume = 0; // lower bound of the loop in run()
    
    // NOTE(review): declared to return double but the body is empty.
    // run() calls calcProbabilityForMatchedTokens(...), which is a different
    // name and takes a token list, not an S — presumably one of the two is a
    // leftover from a rename; confirm which one is intended.
    double calcProbabilityForMatchedText(S s) {
    }
    
    // NOTE(review): `state` is not declared in this method (it is only a
    // parameter of run()), and maxTokensToConsume is a local here, so it is
    // not visible to run(), which reads it.
    void step {
      int maxTokensToConsume = state.remainingTokens();
    }

    // For every n from minTokensToConsume to maxTokensToConsume, clones the
    // given state, advances the clone's token position, scales its
    // probability, records the matched tokens and adds the clone to pm.
    @Override
    void run(State state) {
      
      // NOTE(review): `verbose` is not declared in this fragment —
      // presumably inherited; verify.
      if (verbose) print(this + ": maxTokensToConsume= " + maxTokensToConsume);
      
      for (int n = minTokensToConsume; n <= maxTokensToConsume; n++) {
        State s = state.prepareClone();
        // Two list positions per consumed token — presumably because the
        // javaTok list alternates non-code and code tokens; TODO confirm.
        s.iNextToken += n*2;
        // This local `tok` shadows the outer field `tok`; it is the slice
        // between the old and the new position.
        LS tok = subList(state.tok, state.iNextToken, s.iNextToken-1);
        s.probability = multiplyPercentages(s.probability, calcProbabilityForMatchedTokens(tok));
        // Records the (originating state, matched tokens) pair on the clone.
        s.matches = revChainPlus(s.matches, pair(state, tok));
        pm.addState(s);
      }
    }
  }    
    
      // NOTE(review): the next four lines are dangling — ParseYear is already
      // closed above, the `ret` has no expression, and the two closing braces
      // have no matching openers. Presumably a misplaced draft of a method
      // body (it tests whether the token at iTok is an integer); confirm
      // where it belongs.
      if (isInteger(tok.get(iTok)))
        ret 
    }
  }
  
  // State that, judging by its name, is meant to scan the input starting at
  // token index iTok — TODO confirm.
  record ScanFromToken(int iTok) extends ProbabilisticMachine2.State {
    // NOTE(review): `i` is not declared (presumably iTok is meant) and the
    // `if` has no statement; the step logic is still to be written.
    void step() {
      if (i <= l(tok))
    }
  }

  // Entry point: creates a fresh machine, tokenizes s with javaTok and adds
  // a ScanFromToken state at index 1.
  // NOTE(review): the `null` after the parameter list is presumably the
  // JavaX shorthand for "return null at the end" — confirm. The machine is
  // never run here and no parsed date is returned.
  SomeDate parse(S s) null {
    pm = new ProbabilisticMachine2;
    tok = javaTok(s);

    // let's try to parse a year just to get started
    
    pm.addState(new ScanFromToken(1));
  }
}

Author comment

Began life as a copy of #1028540

download  show line numbers  debug dex   

Travelled to 6 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028547
Snippet name: EnglishDateParser, attempt 2 [dev.]
Eternal ID of this version: #1028547/1
Text MD5: 4abd92799999279b5b202e3c776c07fa
Author: stefan
Category:
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-06-26 17:06:10
Source code size: 1574 bytes / 57 lines
Pitched / IR pitched: No / No
Views / Downloads: 25 / 32
Referenced in: [show references]

Formerly at http://tinybrain.de/1028547 & http://1028547.tinybrain.de