ProbabilisticParser1 (before shortening) [1027937]

// Probabilistic pattern matcher. A pattern is turned into a chain of
// actions (see patternToRule); each State pairs a position in the
// tokenized input with the part of that chain still to be run.
// Stepping a state spawns successor states with adjusted probabilities.
// States whose probability falls below cutoffPercentage are put in
// droppedStates and are not stepped further.
sclass ProbabilisticParser1 {
  // All four collections are ordered by byProbability(), i.e. the
  // comparator ranks higher probabilities first.
  transient TreeSetWithDuplicates<State> doneStates = new(byProbability());      // states with no rule left
  transient TreeSetWithDuplicates<State> states = new(byProbability());          // every state that passed the cutoff
  transient TreeSetWithDuplicates<State> steppableStates = new(byProbability()); // work queue for stepFirstUnstepped
  transient TreeSetWithDuplicates<State> droppedStates = new(byProbability());   // states below the cutoff
  transient int stateCount; // source of State.number; reset() sets it back to 0

  // States with a probability below this value (in percent) are dropped.
  double cutoffPercentage = 50;
  
  // Comparator that sorts states by descending probability.
  Comparator<State> byProbability() { ret (a, b) -> cmp(b.probability, a.probability); }
  
  // One step of the rule chain. run() receives the state to advance and
  // is expected to register successor states through addState.
  abstract class Action {
    abstract void run(State state);
    
    // Creates the successor of state: copies tok, iNextToken, probability
    // and matches, links back to state via prev, and moves on to the
    // right-hand side of the remaining rule. remainingRule becomes
    // whatever optCast yields for the rhs - presumably null when the rhs
    // is not a BasicLogicRule, which is what marks a state as done.
    State prepareClone(State state) {
      new State s;
      copyFields(state, s, 'tok, 'iNextToken, 'probability, 'matches);
      s.prev = state;
      s.remainingRule = optCast BasicLogicRule(state.remainingRule.rhs);
      ret s;
    }
  }
  
  // An action that consumes a variable number of code tokens and rates
  // each possible consumption.
  abstract class Consumer extends Action {
    // override this or the next method
    double calcProbabilityForMatchedText(S s) { throw overrideMe(); }
    // tok is CNC starting & ending with code token
    // (or an empty list when nothing was consumed)
    double calcProbabilityForMatchedTokens(LS tok) {
      ret calcProbabilityForMatchedText(join(tok));
    }
    
    // Spawns one successor per possible consumption length n, from
    // 0 (consume nothing) up to state.remainingTokens() code tokens.
    void run(State state) {
      int maxTokensToConsume = state.remainingTokens();
      
      for (int n = 0; n <= maxTokensToConsume; n++) {
        State s = prepareClone(state);
        // each consumed code token advances the index by 2
        // (the code token plus the non-code token after it)
        s.iNextToken += n*2;
        // slice from the first consumed code token through the last one
        LS tok = subList(state.tok, state.iNextToken, s.iNextToken-1);
        s.probability = multiplyPercentages(s.probability, calcProbabilityForMatchedTokens(tok));
        // remember which state (and thereby which action) matched what
        s.matches = revChainPlus(s.matches, pair(state, tok));
        addState(s);
      }
    }
  }
  
  // Matches text against a literal pattern token. Empty text is rated 50;
  // anything else is rated by levenSimilarityIntIC(text, token).
  noeq record ConsumeToken(S token) extends Consumer {
    double calcProbabilityForMatchedText(S s) {
      ret empty(s) ? 50 : levenSimilarityIntIC(s, token);
    }
  }
  
  // Wildcard (pattern token "*"): rates any matched text at 90.
  noeq record Any extends Consumer {
    double calcProbabilityForMatchedText(S s) {
      ret 90;
    }
  }
  
  // Inserted for every even-indexed pattern token (see patternToRule).
  // Rating is 100 minus 10 per code token counted by
  // countCodeTokensInReversedCNC, so consuming nothing rates highest.
  noeq record Filler extends Consumer {
    double calcProbabilityForMatchedTokens(LS tok) {
      ret 100-countCodeTokensInReversedCNC(tok)*10;
    }
  }
  
  // Final action of every rule. Halves the probability if input is left
  // over and records the rest of the token list as its match.
  noeq record EndOfInput extends Action {
    void run(State state) {
      State s = prepareClone(state);
      if (!state.endOfInput()) s.probability /= 2;
      s.matches = revChainPlus(s.matches, pair(state, subList(s.tok, s.iNextToken)));
      addState(s);
    }
  }

  // A position in the input plus the part of the rule still to be applied.
  class State {
    int number = ++stateCount; // running number, assigned at creation
    State prev;                // state this one was derived from (null for the initial state)
    double probability = 100;  // in percent
    LS tok; // CNC
    int iNextToken = 1;        // index into tok of the next unconsumed code token
    BasicLogicRule remainingRule; // null once all actions have run
    ReverseChain<Pair<State, LS>> matches; // values: reversed CNC

    // For finished states the text additionally lists the matches.
    toString {
      S base = toStringWithFields(this, "number", "probability", "iNextToken");
      // build the match list only when it is actually printed
      ret done() ? base + " (done) matches: " + matchesFromAction() : base;
    }

    // Same as matches, but with each recorded state replaced by the
    // action that state was about to run (the one that made the match).
    LPair<Action, LS> matchesFromAction() { ret mapPairsA(s -> s.action(), matches); }
    
    // True when no actions are left to run.
    bool done() { ret remainingRule == null; }
    
    // True when iNextToken is at or past the end of tok.
    bool endOfInput() { ret iNextToken >= l(tok); }

    // Number of code tokens from iNextToken to the end of tok.
    // Assumes tok has odd length with code tokens at odd indices (the
    // layout implied by iNextToken starting at 1 - TODO confirm against
    // javaTok). This used to return one more than that, which made
    // Consumer.run emit an extra successor positioned past the end of
    // the input, with a slice that no longer ended on a code token.
    int remainingTokens() { ret (l(tok)-iNextToken)/2; }

    // Token at iNextToken, as returned by get(tok, iNextToken).
    S nextToken() { ret get(tok, iNextToken); }

    // Next action to run (the lhs of the remaining rule), or null when done.
    Action action() { ret remainingRule == null ? null : (Action) remainingRule.lhs; }
    
    // Runs the next action on this state; does nothing when done.
    void step { if (!done()) action().run(this); }
  }
  
  // Registers a new state. States below the cutoff only go to
  // droppedStates; all others go to states and steppableStates, and
  // finished ones to doneStates as well.
  void addState(State s) {
    if (s.probability < cutoffPercentage) ret with droppedStates.add(s);
    addToCollections(s, states, steppableStates);
    if (s.done()) doneStates.add(s);
  }
  
  // Takes the first state out of steppableStates and steps it.
  // Returns false when popFirst yields null (nothing left to step).
  bool stepFirstUnstepped() {
    State s = popFirst(steppableStates), ret false if null;
    ret true with s.step();
  }

  // Builds the action chain for a pattern: every even-indexed token of
  // javaTok(pattern) becomes a Filler, "*" becomes Any, every other
  // token a ConsumeToken, and an EndOfInput is appended at the end.
  BasicLogicRule patternToRule(S pattern) {
    ret curryLHS(BasicLogicRule(
      makeAnd(listPlus(
        mapWithIndex(javaTok(pattern), (i, t) -> even(i)
          ? new Filler
          : eq(t, "*") ? new Any : new ConsumeToken(t)),
        new EndOfInput)),
      formatFrag("parsed")));
  }

  // Empties all state collections and restarts state numbering.
  void reset {
    clearAll(doneStates, states, steppableStates, droppedStates);
    stateCount = 0;
  }

  // pattern e.g.: "Das * hat *.";
  // Resets the parser, prints the rule built from the pattern, and steps
  // states until none are left to step. Results end up in doneStates.
  void parse(S pattern, S input) {
    reset();
    BasicLogicRule rule = patternToRule(pattern);
    print(rule);
    
    new State state;
    state.tok = javaTok(input);
    state.remainingRule = rule;
    addState(state);
    while ping (stepFirstUnstepped()) {}
  }

  // Up to n finished states, taken from the front of doneStates
  // (highest probability first according to byProbability).
  L<State> bestStates(int n) {
    ret takeFirst(n, doneStates);
  }

  // Collects the joined text of every match made by an Any action and
  // wraps the list with matches(). Returns null for a null state.
  Matches stateToMatches(State state) {
    if (state == null) ret null;
    new LS out;
    for (Pair<Action, LS> p : state.matchesFromAction())
      if (p.a instanceof Any)
        out.add(join(p.b));
    ret matches(out);
  }
  
  // Wildcard matches of the first state in doneStates
  // (null if stateToMatches receives null).
  Matches bestMatches() { ret stateToMatches(first(doneStates)); }
}

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

1	sclass ProbabilisticParser1 {
2	transient TreeSetWithDuplicates<State> doneStates = new(byProbability());
3	transient TreeSetWithDuplicates<State> states = new(byProbability());
4	transient TreeSetWithDuplicates<State> steppableStates = new(byProbability());
5	transient TreeSetWithDuplicates<State> droppedStates = new(byProbability());
6	transient int stateCount;
7
8	double cutoffPercentage = 50;
9
10	Comparator<State> byProbability() { ret (a, b) -> cmp(b.probability, a.probability); }
11
12	abstract class Action {
13	abstract void run(State state);
14
15	State prepareClone(State state) {
16	new State s;
17	copyFields(state, s, 'tok, 'iNextToken, 'probability, 'matches);
18	s.prev = state;
19	s.remainingRule = optCast BasicLogicRule(state.remainingRule.rhs);
20	ret s;
21	}
22	}
23
24	abstract class Consumer extends Action {
25	// override this or the next method
26	double calcProbabilityForMatchedText(S s) { throw overrideMe(); }
27	// tok is CNC starting & ending with code token
28	double calcProbabilityForMatchedTokens(LS tok) {
29	ret calcProbabilityForMatchedText(join(tok));
30	}
31
32	void run(State state) {
33	int maxTokensToConsume = state.remainingTokens();
34
35	for (int n = 0; n <= maxTokensToConsume; n++) {
36	State s = prepareClone(state);
37	s.iNextToken += n*2;
38	LS tok = subList(state.tok, state.iNextToken, s.iNextToken-1);
39	s.probability = multiplyPercentages(s.probability, calcProbabilityForMatchedTokens(tok));
40	s.matches = revChainPlus(s.matches, pair(state, tok));
41	addState(s);
42	}
43	}
44	}
45
46	noeq record ConsumeToken(S token) extends Consumer {
47	double calcProbabilityForMatchedText(S s) {
48	ret empty(s) ? 50 : levenSimilarityIntIC(s, token);
49	}
50	}
51
52	noeq record Any extends Consumer {
53	double calcProbabilityForMatchedText(S s) {
54	ret 90;
55	}
56	}
57
58	noeq record Filler extends Consumer {
59	double calcProbabilityForMatchedTokens(LS tok) {
60	ret 100-countCodeTokensInReversedCNC(tok)*10;
61	}
62	}
63
64	noeq record EndOfInput extends Action {
65	void run(State state) {
66	State s = prepareClone(state);
67	if (!state.endOfInput()) s.probability /= 2;
68	s.matches = revChainPlus(s.matches, pair(state, subList(s.tok, s.iNextToken)));
69	addState(s);
70	}
71	}
72
73	class State {
74	int number = ++stateCount;
75	State prev;
76	double probability = 100;
77	LS tok; // CNC
78	int iNextToken = 1;
79	BasicLogicRule remainingRule;
80	ReverseChain<Pair<State, LS>> matches; // values: reversed CNC
81
82	toString {
83	ret toStringWithFields(this, "number", "probability", "iNextToken") + stringIf(done(), " (done)" + " matches: " + matchesFromAction());
84	}
85
86	LPair<Action, LS> matchesFromAction() { ret mapPairsA(s -> s.action(), matches); }
87
88	bool done() { ret remainingRule == null; }
89
90	bool endOfInput() { ret iNextToken >= l(tok); }
91	int remainingTokens() { ret (l(tok)-iNextToken)/2+1; }
92	S nextToken() { ret get(tok, iNextToken); }
93
94	Action action() { ret remainingRule == null ? null : (Action) remainingRule.lhs; }
95
96	void step { if (!done()) action().run(this); }
97	}
98
99	void addState(State s) {
100	if (s.probability < cutoffPercentage) ret with droppedStates.add(s);
101	addToCollections(s, states, steppableStates);
102	if (s.done()) doneStates.add(s);
103	}
104
105	bool stepFirstUnstepped() {
106	State s = popFirst(steppableStates), ret false if null;
107	ret true with s.step();
108	}
109
110	BasicLogicRule patternToRule(S pattern) {
111	ret curryLHS(BasicLogicRule(
112	makeAnd(listPlus(
113	mapWithIndex(javaTok(pattern), (i, t) -> even(i)
114	? new Filler
115	: eq(t, "*") ? new Any : new ConsumeToken(t)),
116	new EndOfInput)),
117	formatFrag("parsed")));
118	}
119
120	void reset {
121	clearAll(doneStates, states, steppableStates, droppedStates);
122	stateCount = 0;
123	}
124
125	// pattern e.g.: "Das * hat *.";
126	void parse(S pattern, S input) {
127	reset();
128	BasicLogicRule rule = patternToRule(pattern);
129	print(rule);
130
131	new State state;
132	state.tok = javaTok(input);
133	state.remainingRule = rule;
134	addState(state);
135	while ping (stepFirstUnstepped()) {}
136	}
137
138	L<State> bestStates(int n) {
139	ret takeFirst(n, doneStates);
140	}
141
142	Matches stateToMatches(State state) {
143	if (state == null) null;
144	new LS out;
145	for (Pair<Action, LS> p : state.matchesFromAction())
146	if (p.a instanceof Any)
147	out.add(join(p.b));
148	ret matches(out);
149	}
150
151	Matches bestMatches() { ret stateToMatches(first(doneStates)); }
152	}

Snippet ID:	#1027937
Snippet name:	ProbabilisticParser1 (before shortening)
Eternal ID of this version:	#1027937/10
Text MD5:	a13434d08ef0fa5a9650a1bf6d46537d
Transpilation MD5:	be801c469d5bed4d2a544f28694105a4
Author:	stefan
Category:	javax
Type:	JavaX fragment (include)
Public (visible to everyone):	Yes
Archived (hidden from active list):	No
Created/modified:	2020-04-20 12:02:45
Source code size:	4731 bytes / 152 lines
Pitched / IR pitched:	No / No
Views / Downloads:	268 / 396
Version history:	9 change(s)
Referenced in:	[show references]

< > BotCompany Repo | #1027937 // ProbabilisticParser1 (before shortening)

JavaX fragment (include) [tags: use-pretranspiled]