ProbabilisticParser1 (shortened, LIVE) [1027944]

// A top-down left-to-right parser using probabilistic states
sclass ProbabilisticParser1 {
  // The probabilistic state machine driving the parse; every State created
  // by this parser is handed to it through pm.addState(...).
  new ProbabilisticMachine<State> pm;
  // When true, Consumer.run and State.runAction print trace output.
  bool verbose;

  abstract class Action {
    S grammarClass;
    
    abstract void run(State state);
    
    //!include #1027987 // setExtraField, getExtraField
    
    S grammarClass() { ret grammarClass /*(S) getExtraField(ef_grammarClass)*/; }
    selfType setGrammarClass(S grammarClass) { this.grammarClass = grammarClass; this; }
    
    // extra field names
    //static final S ef_grammarClass = "grammarClass";
  }
  
  abstract class Consumer extends Action {
    // override this or the next method
    double calcProbabilityForMatchedText(S s) { throw overrideMe(); }
    // tok is CNC starting & ending with code token
    double calcProbabilityForMatchedTokens(LS tok) { ret calcProbabilityForMatchedText(join(tok)); }
    
    int minTokensToConsume = 0;

    @Override
    void run(State state) {
      int maxTokensToConsume = state.remainingTokens();
      
      if (verbose) print(this + ": maxTokensToConsume= " + maxTokensToConsume);
      
      for (int n = minTokensToConsume; n <= maxTokensToConsume; n++) {
        State s = state.prepareClone();
        s.iNextToken += n*2;
        LS tok = subList(state.tok, state.iNextToken, s.iNextToken-1);
        s.probability = multiplyPercentages(s.probability, calcProbabilityForMatchedTokens(tok));
        s.matches = revChainPlus(s.matches, pair(state, tok));
        pm.addState(s);
      }
    }
  }
  
  // Consumer that scores the consumed text against a set of accepted tokens.
  noeq record ConsumeOneOfTokens(Set<S> tokens) extends Consumer {
    // NOTE(review): as an instance initializer this presumably runs before
    // the record's generated constructor assigns `tokens`; if so, a set
    // passed to that constructor is stored without the asCISet wrapping -
    // confirm against the transpiled code.
    { tokens = asCISet(tokens); }
    *(S... tokens) { this.tokens = litciset(tokens); }

    // Score: 100 for a text contained in the set (90 if that text is empty),
    // 50 for an empty text not in the set, otherwise the result of
    // levenSimilarityIntIC_multi against the set.
    double calcProbabilityForMatchedText(S s) {
      double p;
      if (!tokens.contains(s))
        p = empty(s) ? 50 : levenSimilarityIntIC_multi(s, tokens);
      else
        p = empty(s) ? 90 : 100;
      ifdef ConsumeOneOfTokens_debug
      print("ConsumeOneOfTokens: " + p + " for " + s + " [" + tokens + "]");
      endifdef
      ret p;
    }
  }
  
  noeq record ConsumeToken(S token) extends Consumer {
    double emptyProbability = 50;
    
    double calcProbabilityForMatchedText(S s) {
      ret empty(s) ? emptyProbability : levenSimilarityIntIC(s, token);
    }
  }
  
  // Wildcard consumer: whatever text it consumes scores a flat 90.
  noeq record Any extends Consumer {
    // Creates a wildcard labelled with the given grammar class.
    *(S grammarClass) {
      setGrammarClass(grammarClass);
    }

    // The consumed text is ignored; the score is constant.
    double calcProbabilityForMatchedText(S s) {
      ret 90;
    }

    // "Any", joined with the grammar class via joinNemptiesWithSpace.
    toString {
      ret joinNemptiesWithSpace("Any", grammarClass);
    }
  }
  
  // Consumer for the gaps between pattern elements (patternToRule creates
  // one for every even token index of the pattern). Each code token it
  // swallows costs 10 percentage points.
  noeq record Filler extends Consumer {
    // Returns 100 minus 10 per code token counted in tok, but never less
    // than 0. Without the floor, more than 10 swallowed tokens produced a
    // negative "probability"; assuming multiplyPercentages is a plain
    // product, two such factors would even turn positive again.
    double calcProbabilityForMatchedTokens(LS tok) {
      ret Math.max(0, 100-countCodeTokensInReversedCNC(tok)*10);
    }
  }
  
  noeq record EndOfInput extends Action {
    void run(State state) {
      State s = state.prepareClone();
      if (!state.endOfInput()) s.probability /= 2;
      s.matches = revChainPlus(s.matches, pair(state, subList(s.tok, s.iNextToken)));
      pm.addState(s);
    }
  }

  // A parser state: the machine state extended by the input tokens, the
  // current read position and the matches collected so far.
  class State extends ProbabilisticMachine.State {
    LS tok; // CNC
    int iNextToken = 1; // index in tok of the next token to consume
    ReverseChain<Pair<State, LS>> matches; // values: reversed CNC
    O userObject; // copied around from state to state, e.g. reference to production

    toString {
      ret super.toString() + " iNextToken=\*iNextToken*/, matches: " + matchesFromAction();
    }

    // The parser instance this state belongs to.
    ProbabilisticParser1 parser() {
      ret ProbabilisticParser1.this;
    }

    // The matches with each originating state replaced by that state's action.
    LPair<Action, LS> matchesFromAction() {
      ret mapPairsA(st -> st.action(), matches);
    }

    // The action at the head of the remaining rule, or null without a rule.
    Action action() {
      if (remainingRule == null) ret null;
      ret (Action) remainingRule.lhs;
    }

    bool endOfInput() {
      ret l(tok) <= iNextToken;
    }

    // NOTE(review): evaluates to 1 when iNextToken == l(tok), i.e. when
    // endOfInput() is already true - confirm the +1 is intended.
    int remainingTokens() {
      ret 1 + (l(tok)-iNextToken)/2;
    }

    S nextToken() {
      ret get(tok, iNextToken);
    }

    State emptyClone() {
      ret new State;
    }

    // Clones via the superclass, then copies the parser-specific fields.
    State prepareClone() {
      State copy = (State) super.prepareClone();
      ret copyFields(this, copy, 'tok, 'iNextToken, 'matches, 'userObject);
    }

    // Runs the given action (which must be an Action) on this state after
    // asserting that the state belongs to this parser's machine.
    void runAction(O action) {
      assertSame(machine, pm);
      if (verbose) print("Running action: " + action + ", machine: " + machine);
      Action toRun = (Action) action;
      toRun.run(this);
      if (verbose) print("Ran action: " + className(action));
    }
  }
  
  BasicLogicRule patternToRule(S pattern) {
    ret ruleFromActions(
      listPlus(
        mapWithIndex(javaTok(pattern), (i, t) -> even(i)
          ? new Filler
          : eq(t, "*") ? new Any : new ConsumeToken(t)),
        new EndOfInput));
  }
  
  BasicLogicRule ruleFromActions(Action... actions) {
    ret ruleFromActions(asList(actions));
  }
  
  // Builds a rule from makeAnd(actions) and the fixed fragment "parsed".
  BasicLogicRule ruleFromActions(L<Action> actions) {
    ret BasicLogicRule(
      makeAnd(actions),
      formatFrag("parsed"));
  }

  // Parses input against a pattern; pattern e.g.: "Das * hat *.";
  // Resets the machine, seeds it with one state built from the tokenized
  // input and the rule compiled from the pattern, then lets it run.
  void parse(S pattern, S input) {
    pm.reset();
    LS tok = javaTok(input);
    BasicLogicRule rule = patternToRule(pattern);
    addState(tok, rule);
    pm.think();
  }
  
  // Parses input against a ready-made rule: resets the machine, seeds it
  // with one state for the tokenized input, then lets it run.
  void parse(BasicLogicRule rule, S input) {
    pm.reset();
    LS tok = javaTok(input);
    addState(tok, rule);
    pm.think();
  }
  
  State addState(LS tok, BasicLogicRule rule) {
    new State state;
    state.tok = tok;
    state.remainingRule = curryLHS(rule);
    pm.addState(state);
    ret state;
  }

  // Extracts the matches of a state using the default selection (no
  // predicate given).
  Matches stateToMatches(State state) {
    IPred<Action> defaultSelection = null;
    ret stateToMatches(state, defaultSelection);
  }
  
  // Collects the joined text of every match whose action is selected and
  // wraps the result via matches(...). Returns null for a null state.
  // Selection: actionsToCount decides when given; otherwise an action counts
  // if it is an Any or has a non-empty grammar class.
  Matches stateToMatches(State state, IPred<Action> actionsToCount) {
    if (state == null) ret null;
    new LS texts;
    for (Pair<Action, LS> match : state.matchesFromAction()) {
      Action a = match.a;
      bool counted;
      if (actionsToCount != null)
        counted = actionsToCount.get(a);
      else
        counted = a instanceof Any || nempty(a.grammarClass());
      if (counted) texts.add(join(match.b));
    }
    ret matches(texts);
  }
  
  // Matches extracted from bestDoneState().
  Matches bestMatches() {
    State best = bestDoneState();
    ret stateToMatches(best);
  }

  // The first entry of the machine's doneStates.
  State bestDoneState() {
    ret first(pm.doneStates);
  }

  // Lets the underlying machine run.
  void think() {
    pm.think();
  }
}

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

1	// A top-down left-to-right parser using probabilistic states
2	sclass ProbabilisticParser1 {
3	new ProbabilisticMachine<State> pm;
4	bool verbose;
5
6	abstract class Action {
7	S grammarClass;
8
9	abstract void run(State state);
10
11	//!include #1027987 // setExtraField, getExtraField
12
13	S grammarClass() { ret grammarClass /*(S) getExtraField(ef_grammarClass)*/; }
14	selfType setGrammarClass(S grammarClass) { this.grammarClass = grammarClass; this; }
15
16	// extra field names
17	//static final S ef_grammarClass = "grammarClass";
18	}
19
20	abstract class Consumer extends Action {
21	// override this or the next method
22	double calcProbabilityForMatchedText(S s) { throw overrideMe(); }
23	// tok is CNC starting & ending with code token
24	double calcProbabilityForMatchedTokens(LS tok) { ret calcProbabilityForMatchedText(join(tok)); }
25
26	int minTokensToConsume = 0;
27
28	@Override
29	void run(State state) {
30	int maxTokensToConsume = state.remainingTokens();
31
32	if (verbose) print(this + ": maxTokensToConsume= " + maxTokensToConsume);
33
34	for (int n = minTokensToConsume; n <= maxTokensToConsume; n++) {
35	State s = state.prepareClone();
36	s.iNextToken += n*2;
37	LS tok = subList(state.tok, state.iNextToken, s.iNextToken-1);
38	s.probability = multiplyPercentages(s.probability, calcProbabilityForMatchedTokens(tok));
39	s.matches = revChainPlus(s.matches, pair(state, tok));
40	pm.addState(s);
41	}
42	}
43	}
44
45	noeq record ConsumeOneOfTokens(Set<S> tokens) extends Consumer {
46	{ tokens = asCISet(tokens); }
47	*(S... tokens) { this.tokens = litciset(tokens); }
48
49	double calcProbabilityForMatchedText(S s) {
50	double p;
51	if (tokens.contains(s)) p = empty(s) ? 90 : 100;
52	else if (empty(s)) p = 50;
53	else p = levenSimilarityIntIC_multi(s, tokens);
54	ifdef ConsumeOneOfTokens_debug
55	print("ConsumeOneOfTokens: " + p + " for " + s + " [" + tokens + "]");
56	endifdef
57	ret p;
58	}
59	}
60
61	noeq record ConsumeToken(S token) extends Consumer {
62	double emptyProbability = 50;
63
64	double calcProbabilityForMatchedText(S s) {
65	ret empty(s) ? emptyProbability : levenSimilarityIntIC(s, token);
66	}
67	}
68
69	noeq record Any extends Consumer {
70	*(S grammarClass) { setGrammarClass(grammarClass); }
71
72	double calcProbabilityForMatchedText(S s) {
73	ret 90;
74	}
75
76	toString { ret joinNemptiesWithSpace("Any", grammarClass); }
77	}
78
79	noeq record Filler extends Consumer {
80	double calcProbabilityForMatchedTokens(LS tok) {
81	ret 100-countCodeTokensInReversedCNC(tok)*10;
82	}
83	}
84
85	noeq record EndOfInput extends Action {
86	void run(State state) {
87	State s = state.prepareClone();
88	if (!state.endOfInput()) s.probability /= 2;
89	s.matches = revChainPlus(s.matches, pair(state, subList(s.tok, s.iNextToken)));
90	pm.addState(s);
91	}
92	}
93
94	class State extends ProbabilisticMachine.State {
95	LS tok; // CNC
96	int iNextToken = 1;
97	ReverseChain<Pair<State, LS>> matches; // values: reversed CNC
98	O userObject; // copied around from state to state, e.g. reference to production
99
100	toString {
101	ret super.toString() + " iNextToken=\*iNextToken*/, matches: " + matchesFromAction();
102	}
103
104	ProbabilisticParser1 parser() { ret ProbabilisticParser1.this; }
105
106	LPair<Action, LS> matchesFromAction() { ret mapPairsA(s -> s.action(), matches); }
107
108	Action action() { ret remainingRule == null ? null : (Action) remainingRule.lhs; }
109
110	bool endOfInput() { ret iNextToken >= l(tok); }
111	int remainingTokens() { ret (l(tok)-iNextToken)/2+1; }
112	S nextToken() { ret get(tok, iNextToken); }
113
114	State emptyClone() { ret new State; }
115
116	State prepareClone() {
117	ret copyFields(this, (State) super.prepareClone(), 'tok, 'iNextToken, 'matches, 'userObject);
118	}
119	void runAction(O action) {
120	assertSame(machine, pm);
121	if (verbose) print("Running action: " + action + ", machine: " + machine);
122	((Action) action).run(this);
123	if (verbose) print("Ran action: " + className(action));
124	}
125	}
126
127	BasicLogicRule patternToRule(S pattern) {
128	ret ruleFromActions(
129	listPlus(
130	mapWithIndex(javaTok(pattern), (i, t) -> even(i)
131	? new Filler
132	: eq(t, "*") ? new Any : new ConsumeToken(t)),
133	new EndOfInput));
134	}
135
136	BasicLogicRule ruleFromActions(Action... actions) {
137	ret ruleFromActions(asList(actions));
138	}
139
140	BasicLogicRule ruleFromActions(L<Action> actions) {
141	ret BasicLogicRule(makeAnd(actions), formatFrag("parsed"));
142	}
143
144	// pattern e.g.: "Das * hat *.";
145	void parse(S pattern, S input) {
146	pm.reset();
147	addState(javaTok(input), patternToRule(pattern));
148	pm.think();
149	}
150
151	void parse(BasicLogicRule rule, S input) {
152	pm.reset();
153	addState(javaTok(input), rule);
154	pm.think();
155	}
156
157	State addState(LS tok, BasicLogicRule rule) {
158	new State state;
159	state.tok = tok;
160	state.remainingRule = curryLHS(rule);
161	pm.addState(state);
162	ret state;
163	}
164
165	Matches stateToMatches(State state) {
166	ret stateToMatches(state, null);
167	}
168
169	Matches stateToMatches(State state, IPred<Action> actionsToCount) {
170	if (state == null) null;
171	new LS out;
172	for (Pair<Action, LS> p : state.matchesFromAction())
173	if (actionsToCount != null ? actionsToCount.get(p.a)
174	: p.a instanceof Any || nempty(p.a.grammarClass()))
175	out.add(join(p.b));
176	ret matches(out);
177	}
178
179	Matches bestMatches() { ret stateToMatches(bestDoneState()); }
180	State bestDoneState() { ret first(pm.doneStates); }
181
182	void think { pm.think(); }
183	}

Snippet ID:	#1027944
Snippet name:	ProbabilisticParser1 (shortened, LIVE)
Eternal ID of this version:	#1027944/57
Text MD5:	b7a199bc89b5c1c41101d4f8f9eae039
Transpilation MD5:	fba0efcb5a7be3b887a79aa00427ad4a
Author:	stefan
Category:	javax
Type:	JavaX fragment (include)
Public (visible to everyone):	Yes
Archived (hidden from active list):	No
Created/modified:	2020-06-25 21:21:19
Source code size:	5859 bytes / 183 lines
Pitched / IR pitched:	No / No
Views / Downloads:	797 / 1400
Version history:	56 change(s)
Referenced in:	[show references]

< > BotCompany Repo | #1027944 // ProbabilisticParser1 (shortened, LIVE)

JavaX fragment (include) [tags: use-pretranspiled]

Author comment