Libraryless. Click here for Pure Java version (4977L/32K).
1 | // A top-down left-to-right parser using probabilistic states |
2 | sclass ProbabilisticParser1 { |
3 | new ProbabilisticMachine<State> pm; |
4 | bool verbose; |
5 | |
// Base type for everything a rule can execute against a parser State.
// An action may carry an optional grammar class label.
abstract class Action {
  // label attached via setGrammarClass(); null unless set
  S grammarClass;

  // Executes this action on the given state.
  abstract void run(State state);

  //!include #1027987 // setExtraField, getExtraField

  // Returns the label stored in the plain field
  // (the extra-field based lookup is commented out).
  S grammarClass() {
    ret grammarClass /*(S) getExtraField(ef_grammarClass)*/;
  }

  // Fluent setter: stores the label and hands back this action.
  selfType setGrammarClass(S grammarClass) {
    this.grammarClass = grammarClass;
    ret this;
  }

  // extra field names
  //static final S ef_grammarClass = "grammarClass";
}
19 | |
// An action that consumes a variable number of input tokens.
// For every admissible count it adds one successor state to pm, weighted
// by how well the consumed slice fits this consumer.
abstract class Consumer extends Action {
  // smallest number of code tokens tried in run()
  int minTokensToConsume = 0;

  // Scores the consumed text. Subclasses override either this method
  // or calcProbabilityForMatchedTokens.
  double calcProbabilityForMatchedText(S s) { throw overrideMe(); }

  // Scores the consumed token slice; by default the tokens are joined
  // and scored as text.
  // tok is CNC starting & ending with code token
  double calcProbabilityForMatchedTokens(LS tok) {
    S text = join(tok);
    ret calcProbabilityForMatchedText(text);
  }

  @Override
  void run(State state) {
    int limit = state.remainingTokens();

    if (verbose) print(this + ": maxTokensToConsume= " + limit);

    int count = minTokensToConsume;
    while (count <= limit) {
      State next = state.prepareClone();

      // the position moves two list entries per consumed code token
      next.iNextToken += count*2;

      // slice runs from the old position up to (not including) index next.iNextToken-1
      LS consumed = subList(state.tok, state.iNextToken, next.iNextToken-1);

      double score = calcProbabilityForMatchedTokens(consumed);
      next.probability = multiplyPercentages(next.probability, score);

      // remember which state (and thereby which action) matched which tokens
      next.matches = revChainPlus(next.matches, pair(state, consumed));

      pm.addState(next);
      count++;
    }
  }
}
44 | |
// Consumer that scores the matched text against a set of alternative tokens.
noeq record ConsumeOneOfTokens(Set<S> tokens) extends Consumer {
  // NOTE(review): in plain Java an instance initializer runs before the
  // constructor body; confirm that the generated record constructor does
  // not overwrite this converted set with the raw argument.
  { tokens = asCISet(tokens); }

  // Varargs convenience constructor.
  *(S... tokens) { this.tokens = litciset(tokens); }

  // Scores s: 100 for a contained non-empty text, 90 for a contained empty
  // text, 50 for an empty text that is not contained, otherwise the value
  // of levenSimilarityIntIC_multi against the token set.
  double calcProbabilityForMatchedText(S s) {
    bool listed = tokens.contains(s);
    bool blank = empty(s);

    double p = listed
      ? (blank ? 90 : 100)
      : (blank ? 50 : levenSimilarityIntIC_multi(s, tokens));

    ifdef ConsumeOneOfTokens_debug
      print("ConsumeOneOfTokens: " + p + " for " + s + " [" + tokens + "]");
    endifdef

    ret p;
  }
}
60 | |
// Consumer that scores the matched text against one expected token.
noeq record ConsumeToken(S token) extends Consumer {
  // score returned when nothing was consumed
  double emptyProbability = 50;

  // Empty text gets emptyProbability; anything else is scored by
  // levenSimilarityIntIC against the expected token.
  double calcProbabilityForMatchedText(S s) {
    if (empty(s))
      ret emptyProbability;
    ret levenSimilarityIntIC(s, token);
  }
}
68 | |
// Wildcard consumer: every matched text gets the same fixed score.
noeq record Any extends Consumer {
  // Creates a wildcard that carries the given grammar class label.
  *(S grammarClass) {
    setGrammarClass(grammarClass);
  }

  // Constant score regardless of what was consumed.
  double calcProbabilityForMatchedText(S s) {
    ret 90;
  }

  // "Any", joined with the grammar class via joinNemptiesWithSpace.
  public S toString() {
    ret joinNemptiesWithSpace("Any", grammarClass);
  }
}
78 | |
// Consumer for the gaps between pattern tokens. The score starts at 100
// and drops by 10 for every token counted by countCodeTokensInReversedCNC.
noeq record Filler extends Consumer {
  // Returns the penalized score, never below 0.
  // Without the clamp, more than 10 counted tokens produced a negative
  // percentage, which was then passed on to multiplyPercentages.
  double calcProbabilityForMatchedTokens(LS tok) {
    ret Math.max(0, 100-countCodeTokensInReversedCNC(tok)*10);
  }
}
84 | |
// Final action of a pattern: records whatever input is left over and
// halves the probability if the state had not reached the end of input.
noeq record EndOfInput extends Action {
  void run(State state) {
    State next = state.prepareClone();

    bool atEnd = state.endOfInput();
    if (!atEnd)
      next.probability /= 2;

    // everything from the current position onwards is logged as this action's match
    LS leftover = subList(next.tok, next.iNextToken);
    next.matches = revChainPlus(next.matches, pair(state, leftover));

    pm.addState(next);
  }
}
93 | |
// A machine state extended with the parser's data: the input token list,
// the current position in it and the chain of matches made so far.
class State extends ProbabilisticMachine.State {
  LS tok; // CNC
  int iNextToken = 1; // index into tok of the next unconsumed token
  ReverseChain<Pair<State, LS>> matches; // values: reversed CNC
  O userObject; // copied around from state to state, e.g. reference to production

  toString {
    ret super.toString() + " iNextToken=\*iNextToken*/, matches: " + matchesFromAction();
  }

  // The parser instance this state belongs to.
  ProbabilisticParser1 parser() { ret ProbabilisticParser1.this; }

  // The match chain with every recorded state replaced by its action.
  LPair<Action, LS> matchesFromAction() { ret mapPairsA(s -> s.action(), matches); }

  // The action at the head of the remaining rule, or null if no rule is left.
  Action action() { ret remainingRule == null ? null : (Action) remainingRule.lhs; }

  // True once the position has reached or passed the end of tok.
  bool endOfInput() { ret iNextToken >= l(tok); }

  // Number of code tokens from iNextToken to the end of tok.
  // Assumes tok is a CNC list with code tokens at odd indices and a trailing
  // non-code entry (as javaTok is presumed to produce) — TODO confirm.
  // The previous formula added 1, so it reported one token left even when
  // endOfInput() was already true; clamped so the count is never negative.
  int remainingTokens() { ret Math.max(0, (l(tok)-iNextToken)/2); }

  // The token at the current position, fetched with get(tok, iNextToken).
  S nextToken() { ret get(tok, iNextToken); }

  // Fresh, empty instance of this state class.
  State emptyClone() { ret new State; }

  // Clone prepared by the superclass, with the parser-specific fields copied over.
  State prepareClone() {
    ret copyFields(this, (State) super.prepareClone(), 'tok, 'iNextToken, 'matches, 'userObject);
  }

  // Runs the given action (cast to Action) on this state, with optional tracing.
  void runAction(O action) {
    // a state must only run inside this parser's own machine
    assertSame(machine, pm);
    if (verbose) print("Running action: " + action + ", machine: " + machine);
    ((Action) action).run(this);
    if (verbose) print("Ran action: " + className(action));
  }
}
126 | |
// Converts a pattern string into a rule: entries at even token indices
// become Filler, "*" becomes Any, every other token becomes ConsumeToken,
// and an EndOfInput action closes the sequence.
BasicLogicRule patternToRule(S pattern) {
  LS patternTok = javaTok(pattern);
  new L<Action> actions;

  for (int i = 0; i < l(patternTok); i++) {
    S t = patternTok.get(i);
    Action a;
    if (even(i))
      a = new Filler();
    else if (eq(t, "*"))
      a = new Any();
    else
      a = new ConsumeToken(t);
    actions.add(a);
  }

  actions.add(new EndOfInput());
  ret ruleFromActions(actions);
}
135 | |
// Builds a rule whose left-hand side is makeAnd(actions) and whose
// right-hand side is formatFrag("parsed").
BasicLogicRule ruleFromActions(L<Action> actions) {
  ret BasicLogicRule(
    makeAnd(actions),
    formatFrag("parsed"));
}

// Varargs convenience form; forwards to the list-based overload.
BasicLogicRule ruleFromActions(Action... actions) {
  L<Action> asActionList = asList(actions);
  ret ruleFromActions(asActionList);
}
143 | |
// Resets the machine, seeds it with the tokenized input and the rule
// derived from the pattern, then lets the machine think.
// pattern e.g.: "Das * hat *.";
void parse(S pattern, S input) {
  pm.reset();
  LS inputTok = javaTok(input);
  BasicLogicRule rule = patternToRule(pattern);
  addState(inputTok, rule);
  pm.think();
}
150 | |
// Same as the pattern-based parse, but starts from a ready-made rule.
void parse(BasicLogicRule rule, S input) {
  pm.reset();
  LS inputTok = javaTok(input);
  addState(inputTok, rule);
  pm.think();
}
156 | |
// Creates a start state for the given tokens and rule, registers it with
// the machine and returns it.
State addState(LS tok, BasicLogicRule rule) {
  State start = new State();
  start.tok = tok;
  start.remainingRule = curryLHS(rule);
  pm.addState(start);
  ret start;
}
164 | |
// Extracts matches from a state using the default selection
// (Any actions and actions that carry a grammar class).
Matches stateToMatches(State state) {
  ret stateToMatches(state, null);
}

// Collects the joined token text of every recorded match whose action is
// selected. With a non-null actionsToCount, that predicate decides;
// otherwise Any actions and actions with a non-empty grammar class count.
// Returns null for a null state.
Matches stateToMatches(State state, IPred<Action> actionsToCount) {
  if (state == null) ret null;

  new LS texts;
  for (Pair<Action, LS> entry : state.matchesFromAction()) {
    Action a = entry.a;

    bool counted;
    if (actionsToCount != null)
      counted = actionsToCount.get(a);
    else
      counted = a instanceof Any || nempty(a.grammarClass());

    if (counted)
      texts.add(join(entry.b));
  }
  ret matches(texts);
}
178 | |
// Matches of the first done state (null if stateToMatches receives null).
Matches bestMatches() {
  State best = bestDoneState();
  ret stateToMatches(best);
}

// First entry of the machine's done states.
State bestDoneState() {
  ret first(pm.doneStates);
}

// Lets the machine continue thinking.
void think() {
  pm.think();
}
183 | } |
Began life as a copy of #1027937
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1027944 |
Snippet name: | ProbabilisticParser1 (shortened, LIVE) |
Eternal ID of this version: | #1027944/57 |
Text MD5: | b7a199bc89b5c1c41101d4f8f9eae039 |
Transpilation MD5: | fba0efcb5a7be3b887a79aa00427ad4a |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-06-25 21:21:19 |
Source code size: | 5859 bytes / 183 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 357 / 906 |
Version history: | 56 change(s) |
Referenced in: | [show references] |