Libraryless. Click here for Pure Java version (4977L/32K).
1 | // A top-down left-to-right parser using probabilistic states |
2 | sclass ProbabilisticParser1 {
|
3 | new ProbabilisticMachine<State> pm; |
4 | bool verbose; |
5 | |
6 | abstract class Action {
|
7 | S grammarClass; |
8 | |
9 | abstract void run(State state); |
10 | |
11 | //!include #1027987 // setExtraField, getExtraField |
12 | |
13 | S grammarClass() { ret grammarClass /*(S) getExtraField(ef_grammarClass)*/; }
|
14 | selfType setGrammarClass(S grammarClass) { this.grammarClass = grammarClass; this; }
|
15 | |
16 | // extra field names |
17 | //static final S ef_grammarClass = "grammarClass"; |
18 | } |
19 | |
20 | abstract class Consumer extends Action {
|
21 | // override this or the next method |
22 | double calcProbabilityForMatchedText(S s) { throw overrideMe(); }
|
23 | // tok is CNC starting & ending with code token |
24 | double calcProbabilityForMatchedTokens(LS tok) { ret calcProbabilityForMatchedText(join(tok)); }
|
25 | |
26 | int minTokensToConsume = 0; |
27 | |
28 | @Override |
29 | void run(State state) {
|
30 | int maxTokensToConsume = state.remainingTokens(); |
31 | |
32 | if (verbose) print(this + ": maxTokensToConsume= " + maxTokensToConsume); |
33 | |
34 | for (int n = minTokensToConsume; n <= maxTokensToConsume; n++) {
|
35 | State s = state.prepareClone(); |
36 | s.iNextToken += n*2; |
37 | LS tok = subList(state.tok, state.iNextToken, s.iNextToken-1); |
38 | s.probability = multiplyPercentages(s.probability, calcProbabilityForMatchedTokens(tok)); |
39 | s.matches = revChainPlus(s.matches, pair(state, tok)); |
40 | pm.addState(s); |
41 | } |
42 | } |
43 | } |
44 | |
// Consumer that rates the matched text against a set of accepted tokens.
noeq record ConsumeOneOfTokens(Set<S> tokens) extends Consumer {
  // Normalizes the token set through asCISet.
  // NOTE(review): in plain Java an instance initializer runs before the
  // constructor body - confirm the record translation applies this after the
  // tokens field has been assigned.
  { tokens = asCISet(tokens); }

  // Convenience constructor taking the accepted tokens as varargs.
  *(S... tokens) { this.tokens = litciset(tokens); }

  // Rating rules:
  //   text contained in the set -> 100 (90 when the text is empty)
  //   empty text, not in the set -> 50
  //   anything else              -> levenSimilarityIntIC_multi(s, tokens)
  double calcProbabilityForMatchedText(String s) {
    double p = tokens.contains(s)
      ? (empty(s) ? 90 : 100)
      : (empty(s) ? 50 : levenSimilarityIntIC_multi(s, tokens));
    ifdef ConsumeOneOfTokens_debug
      print("ConsumeOneOfTokens: " + p + " for " + s + " [" + tokens + "]");
    endifdef
    return p;
  }
}
60 | |
61 | noeq record ConsumeToken(S token) extends Consumer {
|
62 | double emptyProbability = 50; |
63 | |
64 | double calcProbabilityForMatchedText(S s) {
|
65 | ret empty(s) ? emptyProbability : levenSimilarityIntIC(s, token); |
66 | } |
67 | } |
68 | |
// Wildcard consumer: every matched text receives the same fixed rating.
noeq record Any extends Consumer {
  // Creates a wildcard tagged with the given grammar class.
  *(S grammarClass) { setGrammarClass(grammarClass); }

  // Constant rating, independent of the matched text.
  double calcProbabilityForMatchedText(String s) {
    return 90;
  }

  // "Any", joined with the grammar class via joinNemptiesWithSpace.
  toString {
    String label = joinNemptiesWithSpace("Any", grammarClass);
    return label;
  }
}
78 | |
// Consumer used for the gaps between pattern tokens. The rating starts at 100
// and drops by 10 for each code token counted in the matched list.
noeq record Filler extends Consumer {
  double calcProbabilityForMatchedTokens(LS tok) {
    return 100 - 10*countCodeTokensInReversedCNC(tok);
  }
}
84 | |
85 | noeq record EndOfInput extends Action {
|
86 | void run(State state) {
|
87 | State s = state.prepareClone(); |
88 | if (!state.endOfInput()) s.probability /= 2; |
89 | s.matches = revChainPlus(s.matches, pair(state, subList(s.tok, s.iNextToken))); |
90 | pm.addState(s); |
91 | } |
92 | } |
93 | |
// Parser state: a ProbabilisticMachine.State extended with the token list,
// the current read position and the chain of matches made so far.
class State extends ProbabilisticMachine.State {
  // Input tokens (CNC list).
  LS tok;

  // Index into tok of the next token to consume; starts at 1.
  int iNextToken = 1;

  // One entry per executed action: the state the action ran on, paired with
  // the tokens it matched (values: reversed CNC).
  ReverseChain<Pair<State, LS>> matches;

  // Copied around from state to state, e.g. reference to production.
  Object userObject;

  toString {
    return super.toString() + " iNextToken=\*iNextToken*/, matches: " + matchesFromAction();
  }

  // The parser instance this state belongs to.
  ProbabilisticParser1 parser() {
    return ProbabilisticParser1.this;
  }

  // The match chain with each state replaced by the action of that state.
  LPair<Action, LS> matchesFromAction() {
    return mapPairsA(st -> st.action(), matches);
  }

  // The action at the head of the remaining rule, or null without a rule.
  Action action() {
    if (remainingRule == null)
      return null;
    return (Action) remainingRule.lhs;
  }

  // True once the read position has reached or passed the end of tok.
  bool endOfInput() {
    return iNextToken >= l(tok);
  }

  // Upper bound on the number of tokens a Consumer tries to take.
  // NOTE(review): for an odd-length tok with iNextToken on a code token this
  // is one more than the number of code tokens left - confirm that is intended.
  int remainingTokens() {
    int distance = l(tok) - iNextToken;
    return distance/2 + 1;
  }

  // Token at the current read position, fetched via get(tok, iNextToken).
  String nextToken() {
    return get(tok, iNextToken);
  }

  State emptyClone() {
    return new State();
  }

  // Clones via the superclass, then carries over the parser-specific fields.
  State prepareClone() {
    State copy = (State) super.prepareClone();
    return copyFields(this, copy, 'tok, 'iNextToken, 'matches, 'userObject);
  }

  // Runs the given action (which must be an Action) on this state.
  void runAction(Object action) {
    assertSame(machine, pm);
    if (verbose)
      print("Running action: " + action + ", machine: " + machine);
    ((Action) action).run(this);
    if (verbose)
      print("Ran action: " + className(action));
  }
}
126 | |
127 | BasicLogicRule patternToRule(S pattern) {
|
128 | ret ruleFromActions( |
129 | listPlus( |
130 | mapWithIndex(javaTok(pattern), (i, t) -> even(i) |
131 | ? new Filler |
132 | : eq(t, "*") ? new Any : new ConsumeToken(t)), |
133 | new EndOfInput)); |
134 | } |
135 | |
136 | BasicLogicRule ruleFromActions(Action... actions) {
|
137 | ret ruleFromActions(asList(actions)); |
138 | } |
139 | |
140 | BasicLogicRule ruleFromActions(L<Action> actions) {
|
141 | ret BasicLogicRule(makeAnd(actions), formatFrag("parsed"));
|
142 | } |
143 | |
144 | // pattern e.g.: "Das * hat *."; |
145 | void parse(S pattern, S input) {
|
146 | pm.reset(); |
147 | addState(javaTok(input), patternToRule(pattern)); |
148 | pm.think(); |
149 | } |
150 | |
151 | void parse(BasicLogicRule rule, S input) {
|
152 | pm.reset(); |
153 | addState(javaTok(input), rule); |
154 | pm.think(); |
155 | } |
156 | |
157 | State addState(LS tok, BasicLogicRule rule) {
|
158 | new State state; |
159 | state.tok = tok; |
160 | state.remainingRule = curryLHS(rule); |
161 | pm.addState(state); |
162 | ret state; |
163 | } |
164 | |
165 | Matches stateToMatches(State state) {
|
166 | ret stateToMatches(state, null); |
167 | } |
168 | |
169 | Matches stateToMatches(State state, IPred<Action> actionsToCount) {
|
170 | if (state == null) null; |
171 | new LS out; |
172 | for (Pair<Action, LS> p : state.matchesFromAction()) |
173 | if (actionsToCount != null ? actionsToCount.get(p.a) |
174 | : p.a instanceof Any || nempty(p.a.grammarClass())) |
175 | out.add(join(p.b)); |
176 | ret matches(out); |
177 | } |
178 | |
179 | Matches bestMatches() { ret stateToMatches(bestDoneState()); }
|
180 | State bestDoneState() { ret first(pm.doneStates); }
|
181 | |
182 | void think { pm.think(); }
|
183 | } |
Began life as a copy of #1027937
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
| Snippet ID: | #1027944 |
| Snippet name: | ProbabilisticParser1 (shortened, LIVE) |
| Eternal ID of this version: | #1027944/57 |
| Text MD5: | b7a199bc89b5c1c41101d4f8f9eae039 |
| Transpilation MD5: | fba0efcb5a7be3b887a79aa00427ad4a |
| Author: | stefan |
| Category: | javax |
| Type: | JavaX fragment (include) |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2020-06-25 21:21:19 |
| Source code size: | 5859 bytes / 183 lines |
| Pitched / IR pitched: | No / No |
| Views / Downloads: | 597 / 1194 |
| Version history: | 56 change(s) |
| Referenced in: | [show references] |