1 | !636 |
2 | !629 // standard functions |
3 | !1000300 // class Lexicon |
4 | |
5 | main {
|
6 | psvm {
|
7 | String src = takeInput(args, "class main {\n String s;\n}");
|
8 | Java20 lex = new Java20(); |
9 | src = src.replace("\r\n", "\n");
|
10 | LineNumberReader source = new LineNumberReader(new StringReader(src)); |
11 | int lineNr = source.getLineNumber()+1; |
12 | for (Object a; (a = lex.grab(source)) != lex.$;) {
|
13 | System.out.println("grabbed at line " + lineNr + ": " + a + " " + quote(lex.word()));
|
14 | lineNr = source.getLineNumber()+1; |
15 | } |
16 | } |
17 | |
18 | static String takeInput(String[] args, String def) tex {
|
19 | if (args.length != 0) return loadSnippet(args[0]); |
20 | return loadTextFile("input/input.txt", def);
|
21 | } |
22 | |
23 | public static String quote(String s) {
|
24 | if (s == null) return "null"; |
25 | return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\"";
|
26 | } |
27 | |
28 | static class Java20 extends Lexicon {
|
29 | |
30 | Java20() {
|
31 | |
32 | /** |
33 | * Grammar for Java 2.0. |
34 | * |
35 | * Nonterminal - first letter uppercase |
36 | * TERMINAL - all letters uppercase |
37 | * keyword - all letters lowercase |
38 | */ |
39 | int INFINITY = -1; |
40 | |
41 | /** |
42 | * 19.3 Terminals from section 3.6: White Space: [[:space:]] |
43 | */ |
44 | put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY));
|
45 | |
46 | /** |
47 | * 19.3 Terminals from section 3.7: Comment |
48 | */ |
49 | put("COMMENT", new Union(
|
50 | |
51 | // |
52 | // Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/ |
53 | // |
54 | new Concatenation( |
55 | new Singleton("/*"), new Concatenation(
|
56 | new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
|
57 | new Repetition( |
58 | new Concatenation( |
59 | new Singleton("*"),
|
60 | new Repetition(new Concatenation( |
61 | new NonMatch("*/"),
|
62 | new Repetition(new NonMatch("*"), 0, INFINITY)
|
63 | ), 0, 1) |
64 | ), 0, INFINITY |
65 | ), |
66 | new Singleton("*/")
|
67 | ))), new Union( |
68 | |
69 | /** |
70 | * End Of Line Comment: //[^\n]*\n |
71 | */ |
72 | new Concatenation( |
73 | new Singleton("//"), new Concatenation(
|
74 | new Repetition(new NonMatch("\n"), 0, INFINITY),
|
75 | new Singleton("\n")
|
76 | )), |
77 | |
78 | // |
79 | // Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/ |
80 | // |
81 | new Concatenation( |
82 | new Singleton("/**"), new Concatenation(
|
83 | new Repetition( |
84 | new Concatenation( |
85 | new Repetition(new Concatenation( |
86 | new NonMatch("*/"),
|
87 | new Repetition(new NonMatch("*"), 0, INFINITY)
|
88 | ), 0, 1), |
89 | new Singleton("*")
|
90 | ), 0, INFINITY |
91 | ), |
92 | new Singleton("/")
|
93 | )) |
94 | ))); |
95 | |
96 | put("IDENTIFIER", new Concatenation(
|
97 | new Union( |
98 | PosixClass.alpha(), |
99 | new Match("_$")
|
100 | ), |
101 | new Repetition( |
102 | new Union( |
103 | PosixClass.alnum(), |
104 | new Match("_$")
|
105 | ), 0, INFINITY |
106 | ) |
107 | )); |
108 | |
109 | /** |
110 | * 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar) |
111 | */ |
112 | put("KEYWORD", new Union(
|
113 | new Singleton("const"),
|
114 | new Singleton("goto")
|
115 | )); |
116 | |
117 | /** |
118 | * 19.3 Terminals from section 3.10.1: Integer Literal |
119 | */ |
120 | put("INTEGER_LITERAL", new Concatenation(
|
121 | new Union( |
122 | /** |
123 | * Decimal Integer Literal: 0|[1-9][[:digit:]]* |
124 | */ |
125 | new Singleton("0"), new Union(
|
126 | |
127 | new Concatenation( |
128 | new Range('1', '9'),
|
129 | new Repetition(PosixClass.digit(), 0, INFINITY) |
130 | ), new Union( |
131 | |
132 | /** |
133 | * Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+ |
134 | */ |
135 | new Concatenation( |
136 | new Singleton("0"), new Concatenation(
|
137 | new Match("xX"),
|
138 | new Repetition(PosixClass.xdigit(), 1, INFINITY) |
139 | )), |
140 | |
141 | /** |
142 | * Octal Integer Literal: 0[0-7]+ |
143 | */ |
144 | new Concatenation( |
145 | new Singleton("0"),
|
146 | new Repetition(new Range('0', '7'), 1, INFINITY)
|
147 | ) |
148 | ))), |
149 | new Repetition(new Match("lL"), 0, 1)
|
150 | )); |
151 | |
152 | /** |
153 | * 19.3 Terminals from section 3.10.2: Floating-Point Literal |
154 | */ |
155 | put("FLOATING_POINT_LITERAL", new Union(
|
156 | |
157 | /** |
158 | * [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]? |
159 | */ |
160 | new Concatenation( |
161 | new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( |
162 | new Singleton("."), new Concatenation(
|
163 | new Repetition(PosixClass.digit(), 0, INFINITY), new Concatenation( |
164 | new Repetition(new Concatenation( |
165 | new Match("eE"), new Concatenation(
|
166 | new Repetition(new Match("-+"), 0, 1),
|
167 | new Repetition(PosixClass.digit(), 1, INFINITY) |
168 | )), 0, 1), |
169 | new Repetition(new Match("fFdD"), 0, 1)
|
170 | )))), new Union( |
171 | |
172 | /** |
173 | * \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]? |
174 | */ |
175 | new Concatenation( |
176 | new Singleton("."), new Concatenation(
|
177 | new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( |
178 | new Repetition(new Concatenation( |
179 | new Match("eE"), new Concatenation(
|
180 | new Repetition(new Match("-+"), 0, 1),
|
181 | new Repetition(PosixClass.digit(), 1, INFINITY) |
182 | )), 0, 1), |
183 | new Repetition(new Match("fFdD"), 0, 1)
|
184 | ))), new Union( |
185 | |
186 | /** |
187 | * [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]? |
188 | */ |
189 | new Concatenation( |
190 | new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( |
191 | new Match("eE"), new Concatenation(
|
192 | new Repetition(new Match("-+"), 0, 1), new Concatenation(
|
193 | new Repetition(PosixClass.digit(), 1, INFINITY), |
194 | new Repetition(new Match("fFdD"), 0, 1)
|
195 | )))), |
196 | |
197 | /** |
198 | * [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD] |
199 | */ |
200 | new Concatenation( |
201 | new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( |
202 | new Repetition(new Concatenation( |
203 | new Match("eE"), new Concatenation(
|
204 | new Repetition(new Match("-+"), 0, 1),
|
205 | new Repetition(PosixClass.digit(), 1, INFINITY) |
206 | )), 0, 1), |
207 | new Match("fFdD")
|
208 | )) |
209 | )))); |
210 | |
211 | /** |
212 | * 19.3 Terminals from section 3.10.3: Boolean Literal |
213 | */ |
214 | put("BOOLEAN_LITERAL", new Union(
|
215 | new Singleton("true"),
|
216 | new Singleton("false")
|
217 | )); |
218 | |
219 | /** |
220 | * 19.3 Terminals from section 3.10.4: Character Literal |
221 | */ |
222 | put("CHARACTER_LITERAL", new Concatenation(
|
223 | new Singleton("'"), new Concatenation(
|
224 | new Union( |
225 | |
226 | /** |
227 | * Single Character: [^\r\n'\\] |
228 | */ |
229 | new NonMatch("\r\n'\\"),
|
230 | |
231 | /** |
232 | * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
|
233 | */ |
234 | new Concatenation( |
235 | new Singleton("\\"),
|
236 | new Union( |
237 | new Match("btnfr\"'\\"),
|
238 | new Concatenation( |
239 | new Repetition(new Range('0', '3'), 0, 1),
|
240 | new Repetition(new Range('0', '7'), 1, 2)
|
241 | ) |
242 | ) |
243 | ) |
244 | ), |
245 | new Singleton("'")
|
246 | ))); |
247 | |
248 | /** |
249 | * 19.3 Terminals from section 3.10.5: String Literal |
250 | */ |
251 | put("STRING_LITERAL", new Concatenation(
|
252 | new Singleton("\""), new Concatenation(
|
253 | new Repetition( |
254 | new Union( |
255 | |
256 | /** |
257 | * Single Character: [^\r\n"\\] |
258 | */ |
259 | new NonMatch("\r\n\"\\"),
|
260 | |
261 | /** |
262 | * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
|
263 | */ |
264 | new Concatenation( |
265 | new Singleton("\\"),
|
266 | new Union( |
267 | new Match("btnfr\"'\\"),
|
268 | new Concatenation( |
269 | new Repetition(new Range('0', '3'), 0, 1),
|
270 | new Repetition(new Range('0', '7'), 1, 2)
|
271 | ) |
272 | ) |
273 | ) |
274 | ), 0, INFINITY |
275 | ), |
276 | new Singleton("\"")
|
277 | ))); |
278 | |
279 | /** |
280 | * 19.3 Terminals section 3.10.7: Null Literal |
281 | */ |
282 | put("NULL_LITERAL", new Singleton("null"));
|
283 | |
284 | // OK, it seems we have to add some more stuff... |
285 | |
286 | //put("OTHER1", new Match(";{}=,<>[]().+-:|&!"));
|
287 | //put("OTHER1", new NonMatch("")); // catch anything, one character at a time
|
288 | put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time
|
289 | |
290 | } |
291 | } // class Java20 |
292 | } |
Began life as a copy of #646
download show line numbers debug dex old transpilations
Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, qbtsjoyahagl, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #648 |
| Snippet name: | Lexicon test 2 (tokenizing Java) |
| Eternal ID of this version: | #648/1 |
| Text MD5: | fe078705832195394c64b47b92834d91 |
| Author: | stefan |
| Category: | javax |
| Type: | JavaX source code |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2015-06-27 17:07:16 |
| Source code size: | 7606 bytes / 292 lines |
| Pitched / IR pitched: | No / Yes |
| Views / Downloads: | 1109 / 1009 |
| Referenced in: | [show references] |