!636
!629 // standard functions
!1000300 // class Lexicon

// Lexicon test: tokenizes a piece of Java source text with the Java20 lexicon
// defined below and prints every grabbed terminal.
main {
  /**
   * Entry point. Obtains the input text (see takeInput), normalizes "\r\n"
   * line ends to "\n", then repeatedly grabs terminals from the text until
   * lex.$ is returned (presumably Lexicon's end-of-source marker; Lexicon is
   * not visible in this file). For each terminal it prints the line number
   * at which grabbing started, the terminal, and the quoted word.
   */
  psvm {
    String src = takeInput(args, "class main {\n String s;\n}");
    Java20 lex = new Java20();
    src = src.replace("\r\n", "\n");
    LineNumberReader source = new LineNumberReader(new StringReader(src));
    // Remember the line number before each grab so the printed number is
    // the line on which the terminal begins, not the one on which it ends.
    int lineNr = source.getLineNumber()+1;
    for (Object a; (a = lex.grab(source)) != lex.$;) {
      System.out.println("grabbed at line " + lineNr + ": " + a + " " + quote(lex.word()));
      lineNr = source.getLineNumber()+1;
    }
  }

  /**
   * Chooses the text to tokenize.
   *
   * @param args command line arguments; when non-empty, args[0] is passed
   *             to loadSnippet and its result is returned
   * @param def  passed to loadTextFile together with "input/input.txt" when
   *             no argument is given (presumably the text used when that
   *             file is missing; loadTextFile is defined elsewhere)
   * @return the text to tokenize
   */
  static String takeInput(String[] args, String def) tex {
    if (args.length != 0) return loadSnippet(args[0]);
    return loadTextFile("input/input.txt", def);
  }

  /**
   * Renders a string as a double-quoted literal for display.
   * Backslash, double quote, carriage return and line feed are escaped;
   * all other characters are copied unchanged.
   *
   * @param s the string to quote, may be null
   * @return the quoted string, or the text "null" when s is null
   */
  public static String quote(String s) {
    if (s == null) return "null";
    return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\"";
  }

  /**
   * Lexicon whose constructor registers the lexical terminals of Java
   * (white space, comments, identifiers, keywords, literals, and a
   * catch-all for single non-whitespace characters) via put(name, expression).
   */
  static class Java20 extends Lexicon {

    Java20() {

      // Grammar for Java 2.0.
      //
      // Nonterminal - first letter uppercase
      // TERMINAL - all letters uppercase
      // keyword - all letters lowercase

      // Passed as the upper bound of a Repetition where the regular
      // expressions in the comments below use * or +
      // (presumably Lexicon's convention for "unbounded" - confirm).
      int INFINITY = -1;

      // 19.3 Terminals from section 3.6: White Space: [[:space:]]
      put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY));

      // 19.3 Terminals from section 3.7: Comment
      put("COMMENT", new Union(

        //
        // Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/
        //
        new Concatenation(
          new Singleton("/*"), new Concatenation(
          new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
          new Repetition(
            new Concatenation(
              new Singleton("*"),
              new Repetition(new Concatenation(
                new NonMatch("*/"),
                new Repetition(new NonMatch("*"), 0, INFINITY)
              ), 0, 1)
            ), 0, INFINITY
          ),
          new Singleton("*/")
        ))), new Union(

        //
        // End Of Line Comment: //[^\n]*\n?
        //
        // The terminating newline is optional so that a comment on the
        // last line of an input without a final newline is still
        // recognized. NOTE(review): relies on grab() taking the longest
        // match, so a newline that is present is still consumed - confirm.
        //
        new Concatenation(
          new Singleton("//"), new Concatenation(
          new Repetition(new NonMatch("\n"), 0, INFINITY),
          new Repetition(new Singleton("\n"), 0, 1)
        )),

        //
        // Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/
        //
        new Concatenation(
          new Singleton("/**"), new Concatenation(
          new Repetition(
            new Concatenation(
              new Repetition(new Concatenation(
                new NonMatch("*/"),
                new Repetition(new NonMatch("*"), 0, INFINITY)
              ), 0, 1),
              new Singleton("*")
            ), 0, INFINITY
          ),
          new Singleton("/")
        ))
      )));

      // Identifier: a letter, '_' or '$', followed by any number of
      // letters, digits, '_' or '$'.
      put("IDENTIFIER", new Concatenation(
        new Union(
          PosixClass.alpha(),
          new Match("_$")
        ),
        new Repetition(
          new Union(
            PosixClass.alnum(),
            new Match("_$")
          ), 0, INFINITY
        )
      ));

      // 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar)
      put("KEYWORD", new Union(
        new Singleton("const"),
        new Singleton("goto")
      ));

      // 19.3 Terminals from section 3.10.1: Integer Literal
      // (one of the three forms below, optionally followed by l or L)
      put("INTEGER_LITERAL", new Concatenation(
        new Union(
          //
          // Decimal Integer Literal: 0|[1-9][[:digit:]]*
          //
          new Singleton("0"), new Union(

          new Concatenation(
            new Range('1', '9'),
            new Repetition(PosixClass.digit(), 0, INFINITY)
          ), new Union(

          //
          // Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+
          //
          new Concatenation(
            new Singleton("0"), new Concatenation(
            new Match("xX"),
            new Repetition(PosixClass.xdigit(), 1, INFINITY)
          )),

          //
          // Octal Integer Literal: 0[0-7]+
          //
          new Concatenation(
            new Singleton("0"),
            new Repetition(new Range('0', '7'), 1, INFINITY)
          )
        ))),
        new Repetition(new Match("lL"), 0, 1)
      ));

      // 19.3 Terminals from section 3.10.2: Floating-Point Literal
      put("FLOATING_POINT_LITERAL", new Union(

        //
        // [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]?
        //
        new Concatenation(
          new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
          new Singleton("."), new Concatenation(
          new Repetition(PosixClass.digit(), 0, INFINITY), new Concatenation(
          new Repetition(new Concatenation(
            new Match("eE"), new Concatenation(
            new Repetition(new Match("-+"), 0, 1),
            new Repetition(PosixClass.digit(), 1, INFINITY)
          )), 0, 1),
          new Repetition(new Match("fFdD"), 0, 1)
        )))), new Union(

        //
        // \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]?
        //
        new Concatenation(
          new Singleton("."), new Concatenation(
          new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
          new Repetition(new Concatenation(
            new Match("eE"), new Concatenation(
            new Repetition(new Match("-+"), 0, 1),
            new Repetition(PosixClass.digit(), 1, INFINITY)
          )), 0, 1),
          new Repetition(new Match("fFdD"), 0, 1)
        ))), new Union(

        //
        // [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]?
        //
        new Concatenation(
          new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
          new Match("eE"), new Concatenation(
          new Repetition(new Match("-+"), 0, 1), new Concatenation(
          new Repetition(PosixClass.digit(), 1, INFINITY),
          new Repetition(new Match("fFdD"), 0, 1)
        )))),

        //
        // [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]
        //
        new Concatenation(
          new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
          new Repetition(new Concatenation(
            new Match("eE"), new Concatenation(
            new Repetition(new Match("-+"), 0, 1),
            new Repetition(PosixClass.digit(), 1, INFINITY)
          )), 0, 1),
          new Match("fFdD")
        ))
      ))));

      // 19.3 Terminals from section 3.10.3: Boolean Literal
      put("BOOLEAN_LITERAL", new Union(
        new Singleton("true"),
        new Singleton("false")
      ));

      // 19.3 Terminals from section 3.10.4: Character Literal
      put("CHARACTER_LITERAL", new Concatenation(
        new Singleton("'"), new Concatenation(
        new Union(

          //
          // Single Character: [^\r\n'\\]
          //
          new NonMatch("\r\n'\\"),

          //
          // Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
          //
          new Concatenation(
            new Singleton("\\"),
            new Union(
              new Match("btnfr\"'\\"),
              new Concatenation(
                new Repetition(new Range('0', '3'), 0, 1),
                new Repetition(new Range('0', '7'), 1, 2)
              )
            )
          )
        ),
        new Singleton("'")
      )));

      // 19.3 Terminals from section 3.10.5: String Literal
      put("STRING_LITERAL", new Concatenation(
        new Singleton("\""), new Concatenation(
        new Repetition(
          new Union(

            //
            // Single Character: [^\r\n"\\]
            //
            new NonMatch("\r\n\"\\"),

            //
            // Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
            //
            new Concatenation(
              new Singleton("\\"),
              new Union(
                new Match("btnfr\"'\\"),
                new Concatenation(
                  new Repetition(new Range('0', '3'), 0, 1),
                  new Repetition(new Range('0', '7'), 1, 2)
                )
              )
            )
          ), 0, INFINITY
        ),
        new Singleton("\"")
      )));

      // 19.3 Terminals section 3.10.7: Null Literal
      put("NULL_LITERAL", new Singleton("null"));

      // OK, it seems we have to add some more stuff...

      //put("OTHER1", new Match(";{}=,<>[]().+-:|&!"));
      //put("OTHER1", new NonMatch("")); // catch anything, one character at a time
      put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time

    }
  } // class Java20
}
Began life as a copy of #646
download show line numbers debug dex old transpilations
Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, qbtsjoyahagl, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #648 |
Snippet name: | Lexicon test 2 (tokenizing Java) |
Eternal ID of this version: | #648/1 |
Text MD5: | fe078705832195394c64b47b92834d91 |
Author: | stefan |
Category: | javax |
Type: | JavaX source code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2015-06-27 17:07:16 |
Source code size: | 7606 bytes / 292 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 723 / 623 |
Referenced in: | [show references] |