1 | !636 |
2 | !629 // standard functions |
3 | !1000300 // class Lexicon |
4 | |
5 | main {
|
6 | psvm {
|
7 | String src = takeInput(args, null); |
8 | JSON lex = new JSON(); |
9 | src = src.replace("\r\n", "\n");
|
10 | LineNumberReader source = new LineNumberReader(new StringReader(src)); |
11 | int lineNr = source.getLineNumber()+1; |
12 | List<T> list = new ArrayList<T>(); |
13 | for (Object a; (a = lex.grab(source)) != lex.$;) {
|
14 | String word = lex.word(); |
15 | String q = quote(word); |
16 | //System.out.println("grabbed at line " + lineNr + ": " + a + " " + q);
|
17 | lineNr = source.getLineNumber()+1; |
18 | |
19 | T t = new T(a, word); |
20 | boolean isSpace = t.isSpace(); |
21 | if (isSpace && list.size() > 0 && list.get(list.size()-1).isSpace()) |
22 | list.get(list.size()-1).word += word; // merge spaces |
23 | else |
24 | list.add(t); |
25 | } |
26 | |
27 | List<String> cnc = new ArrayList<String>(); |
28 | for (int i = 0; i < list.size(); ) {
|
29 | T t = list.get(i); |
30 | boolean shouldBeSpace = (cnc.size() % 2) == 0; |
31 | boolean isSpace = t.isSpace(); |
32 | if (shouldBeSpace == isSpace) {
|
33 | cnc.add(t.word); |
34 | ++i; |
35 | } else if (shouldBeSpace) |
36 | cnc.add("");
|
37 | else {
|
38 | System.out.println(cncToLines(cnc)); |
39 | throw new RuntimeException("TILT at " + cnc.size() + ": " + quote(t.word));
|
40 | } |
41 | } |
42 | if ((cnc.size() % 2) == 0) |
43 | cnc.add("");
|
44 | |
45 | saveTextFile("output/output.txt", cncToLines(cnc));
|
46 | } |
47 | |
48 | static class T {
|
49 | Object a; String word; |
50 | |
51 | T(Object a, String word) { this.a = a; this.word = word; }
|
52 | |
53 | boolean isSpace() {
|
54 | return a.equals("WHITE_SPACE") || a.equals("COMMENT");
|
55 | } |
56 | } |
57 | |
58 | static String cncToLines(List<String> cnc) {
|
59 | StringBuilder out = new StringBuilder(); |
60 | for (String token : cnc) |
61 | out.append(quote(token) + "\n"); |
62 | return out.toString(); |
63 | } |
64 | |
65 | static String takeInput(String[] args, String def) tex {
|
66 | if (args.length != 0) return loadSnippet(args[0]); |
67 | return loadTextFile("input/input.txt", def);
|
68 | } |
69 | |
70 | public static String quote(String s) {
|
71 | if (s == null) return "null"; |
72 | return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\"";
|
73 | } |
74 | |
75 | static class JSON extends Lexicon {
|
76 | |
77 | JSON() {
|
78 | |
79 | /** |
80 | * Grammar for Java 2.0. |
81 | * |
82 | * Nonterminal - first letter uppercase |
83 | * TERMINAL - all letters uppercase |
84 | * keyword - all letters lowercase |
85 | */ |
86 | int INFINITY = -1; |
87 | |
88 | /** |
89 | * 19.3 Terminals from section 3.6: White Space: [[:space:]] |
90 | */ |
91 | put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY));
|
92 | |
93 | /** |
94 | * 19.3 Terminals from section 3.7: Comment |
95 | */ |
96 | put("COMMENT", new Union(
|
97 | |
98 | // |
99 | // Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/ |
100 | // |
101 | new Concatenation( |
102 | new Singleton("/*"), new Concatenation(
|
103 | new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
|
104 | new Repetition( |
105 | new Concatenation( |
106 | new Singleton("*"),
|
107 | new Repetition(new Concatenation( |
108 | new NonMatch("*/"),
|
109 | new Repetition(new NonMatch("*"), 0, INFINITY)
|
110 | ), 0, 1) |
111 | ), 0, INFINITY |
112 | ), |
113 | new Singleton("*/")
|
114 | ))), new Union( |
115 | |
116 | /** |
117 | * End Of Line Comment: //[^\n]*\n |
118 | */ |
119 | new Concatenation( |
120 | new Singleton("//"), new Concatenation(
|
121 | new Repetition(new NonMatch("\n"), 0, INFINITY),
|
122 | new Singleton("\n")
|
123 | )), |
124 | |
125 | // |
126 | // Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/ |
127 | // |
128 | new Concatenation( |
129 | new Singleton("/**"), new Concatenation(
|
130 | new Repetition( |
131 | new Concatenation( |
132 | new Repetition(new Concatenation( |
133 | new NonMatch("*/"),
|
134 | new Repetition(new NonMatch("*"), 0, INFINITY)
|
135 | ), 0, 1), |
136 | new Singleton("*")
|
137 | ), 0, INFINITY |
138 | ), |
139 | new Singleton("/")
|
140 | )) |
141 | ))); |
142 | |
143 | put("IDENTIFIER", new Concatenation(
|
144 | new Union( |
145 | PosixClass.alpha(), |
146 | new Match("_$")
|
147 | ), |
148 | new Repetition( |
149 | new Union( |
150 | PosixClass.alnum(), |
151 | new Match("_$")
|
152 | ), 0, INFINITY |
153 | ) |
154 | )); |
155 | |
156 | /** |
157 | * 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar) |
158 | */ |
159 | put("KEYWORD", new Union(
|
160 | new Singleton("const"),
|
161 | new Singleton("goto")
|
162 | )); |
163 | |
164 | /** |
165 | * 19.3 Terminals from section 3.10.1: Integer Literal |
166 | */ |
167 | put("INTEGER_LITERAL", new Concatenation(
|
168 | new Union( |
169 | /** |
170 | * Decimal Integer Literal: 0|[1-9][[:digit:]]* |
171 | */ |
172 | new Singleton("0"), new Union(
|
173 | |
174 | new Concatenation( |
175 | new Range('1', '9'),
|
176 | new Repetition(PosixClass.digit(), 0, INFINITY) |
177 | ), new Union( |
178 | |
179 | /** |
180 | * Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+ |
181 | */ |
182 | new Concatenation( |
183 | new Singleton("0"), new Concatenation(
|
184 | new Match("xX"),
|
185 | new Repetition(PosixClass.xdigit(), 1, INFINITY) |
186 | )), |
187 | |
188 | /** |
189 | * Octal Integer Literal: 0[0-7]+ |
190 | */ |
191 | new Concatenation( |
192 | new Singleton("0"),
|
193 | new Repetition(new Range('0', '7'), 1, INFINITY)
|
194 | ) |
195 | ))), |
196 | new Repetition(new Match("lL"), 0, 1)
|
197 | )); |
198 | |
199 | /** |
200 | * 19.3 Terminals from section 3.10.2: Floating-Point Literal |
201 | */ |
202 | put("FLOATING_POINT_LITERAL", new Union(
|
203 | |
204 | /** |
205 | * [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]? |
206 | */ |
207 | new Concatenation( |
208 | new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( |
209 | new Singleton("."), new Concatenation(
|
210 | new Repetition(PosixClass.digit(), 0, INFINITY), new Concatenation( |
211 | new Repetition(new Concatenation( |
212 | new Match("eE"), new Concatenation(
|
213 | new Repetition(new Match("-+"), 0, 1),
|
214 | new Repetition(PosixClass.digit(), 1, INFINITY) |
215 | )), 0, 1), |
216 | new Repetition(new Match("fFdD"), 0, 1)
|
217 | )))), new Union( |
218 | |
219 | /** |
220 | * \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]? |
221 | */ |
222 | new Concatenation( |
223 | new Singleton("."), new Concatenation(
|
224 | new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( |
225 | new Repetition(new Concatenation( |
226 | new Match("eE"), new Concatenation(
|
227 | new Repetition(new Match("-+"), 0, 1),
|
228 | new Repetition(PosixClass.digit(), 1, INFINITY) |
229 | )), 0, 1), |
230 | new Repetition(new Match("fFdD"), 0, 1)
|
231 | ))), new Union( |
232 | |
233 | /** |
234 | * [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]? |
235 | */ |
236 | new Concatenation( |
237 | new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( |
238 | new Match("eE"), new Concatenation(
|
239 | new Repetition(new Match("-+"), 0, 1), new Concatenation(
|
240 | new Repetition(PosixClass.digit(), 1, INFINITY), |
241 | new Repetition(new Match("fFdD"), 0, 1)
|
242 | )))), |
243 | |
244 | /** |
245 | * [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD] |
246 | */ |
247 | new Concatenation( |
248 | new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( |
249 | new Repetition(new Concatenation( |
250 | new Match("eE"), new Concatenation(
|
251 | new Repetition(new Match("-+"), 0, 1),
|
252 | new Repetition(PosixClass.digit(), 1, INFINITY) |
253 | )), 0, 1), |
254 | new Match("fFdD")
|
255 | )) |
256 | )))); |
257 | |
258 | /** |
259 | * 19.3 Terminals from section 3.10.3: Boolean Literal |
260 | */ |
261 | put("BOOLEAN_LITERAL", new Union(
|
262 | new Singleton("true"),
|
263 | new Singleton("false")
|
264 | )); |
265 | |
266 | /** |
267 | * 19.3 Terminals from section 3.10.4: Character Literal |
268 | */ |
269 | put("CHARACTER_LITERAL", new Concatenation(
|
270 | new Singleton("'"), new Concatenation(
|
271 | new Union( |
272 | |
273 | /** |
274 | * Single Character: [^\r\n'\\] |
275 | */ |
276 | new NonMatch("\r\n'\\"),
|
277 | |
278 | /** |
279 | * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
|
280 | */ |
281 | new Concatenation( |
282 | new Singleton("\\"),
|
283 | new Union( |
284 | new Match("btnfr\"'\\"),
|
285 | new Concatenation( |
286 | new Repetition(new Range('0', '3'), 0, 1),
|
287 | new Repetition(new Range('0', '7'), 1, 2)
|
288 | ) |
289 | ) |
290 | ) |
291 | ), |
292 | new Singleton("'")
|
293 | ))); |
294 | |
295 | /** |
296 | * 19.3 Terminals from section 3.10.5: String Literal |
297 | */ |
298 | put("STRING_LITERAL", new Concatenation(
|
299 | new Singleton("\""), new Concatenation(
|
300 | new Repetition( |
301 | new Union( |
302 | |
303 | /** |
304 | * Single Character: [^\r\n"\\] |
305 | */ |
306 | new NonMatch("\r\n\"\\"),
|
307 | |
308 | /** |
309 | * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
|
310 | */ |
311 | new Concatenation( |
312 | new Singleton("\\"),
|
313 | new Union( |
314 | new Match("btnfr\"'\\"),
|
315 | new Concatenation( |
316 | new Repetition(new Range('0', '3'), 0, 1),
|
317 | new Repetition(new Range('0', '7'), 1, 2)
|
318 | ) |
319 | ) |
320 | ) |
321 | ), 0, INFINITY |
322 | ), |
323 | new Singleton("\"")
|
324 | ))); |
325 | |
326 | /** |
327 | * 19.3 Terminals section 3.10.7: Null Literal |
328 | */ |
329 | put("NULL_LITERAL", new Singleton("null"));
|
330 | |
331 | // Catch-all |
332 | |
333 | put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time
|
334 | |
335 | } |
336 | } // class JSON |
337 | } |
Began life as a copy of #651
download show line numbers debug dex old transpilations
Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #653 |
| Snippet name: | Official JSON+C tokenizer (developing) |
| Eternal ID of this version: | #653/1 |
| Text MD5: | 0f3d584a202c3c0ab89603543c9005a7 |
| Author: | stefan |
| Category: | javax |
| Type: | JavaX (input.txt to output.txt) |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2015-06-28 18:48:24 |
| Source code size: | 8786 bytes / 337 lines |
| Pitched / IR pitched: | No / Yes |
| Views / Downloads: | 972 / 840 |
| Referenced in: | [show references] |