1 | !636 |
2 | !629 // standard functions |
3 | !1000305 // class JSONTokenizer |
4 | |
5 | import JSONTokenizer.TokenType; |
6 | |
7 | main { |
8 | psvm { |
9 | String src = takeInput(args, null); |
10 | |
11 | src = src.replace("\r\n", "\n"); |
12 | JSONTokenizer lex = new JSONTokenizer(src); |
13 | |
14 | List<T> list = new ArrayList<T>(); |
15 | for (TokenType a; (a = lex.nextToken()) != TokenType.EOF;) { |
16 | String word = lex.getToken(); |
17 | String q = quote(word); |
18 | T t = new T(a, word); |
19 | boolean isSpace = t.isSpace(); |
20 | if (isSpace && list.size() > 0 && list.get(list.size()-1).isSpace()) |
21 | list.get(list.size()-1).word += word; // merge spaces |
22 | else |
23 | list.add(t); |
24 | } |
25 | |
26 | List<String> cnc = new ArrayList<String>(); |
27 | for (int i = 0; i < list.size(); ) { |
28 | T t = list.get(i); |
29 | boolean shouldBeSpace = (cnc.size() % 2) == 0; |
30 | boolean isSpace = t.isSpace(); |
31 | if (shouldBeSpace == isSpace) { |
32 | cnc.add(t.word); |
33 | ++i; |
34 | } else if (shouldBeSpace) |
35 | cnc.add(""); |
36 | else { |
37 | System.out.println(cncToLines(cnc)); |
38 | throw new RuntimeException("TILT at " + cnc.size() + ": " + quote(t.word)); |
39 | } |
40 | } |
41 | if ((cnc.size() % 2) == 0) |
42 | cnc.add(""); |
43 | |
44 | saveTextFile("output/output.txt", cncToLines(cnc)); |
45 | } |
46 | |
47 | static class T { |
48 | TokenType a; String word; |
49 | |
50 | T(TokenType a, String word) { this.a = a; this.word = word; } |
51 | |
52 | boolean isSpace() { |
53 | return a.equals("WHITE_SPACE") || a.equals("COMMENT"); |
54 | } |
55 | } |
56 | |
57 | static String cncToLines(List<String> cnc) { |
58 | StringBuilder out = new StringBuilder(); |
59 | for (String token : cnc) |
60 | out.append(quote(token) + "\n"); |
61 | return out.toString(); |
62 | } |
63 | |
64 | static String takeInput(String[] args, String def) tex { |
65 | if (args.length != 0) return loadSnippet(args[0]); |
66 | return loadTextFile("input/input.txt", def); |
67 | } |
68 | |
69 | public static String quote(String s) { |
70 | if (s == null) return "null"; |
71 | return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\""; |
72 | } |
73 | |
/**
 * Lexicon describing lexical terminals of Java: white space, comments,
 * identifiers, the reserved-but-unused keywords, the literal kinds, and a
 * catch-all single-character token.
 *
 * Every terminal is registered in the constructor via put(name, expression).
 * Expressions are composed from Union, Concatenation, Repetition, Singleton,
 * Match, NonMatch, Range and PosixClass, all of which are declared elsewhere;
 * the regular expression quoted above each expression states what it is
 * meant to recognize.
 *
 * NOTE(review): nothing in the visible part of this snippet instantiates
 * Java20 - the main program tokenizes with JSONTokenizer. Presumably this is
 * left over from the snippet this one was copied from; confirm before use.
 */
static class Java20 extends Lexicon {

  /** Registers all terminals. */
  Java20() {

    /**
     * Grammar for Java 2.0.
     *
     * Nonterminal - first letter uppercase
     * TERMINAL - all letters uppercase
     * keyword - all letters lowercase
     *
     * INFINITY is passed as the upper bound of a Repetition wherever the
     * quoted regular expression uses an unbounded quantifier; presumably
     * Repetition treats -1 as "no upper limit" - TODO confirm.
     */
    int INFINITY = -1;

    /**
     * 19.3 Terminals from section 3.6: White Space: [[:space:]]
     * (registered as one or more such characters)
     */
    put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY));

    /**
     * 19.3 Terminals from section 3.7: Comment
     *
     * Union of three alternatives: traditional, end-of-line and
     * documentation comment.
     */
    put("COMMENT", new Union(

      //
      // Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/
      //
      // NOTE(review): the pattern demands at least one non-star character
      // right after the opening "/*".
      //
      new Concatenation(
        new Singleton("/*"), new Concatenation(
        new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
        new Repetition(
          new Concatenation(
            new Singleton("*"),
            new Repetition(new Concatenation(
              new NonMatch("*/"),
              new Repetition(new NonMatch("*"), 0, INFINITY)
            ), 0, 1)
          ), 0, INFINITY
        ),
        new Singleton("*/")
      ))), new Union(

      /**
       * End Of Line Comment: //[^\n]*\n
       *
       * NOTE(review): the terminating line feed is mandatory and part of
       * the token, so a comment on the very last line without a trailing
       * newline would presumably not be recognized - confirm.
       */
      new Concatenation(
        new Singleton("//"), new Concatenation(
        new Repetition(new NonMatch("\n"), 0, INFINITY),
        new Singleton("\n")
      )),

      //
      // Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/
      //
      new Concatenation(
        new Singleton("/**"), new Concatenation(
        new Repetition(
          new Concatenation(
            new Repetition(new Concatenation(
              new NonMatch("*/"),
              new Repetition(new NonMatch("*"), 0, INFINITY)
            ), 0, 1),
            new Singleton("*")
          ), 0, INFINITY
        ),
        new Singleton("/")
      ))
    )));

    /**
     * Identifier: a letter, underscore or dollar sign, followed by any
     * number of letters, digits, underscores or dollar signs.
     */
    put("IDENTIFIER", new Concatenation(
      new Union(
        PosixClass.alpha(),
        new Match("_$")
      ),
      new Repetition(
        new Union(
          PosixClass.alnum(),
          new Match("_$")
        ), 0, INFINITY
      )
    ));

    /**
     * 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar)
     *
     * NOTE(review): both words also fit the IDENTIFIER expression above;
     * which terminal wins depends on how Lexicon resolves such overlaps -
     * confirm.
     */
    put("KEYWORD", new Union(
      new Singleton("const"),
      new Singleton("goto")
    ));

    /**
     * 19.3 Terminals from section 3.10.1: Integer Literal
     *
     * One of the three number forms below, followed by an optional
     * l or L suffix.
     */
    put("INTEGER_LITERAL", new Concatenation(
      new Union(
        /**
         * Decimal Integer Literal: 0|[1-9][[:digit:]]*
         */
        new Singleton("0"), new Union(

        new Concatenation(
          new Range('1', '9'),
          new Repetition(PosixClass.digit(), 0, INFINITY)
        ), new Union(

        /**
         * Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+
         */
        new Concatenation(
          new Singleton("0"), new Concatenation(
          new Match("xX"),
          new Repetition(PosixClass.xdigit(), 1, INFINITY)
        )),

        /**
         * Octal Integer Literal: 0[0-7]+
         */
        new Concatenation(
          new Singleton("0"),
          new Repetition(new Range('0', '7'), 1, INFINITY)
        )
      ))),
      new Repetition(new Match("lL"), 0, 1)
    ));

    /**
     * 19.3 Terminals from section 3.10.2: Floating-Point Literal
     *
     * Union of four forms; each is quoted above its expression.
     */
    put("FLOATING_POINT_LITERAL", new Union(

      /**
       * [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]?
       */
      new Concatenation(
        new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
        new Singleton("."), new Concatenation(
        new Repetition(PosixClass.digit(), 0, INFINITY), new Concatenation(
        new Repetition(new Concatenation(
          new Match("eE"), new Concatenation(
          new Repetition(new Match("-+"), 0, 1),
          new Repetition(PosixClass.digit(), 1, INFINITY)
        )), 0, 1),
        new Repetition(new Match("fFdD"), 0, 1)
      )))), new Union(

      /**
       * \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]?
       */
      new Concatenation(
        new Singleton("."), new Concatenation(
        new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
        new Repetition(new Concatenation(
          new Match("eE"), new Concatenation(
          new Repetition(new Match("-+"), 0, 1),
          new Repetition(PosixClass.digit(), 1, INFINITY)
        )), 0, 1),
        new Repetition(new Match("fFdD"), 0, 1)
      ))), new Union(

      /**
       * [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]?
       */
      new Concatenation(
        new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
        new Match("eE"), new Concatenation(
        new Repetition(new Match("-+"), 0, 1), new Concatenation(
        new Repetition(PosixClass.digit(), 1, INFINITY),
        new Repetition(new Match("fFdD"), 0, 1)
      )))),

      /**
       * [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]
       */
      new Concatenation(
        new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
        new Repetition(new Concatenation(
          new Match("eE"), new Concatenation(
          new Repetition(new Match("-+"), 0, 1),
          new Repetition(PosixClass.digit(), 1, INFINITY)
        )), 0, 1),
        new Match("fFdD")
      ))
    ))));

    /**
     * 19.3 Terminals from section 3.10.3: Boolean Literal
     */
    put("BOOLEAN_LITERAL", new Union(
      new Singleton("true"),
      new Singleton("false")
    ));

    /**
     * 19.3 Terminals from section 3.10.4: Character Literal
     *
     * A single quote, exactly one plain character or escape sequence, and
     * a closing single quote.
     */
    put("CHARACTER_LITERAL", new Concatenation(
      new Singleton("'"), new Concatenation(
      new Union(

        /**
         * Single Character: [^\r\n'\\]
         */
        new NonMatch("\r\n'\\"),

        /**
         * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
         */
        new Concatenation(
          new Singleton("\\"),
          new Union(
            new Match("btnfr\"'\\"),
            new Concatenation(
              new Repetition(new Range('0', '3'), 0, 1),
              new Repetition(new Range('0', '7'), 1, 2)
            )
          )
        )
      ),
      new Singleton("'")
    )));

    /**
     * 19.3 Terminals from section 3.10.5: String Literal
     *
     * A double quote, any number of plain characters or escape sequences,
     * and a closing double quote.
     */
    put("STRING_LITERAL", new Concatenation(
      new Singleton("\""), new Concatenation(
      new Repetition(
        new Union(

          /**
           * Single Character: [^\r\n"\\]
           */
          new NonMatch("\r\n\"\\"),

          /**
           * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
           */
          new Concatenation(
            new Singleton("\\"),
            new Union(
              new Match("btnfr\"'\\"),
              new Concatenation(
                new Repetition(new Range('0', '3'), 0, 1),
                new Repetition(new Range('0', '7'), 1, 2)
              )
            )
          )
        ), 0, INFINITY
      ),
      new Singleton("\"")
    )));

    /**
     * 19.3 Terminals section 3.10.7: Null Literal
     */
    put("NULL_LITERAL", new Singleton("null"));

    // OK, it seems we have to add some more stuff...
    // (operators and separators have no terminal of their own above, so a
    // catch-all terminal picks them up one character at a time)

    //put("OTHER1", new Match(";{}=,<>[]().+-:|&!"));
    //put("OTHER1", new NonMatch("")); // catch anything, one character at a time
    // NOTE(review): only space, tab, CR and LF are excluded here, while
    // WHITE_SPACE uses PosixClass.space(); the two sets may not be identical.
    put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time

  }
} // class Java20
338 | } |
Began life as a copy of #651
download show line numbers debug dex old transpilations
Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1000306 |
Snippet name: | JSON tokenizer (abandoned) |
Eternal ID of this version: | #1000306/1 |
Text MD5: | 0b7580f93160040e20c30bbbd9290d03 |
Author: | stefan |
Category: | javax |
Type: | JavaX (input.txt to output.txt) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2015-06-28 18:43:47 |
Source code size: | 8808 bytes / 338 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 872 / 652 |
Referenced in: | [show references] |