1 | !636 |
2 | !629 // standard functions |
3 | !1000305 // class JSONTokenizer |
4 | |
5 | import JSONTokenizer.TokenType; |
6 | |
7 | main {
|
8 | psvm {
|
9 | String src = takeInput(args, null); |
10 | |
11 | src = src.replace("\r\n", "\n");
|
12 | JSONTokenizer lex = new JSONTokenizer(src); |
13 | |
14 | List<T> list = new ArrayList<T>(); |
15 | for (TokenType a; (a = lex.nextToken()) != TokenType.EOF;) {
|
16 | String word = lex.getToken(); |
17 | String q = quote(word); |
18 | T t = new T(a, word); |
19 | boolean isSpace = t.isSpace(); |
20 | if (isSpace && list.size() > 0 && list.get(list.size()-1).isSpace()) |
21 | list.get(list.size()-1).word += word; // merge spaces |
22 | else |
23 | list.add(t); |
24 | } |
25 | |
26 | List<String> cnc = new ArrayList<String>(); |
27 | for (int i = 0; i < list.size(); ) {
|
28 | T t = list.get(i); |
29 | boolean shouldBeSpace = (cnc.size() % 2) == 0; |
30 | boolean isSpace = t.isSpace(); |
31 | if (shouldBeSpace == isSpace) {
|
32 | cnc.add(t.word); |
33 | ++i; |
34 | } else if (shouldBeSpace) |
35 | cnc.add("");
|
36 | else {
|
37 | System.out.println(cncToLines(cnc)); |
38 | throw new RuntimeException("TILT at " + cnc.size() + ": " + quote(t.word));
|
39 | } |
40 | } |
41 | if ((cnc.size() % 2) == 0) |
42 | cnc.add("");
|
43 | |
44 | saveTextFile("output/output.txt", cncToLines(cnc));
|
45 | } |
46 | |
47 | static class T {
|
48 | TokenType a; String word; |
49 | |
50 | T(TokenType a, String word) { this.a = a; this.word = word; }
|
51 | |
52 | boolean isSpace() {
|
53 | return a.equals("WHITE_SPACE") || a.equals("COMMENT");
|
54 | } |
55 | } |
56 | |
57 | static String cncToLines(List<String> cnc) {
|
58 | StringBuilder out = new StringBuilder(); |
59 | for (String token : cnc) |
60 | out.append(quote(token) + "\n"); |
61 | return out.toString(); |
62 | } |
63 | |
64 | static String takeInput(String[] args, String def) tex {
|
65 | if (args.length != 0) return loadSnippet(args[0]); |
66 | return loadTextFile("input/input.txt", def);
|
67 | } |
68 | |
69 | public static String quote(String s) {
|
70 | if (s == null) return "null"; |
71 | return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\"";
|
72 | } |
73 | |
74 | static class Java20 extends Lexicon {
|
75 | |
76 | Java20() {
|
77 | |
78 | /** |
79 | * Grammar for Java 2.0. |
80 | * |
81 | * Nonterminal - first letter uppercase |
82 | * TERMINAL - all letters uppercase |
83 | * keyword - all letters lowercase |
84 | */ |
85 | int INFINITY = -1; |
86 | |
87 | /** |
88 | * 19.3 Terminals from section 3.6: White Space: [[:space:]] |
89 | */ |
90 | put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY));
|
91 | |
92 | /** |
93 | * 19.3 Terminals from section 3.7: Comment |
94 | */ |
95 | put("COMMENT", new Union(
|
96 | |
97 | // |
98 | // Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/ |
99 | // |
100 | new Concatenation( |
101 | new Singleton("/*"), new Concatenation(
|
102 | new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
|
103 | new Repetition( |
104 | new Concatenation( |
105 | new Singleton("*"),
|
106 | new Repetition(new Concatenation( |
107 | new NonMatch("*/"),
|
108 | new Repetition(new NonMatch("*"), 0, INFINITY)
|
109 | ), 0, 1) |
110 | ), 0, INFINITY |
111 | ), |
112 | new Singleton("*/")
|
113 | ))), new Union( |
114 | |
115 | /** |
116 | * End Of Line Comment: //[^\n]*\n |
117 | */ |
118 | new Concatenation( |
119 | new Singleton("//"), new Concatenation(
|
120 | new Repetition(new NonMatch("\n"), 0, INFINITY),
|
121 | new Singleton("\n")
|
122 | )), |
123 | |
124 | // |
125 | // Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/ |
126 | // |
127 | new Concatenation( |
128 | new Singleton("/**"), new Concatenation(
|
129 | new Repetition( |
130 | new Concatenation( |
131 | new Repetition(new Concatenation( |
132 | new NonMatch("*/"),
|
133 | new Repetition(new NonMatch("*"), 0, INFINITY)
|
134 | ), 0, 1), |
135 | new Singleton("*")
|
136 | ), 0, INFINITY |
137 | ), |
138 | new Singleton("/")
|
139 | )) |
140 | ))); |
141 | |
142 | put("IDENTIFIER", new Concatenation(
|
143 | new Union( |
144 | PosixClass.alpha(), |
145 | new Match("_$")
|
146 | ), |
147 | new Repetition( |
148 | new Union( |
149 | PosixClass.alnum(), |
150 | new Match("_$")
|
151 | ), 0, INFINITY |
152 | ) |
153 | )); |
154 | |
155 | /** |
156 | * 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar) |
157 | */ |
158 | put("KEYWORD", new Union(
|
159 | new Singleton("const"),
|
160 | new Singleton("goto")
|
161 | )); |
162 | |
163 | /** |
164 | * 19.3 Terminals from section 3.10.1: Integer Literal |
165 | */ |
166 | put("INTEGER_LITERAL", new Concatenation(
|
167 | new Union( |
168 | /** |
169 | * Decimal Integer Literal: 0|[1-9][[:digit:]]* |
170 | */ |
171 | new Singleton("0"), new Union(
|
172 | |
173 | new Concatenation( |
174 | new Range('1', '9'),
|
175 | new Repetition(PosixClass.digit(), 0, INFINITY) |
176 | ), new Union( |
177 | |
178 | /** |
179 | * Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+ |
180 | */ |
181 | new Concatenation( |
182 | new Singleton("0"), new Concatenation(
|
183 | new Match("xX"),
|
184 | new Repetition(PosixClass.xdigit(), 1, INFINITY) |
185 | )), |
186 | |
187 | /** |
188 | * Octal Integer Literal: 0[0-7]+ |
189 | */ |
190 | new Concatenation( |
191 | new Singleton("0"),
|
192 | new Repetition(new Range('0', '7'), 1, INFINITY)
|
193 | ) |
194 | ))), |
195 | new Repetition(new Match("lL"), 0, 1)
|
196 | )); |
197 | |
198 | /** |
199 | * 19.3 Terminals from section 3.10.2: Floating-Point Literal |
200 | */ |
201 | put("FLOATING_POINT_LITERAL", new Union(
|
202 | |
203 | /** |
204 | * [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]? |
205 | */ |
206 | new Concatenation( |
207 | new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( |
208 | new Singleton("."), new Concatenation(
|
209 | new Repetition(PosixClass.digit(), 0, INFINITY), new Concatenation( |
210 | new Repetition(new Concatenation( |
211 | new Match("eE"), new Concatenation(
|
212 | new Repetition(new Match("-+"), 0, 1),
|
213 | new Repetition(PosixClass.digit(), 1, INFINITY) |
214 | )), 0, 1), |
215 | new Repetition(new Match("fFdD"), 0, 1)
|
216 | )))), new Union( |
217 | |
218 | /** |
219 | * \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]? |
220 | */ |
221 | new Concatenation( |
222 | new Singleton("."), new Concatenation(
|
223 | new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( |
224 | new Repetition(new Concatenation( |
225 | new Match("eE"), new Concatenation(
|
226 | new Repetition(new Match("-+"), 0, 1),
|
227 | new Repetition(PosixClass.digit(), 1, INFINITY) |
228 | )), 0, 1), |
229 | new Repetition(new Match("fFdD"), 0, 1)
|
230 | ))), new Union( |
231 | |
232 | /** |
233 | * [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]? |
234 | */ |
235 | new Concatenation( |
236 | new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( |
237 | new Match("eE"), new Concatenation(
|
238 | new Repetition(new Match("-+"), 0, 1), new Concatenation(
|
239 | new Repetition(PosixClass.digit(), 1, INFINITY), |
240 | new Repetition(new Match("fFdD"), 0, 1)
|
241 | )))), |
242 | |
243 | /** |
244 | * [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD] |
245 | */ |
246 | new Concatenation( |
247 | new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( |
248 | new Repetition(new Concatenation( |
249 | new Match("eE"), new Concatenation(
|
250 | new Repetition(new Match("-+"), 0, 1),
|
251 | new Repetition(PosixClass.digit(), 1, INFINITY) |
252 | )), 0, 1), |
253 | new Match("fFdD")
|
254 | )) |
255 | )))); |
256 | |
257 | /** |
258 | * 19.3 Terminals from section 3.10.3: Boolean Literal |
259 | */ |
260 | put("BOOLEAN_LITERAL", new Union(
|
261 | new Singleton("true"),
|
262 | new Singleton("false")
|
263 | )); |
264 | |
265 | /** |
266 | * 19.3 Terminals from section 3.10.4: Character Literal |
267 | */ |
268 | put("CHARACTER_LITERAL", new Concatenation(
|
269 | new Singleton("'"), new Concatenation(
|
270 | new Union( |
271 | |
272 | /** |
273 | * Single Character: [^\r\n'\\] |
274 | */ |
275 | new NonMatch("\r\n'\\"),
|
276 | |
277 | /** |
278 | * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
|
279 | */ |
280 | new Concatenation( |
281 | new Singleton("\\"),
|
282 | new Union( |
283 | new Match("btnfr\"'\\"),
|
284 | new Concatenation( |
285 | new Repetition(new Range('0', '3'), 0, 1),
|
286 | new Repetition(new Range('0', '7'), 1, 2)
|
287 | ) |
288 | ) |
289 | ) |
290 | ), |
291 | new Singleton("'")
|
292 | ))); |
293 | |
294 | /** |
295 | * 19.3 Terminals from section 3.10.5: String Literal |
296 | */ |
297 | put("STRING_LITERAL", new Concatenation(
|
298 | new Singleton("\""), new Concatenation(
|
299 | new Repetition( |
300 | new Union( |
301 | |
302 | /** |
303 | * Single Character: [^\r\n"\\] |
304 | */ |
305 | new NonMatch("\r\n\"\\"),
|
306 | |
307 | /** |
308 | * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
|
309 | */ |
310 | new Concatenation( |
311 | new Singleton("\\"),
|
312 | new Union( |
313 | new Match("btnfr\"'\\"),
|
314 | new Concatenation( |
315 | new Repetition(new Range('0', '3'), 0, 1),
|
316 | new Repetition(new Range('0', '7'), 1, 2)
|
317 | ) |
318 | ) |
319 | ) |
320 | ), 0, INFINITY |
321 | ), |
322 | new Singleton("\"")
|
323 | ))); |
324 | |
325 | /** |
326 | * 19.3 Terminals section 3.10.7: Null Literal |
327 | */ |
328 | put("NULL_LITERAL", new Singleton("null"));
|
329 | |
330 | // OK, it seems we have to add some more stuff... |
331 | |
332 | //put("OTHER1", new Match(";{}=,<>[]().+-:|&!"));
|
333 | //put("OTHER1", new NonMatch("")); // catch anything, one character at a time
|
334 | put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time
|
335 | |
336 | } |
337 | } // class Java20 |
338 | } |
Began life as a copy of #651
download show line numbers debug dex old transpilations
Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #1000306 |
| Snippet name: | JSON tokenizer (abandoned) |
| Eternal ID of this version: | #1000306/1 |
| Text MD5: | 0b7580f93160040e20c30bbbd9290d03 |
| Author: | stefan |
| Category: | javax |
| Type: | JavaX (input.txt to output.txt) |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2015-06-28 18:43:47 |
| Source code size: | 8808 bytes / 338 lines |
| Pitched / IR pitched: | No / Yes |
| Views / Downloads: | 1077 / 866 |
| Referenced in: | [show references] |