| 1 | !636 | 
| 2 | !629 // standard functions | 
| 3 | !1000305 // class JSONTokenizer | 
| 4 | |
| 5 | import JSONTokenizer.TokenType; | 
| 6 | |
| 7 | main {
 | 
| 8 |   psvm {
 | 
| 9 | String src = takeInput(args, null); | 
| 10 | |
| 11 |     src = src.replace("\r\n", "\n");
 | 
| 12 | JSONTokenizer lex = new JSONTokenizer(src); | 
| 13 | |
| 14 | List<T> list = new ArrayList<T>(); | 
| 15 |     for (TokenType a; (a = lex.nextToken()) != TokenType.EOF;) {
 | 
| 16 | String word = lex.getToken(); | 
| 17 | String q = quote(word); | 
| 18 | T t = new T(a, word); | 
| 19 | boolean isSpace = t.isSpace(); | 
| 20 | if (isSpace && list.size() > 0 && list.get(list.size()-1).isSpace()) | 
| 21 | list.get(list.size()-1).word += word; // merge spaces | 
| 22 | else | 
| 23 | list.add(t); | 
| 24 | } | 
| 25 | |
| 26 | List<String> cnc = new ArrayList<String>(); | 
| 27 |     for (int i = 0; i < list.size(); ) {
 | 
| 28 | T t = list.get(i); | 
| 29 | boolean shouldBeSpace = (cnc.size() % 2) == 0; | 
| 30 | boolean isSpace = t.isSpace(); | 
| 31 |       if (shouldBeSpace == isSpace) {
 | 
| 32 | cnc.add(t.word); | 
| 33 | ++i; | 
| 34 | } else if (shouldBeSpace) | 
| 35 |         cnc.add("");
 | 
| 36 |       else {
 | 
| 37 | System.out.println(cncToLines(cnc)); | 
| 38 |         throw new RuntimeException("TILT at " + cnc.size() + ": " + quote(t.word));
 | 
| 39 | } | 
| 40 | } | 
| 41 | if ((cnc.size() % 2) == 0) | 
| 42 |       cnc.add("");
 | 
| 43 | |
| 44 |     saveTextFile("output/output.txt", cncToLines(cnc));
 | 
| 45 | } | 
| 46 | |
| 47 |   static class T {
 | 
| 48 | TokenType a; String word; | 
| 49 | |
| 50 |     T(TokenType a, String word) { this.a = a; this.word = word; }
 | 
| 51 | |
| 52 |     boolean isSpace() {
 | 
| 53 |       return a.equals("WHITE_SPACE") || a.equals("COMMENT");
 | 
| 54 | } | 
| 55 | } | 
| 56 | |
| 57 |   static String cncToLines(List<String> cnc) {
 | 
| 58 | StringBuilder out = new StringBuilder(); | 
| 59 | for (String token : cnc) | 
| 60 | out.append(quote(token) + "\n"); | 
| 61 | return out.toString(); | 
| 62 | } | 
| 63 | |
| 64 |   static String takeInput(String[] args, String def) tex {
 | 
| 65 | if (args.length != 0) return loadSnippet(args[0]); | 
| 66 |     return loadTextFile("input/input.txt", def);
 | 
| 67 | } | 
| 68 | |
| 69 |   public static String quote(String s) {
 | 
| 70 | if (s == null) return "null"; | 
| 71 |     return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\"";
 | 
| 72 | } | 
| 73 | |
| 74 |   static class Java20 extends Lexicon {
 | 
| 75 | |
| 76 | 	Java20() {
 | 
| 77 | |
| 78 | /** | 
| 79 | * Grammar for Java 2.0. | 
| 80 | * | 
| 81 | * Nonterminal - first letter uppercase | 
| 82 | * TERMINAL - all letters uppercase | 
| 83 | * keyword - all letters lowercase | 
| 84 | */ | 
| 85 | int INFINITY = -1; | 
| 86 | |
| 87 | /** | 
| 88 | * 19.3 Terminals from section 3.6: White Space: [[:space:]] | 
| 89 | */ | 
| 90 | 		put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY));
 | 
| 91 | |
| 92 | /** | 
| 93 | * 19.3 Terminals from section 3.7: Comment | 
| 94 | */ | 
| 95 | 		put("COMMENT", new Union(
 | 
| 96 | |
| 97 | // | 
| 98 | // Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/ | 
| 99 | // | 
| 100 | new Concatenation( | 
| 101 | 				new Singleton("/*"), new Concatenation(
 | 
| 102 | 				new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
 | 
| 103 | new Repetition( | 
| 104 | new Concatenation( | 
| 105 | 						new Singleton("*"),
 | 
| 106 | new Repetition(new Concatenation( | 
| 107 | 							new NonMatch("*/"),
 | 
| 108 | 							new Repetition(new NonMatch("*"), 0, INFINITY)
 | 
| 109 | ), 0, 1) | 
| 110 | ), 0, INFINITY | 
| 111 | ), | 
| 112 | 				new Singleton("*/")
 | 
| 113 | ))), new Union( | 
| 114 | |
| 115 | /** | 
| 116 | * End Of Line Comment: //[^\n]*\n | 
| 117 | */ | 
| 118 | new Concatenation( | 
| 119 | 				new Singleton("//"), new Concatenation(
 | 
| 120 | 				new Repetition(new NonMatch("\n"), 0, INFINITY),
 | 
| 121 | 				new Singleton("\n")
 | 
| 122 | )), | 
| 123 | |
| 124 | // | 
| 125 | // Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/ | 
| 126 | // | 
| 127 | new Concatenation( | 
| 128 | 				new Singleton("/**"), new Concatenation(
 | 
| 129 | new Repetition( | 
| 130 | new Concatenation( | 
| 131 | new Repetition(new Concatenation( | 
| 132 | 							new NonMatch("*/"),
 | 
| 133 | 							new Repetition(new NonMatch("*"), 0, INFINITY)
 | 
| 134 | ), 0, 1), | 
| 135 | 						new Singleton("*")
 | 
| 136 | ), 0, INFINITY | 
| 137 | ), | 
| 138 | 				new Singleton("/")
 | 
| 139 | )) | 
| 140 | ))); | 
| 141 | |
| 142 | 		put("IDENTIFIER", new Concatenation(
 | 
| 143 | new Union( | 
| 144 | PosixClass.alpha(), | 
| 145 | 				new Match("_$")
 | 
| 146 | ), | 
| 147 | new Repetition( | 
| 148 | new Union( | 
| 149 | PosixClass.alnum(), | 
| 150 | 					new Match("_$")
 | 
| 151 | ), 0, INFINITY | 
| 152 | ) | 
| 153 | )); | 
| 154 | |
| 155 | /** | 
| 156 | * 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar) | 
| 157 | */ | 
| 158 | 		put("KEYWORD", new Union(
 | 
| 159 | 			new Singleton("const"),
 | 
| 160 | 			new Singleton("goto")
 | 
| 161 | )); | 
| 162 | |
| 163 | /** | 
| 164 | * 19.3 Terminals from section 3.10.1: Integer Literal | 
| 165 | */ | 
| 166 | 		put("INTEGER_LITERAL", new Concatenation(
 | 
| 167 | new Union( | 
| 168 | /** | 
| 169 | * Decimal Integer Literal: 0|[1-9][[:digit:]]* | 
| 170 | */ | 
| 171 | 				new Singleton("0"), new Union(
 | 
| 172 | |
| 173 | new Concatenation( | 
| 174 | 					new Range('1', '9'),
 | 
| 175 | new Repetition(PosixClass.digit(), 0, INFINITY) | 
| 176 | ), new Union( | 
| 177 | |
| 178 | /** | 
| 179 | * Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+ | 
| 180 | */ | 
| 181 | new Concatenation( | 
| 182 | 					new Singleton("0"), new Concatenation(
 | 
| 183 | 					new Match("xX"),
 | 
| 184 | new Repetition(PosixClass.xdigit(), 1, INFINITY) | 
| 185 | )), | 
| 186 | |
| 187 | /** | 
| 188 | * Octal Integer Literal: 0[0-7]+ | 
| 189 | */ | 
| 190 | new Concatenation( | 
| 191 | 					new Singleton("0"),
 | 
| 192 | 					new Repetition(new Range('0', '7'), 1, INFINITY)
 | 
| 193 | ) | 
| 194 | ))), | 
| 195 | 			new Repetition(new Match("lL"), 0, 1)
 | 
| 196 | )); | 
| 197 | |
| 198 | /** | 
| 199 | * 19.3 Terminals from section 3.10.2: Floating-Point Literal | 
| 200 | */ | 
| 201 | 		put("FLOATING_POINT_LITERAL", new Union(
 | 
| 202 | |
| 203 | /** | 
| 204 | * [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]? | 
| 205 | */ | 
| 206 | new Concatenation( | 
| 207 | new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( | 
| 208 | 				new Singleton("."), new Concatenation(
 | 
| 209 | new Repetition(PosixClass.digit(), 0, INFINITY), new Concatenation( | 
| 210 | new Repetition(new Concatenation( | 
| 211 | 					new Match("eE"), new Concatenation(
 | 
| 212 | 					new Repetition(new Match("-+"), 0, 1),
 | 
| 213 | new Repetition(PosixClass.digit(), 1, INFINITY) | 
| 214 | )), 0, 1), | 
| 215 | 				new Repetition(new Match("fFdD"), 0, 1)
 | 
| 216 | )))), new Union( | 
| 217 | |
| 218 | /** | 
| 219 | * \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]? | 
| 220 | */ | 
| 221 | new Concatenation( | 
| 222 | 				new Singleton("."), new Concatenation(
 | 
| 223 | new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( | 
| 224 | new Repetition(new Concatenation( | 
| 225 | 					new Match("eE"), new Concatenation(
 | 
| 226 | 					new Repetition(new Match("-+"), 0, 1),
 | 
| 227 | new Repetition(PosixClass.digit(), 1, INFINITY) | 
| 228 | )), 0, 1), | 
| 229 | 				new Repetition(new Match("fFdD"), 0, 1)
 | 
| 230 | ))), new Union( | 
| 231 | |
| 232 | /** | 
| 233 | * [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]? | 
| 234 | */ | 
| 235 | new Concatenation( | 
| 236 | new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( | 
| 237 | 				new Match("eE"), new Concatenation(
 | 
| 238 | 				new Repetition(new Match("-+"), 0, 1), new Concatenation(
 | 
| 239 | new Repetition(PosixClass.digit(), 1, INFINITY), | 
| 240 | 				new Repetition(new Match("fFdD"), 0, 1)
 | 
| 241 | )))), | 
| 242 | |
| 243 | /** | 
| 244 | * [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD] | 
| 245 | */ | 
| 246 | new Concatenation( | 
| 247 | new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( | 
| 248 | new Repetition(new Concatenation( | 
| 249 | 					new Match("eE"), new Concatenation(
 | 
| 250 | 					new Repetition(new Match("-+"), 0, 1),
 | 
| 251 | new Repetition(PosixClass.digit(), 1, INFINITY) | 
| 252 | )), 0, 1), | 
| 253 | 				new Match("fFdD")
 | 
| 254 | )) | 
| 255 | )))); | 
| 256 | |
| 257 | /** | 
| 258 | * 19.3 Terminals from section 3.10.3: Boolean Literal | 
| 259 | */ | 
| 260 | 		put("BOOLEAN_LITERAL", new Union(
 | 
| 261 | 			new Singleton("true"),
 | 
| 262 | 			new Singleton("false")
 | 
| 263 | )); | 
| 264 | |
| 265 | /** | 
| 266 | * 19.3 Terminals from section 3.10.4: Character Literal | 
| 267 | */ | 
| 268 | 		put("CHARACTER_LITERAL", new Concatenation(
 | 
| 269 | 			new Singleton("'"), new Concatenation(
 | 
| 270 | new Union( | 
| 271 | |
| 272 | /** | 
| 273 | * Single Character: [^\r\n'\\] | 
| 274 | */ | 
| 275 | 				new NonMatch("\r\n'\\"),
 | 
| 276 | |
| 277 | /** | 
| 278 | 				* Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
 | 
| 279 | */ | 
| 280 | new Concatenation( | 
| 281 | 					new Singleton("\\"),
 | 
| 282 | new Union( | 
| 283 | 						new Match("btnfr\"'\\"),
 | 
| 284 | new Concatenation( | 
| 285 | 							new Repetition(new Range('0', '3'), 0, 1),
 | 
| 286 | 							new Repetition(new Range('0', '7'), 1, 2)
 | 
| 287 | ) | 
| 288 | ) | 
| 289 | ) | 
| 290 | ), | 
| 291 | 			new Singleton("'")
 | 
| 292 | ))); | 
| 293 | |
| 294 | /** | 
| 295 | * 19.3 Terminals from section 3.10.5: String Literal | 
| 296 | */ | 
| 297 | 		put("STRING_LITERAL", new Concatenation(
 | 
| 298 | 			new Singleton("\""), new Concatenation(
 | 
| 299 | new Repetition( | 
| 300 | new Union( | 
| 301 | |
| 302 | /** | 
| 303 | * Single Character: [^\r\n"\\] | 
| 304 | */ | 
| 305 | 					new NonMatch("\r\n\"\\"),
 | 
| 306 | |
| 307 | /** | 
| 308 | 					* Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
 | 
| 309 | */ | 
| 310 | new Concatenation( | 
| 311 | 						new Singleton("\\"),
 | 
| 312 | new Union( | 
| 313 | 							new Match("btnfr\"'\\"),
 | 
| 314 | new Concatenation( | 
| 315 | 								new Repetition(new Range('0', '3'), 0, 1),
 | 
| 316 | 								new Repetition(new Range('0', '7'), 1, 2)
 | 
| 317 | ) | 
| 318 | ) | 
| 319 | ) | 
| 320 | ), 0, INFINITY | 
| 321 | ), | 
| 322 | 			new Singleton("\"")
 | 
| 323 | ))); | 
| 324 | |
| 325 | /** | 
| 326 | * 19.3 Terminals section 3.10.7: Null Literal | 
| 327 | */ | 
| 328 | 		put("NULL_LITERAL", new Singleton("null"));
 | 
| 329 | |
| 330 | // OK, it seems we have to add some more stuff... | 
| 331 | |
| 332 | 		//put("OTHER1", new Match(";{}=,<>[]().+-:|&!"));
 | 
| 333 | 		//put("OTHER1", new NonMatch("")); // catch anything, one character at a time
 | 
| 334 | 		put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time
 | 
| 335 | |
| 336 | } | 
| 337 | } // class Java20 | 
| 338 | } | 
Began life as a copy of #651
download show line numbers debug dex old transpilations
Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #1000306 | 
| Snippet name: | JSON tokenizer (abandoned) | 
| Eternal ID of this version: | #1000306/1 | 
| Text MD5: | 0b7580f93160040e20c30bbbd9290d03 | 
| Author: | stefan | 
| Category: | javax | 
| Type: | JavaX (input.txt to output.txt) | 
| Public (visible to everyone): | Yes | 
| Archived (hidden from active list): | No | 
| Created/modified: | 2015-06-28 18:43:47 | 
| Source code size: | 8808 bytes / 338 lines | 
| Pitched / IR pitched: | No / Yes | 
| Views / Downloads: | 1027 / 820 | 
| Referenced in: | [show references] |