!636 !629 // standard functions !1000305 // class JSONTokenizer import JSONTokenizer.TokenType; main { psvm { String src = takeInput(args, null); src = src.replace("\r\n", "\n"); JSONTokenizer lex = new JSONTokenizer(src); List<T> list = new ArrayList<T>(); for (TokenType a; (a = lex.nextToken()) != TokenType.EOF;) { String word = lex.getToken(); String q = quote(word); T t = new T(a, word); boolean isSpace = t.isSpace(); if (isSpace && list.size() > 0 && list.get(list.size()-1).isSpace()) list.get(list.size()-1).word += word; // merge spaces else list.add(t); } List<String> cnc = new ArrayList<String>(); for (int i = 0; i < list.size(); ) { T t = list.get(i); boolean shouldBeSpace = (cnc.size() % 2) == 0; boolean isSpace = t.isSpace(); if (shouldBeSpace == isSpace) { cnc.add(t.word); ++i; } else if (shouldBeSpace) cnc.add(""); else { System.out.println(cncToLines(cnc)); throw new RuntimeException("TILT at " + cnc.size() + ": " + quote(t.word)); } } if ((cnc.size() % 2) == 0) cnc.add(""); saveTextFile("output/output.txt", cncToLines(cnc)); } static class T { TokenType a; String word; T(TokenType a, String word) { this.a = a; this.word = word; } boolean isSpace() { return a.equals("WHITE_SPACE") || a.equals("COMMENT"); } } static String cncToLines(List<String> cnc) { StringBuilder out = new StringBuilder(); for (String token : cnc) out.append(quote(token) + "\n"); return out.toString(); } static String takeInput(String[] args, String def) tex { if (args.length != 0) return loadSnippet(args[0]); return loadTextFile("input/input.txt", def); } public static String quote(String s) { if (s == null) return "null"; return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\""; } static class Java20 extends Lexicon { Java20() { /** * Grammar for Java 2.0. * * Nonterminal - first letter uppercase * TERMINAL - all letters uppercase * keyword - all letters lowercase */ int INFINITY = -1; /** * 19.3 Terminals from section 3.6: White Space: [[:space:]] */ put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY)); /** * 19.3 Terminals from section 3.7: Comment */ put("COMMENT", new Union( // // Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/ // new Concatenation( new Singleton("/*"), new Concatenation( new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation( new Repetition( new Concatenation( new Singleton("*"), new Repetition(new Concatenation( new NonMatch("*/"), new Repetition(new NonMatch("*"), 0, INFINITY) ), 0, 1) ), 0, INFINITY ), new Singleton("*/") ))), new Union( /** * End Of Line Comment: //[^\n]*\n */ new Concatenation( new Singleton("//"), new Concatenation( new Repetition(new NonMatch("\n"), 0, INFINITY), new Singleton("\n") )), // // Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/ // new Concatenation( new Singleton("/**"), new Concatenation( new Repetition( new Concatenation( new Repetition(new Concatenation( new NonMatch("*/"), new Repetition(new NonMatch("*"), 0, INFINITY) ), 0, 1), new Singleton("*") ), 0, INFINITY ), new Singleton("/") )) ))); put("IDENTIFIER", new Concatenation( new Union( PosixClass.alpha(), new Match("_$") ), new Repetition( new Union( PosixClass.alnum(), new Match("_$") ), 0, INFINITY ) )); /** * 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar) */ put("KEYWORD", new Union( new Singleton("const"), new Singleton("goto") )); /** * 19.3 Terminals from section 3.10.1: Integer Literal */ put("INTEGER_LITERAL", new Concatenation( new Union( /** * Decimal Integer Literal: 0|[1-9][[:digit:]]* */ new Singleton("0"), new Union( new Concatenation( new Range('1', '9'), new Repetition(PosixClass.digit(), 0, INFINITY) ), new Union( /** * Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+ */ new Concatenation( new Singleton("0"), new Concatenation( new Match("xX"), new Repetition(PosixClass.xdigit(), 1, INFINITY) )), /** * Octal Integer Literal: 0[0-7]+ */ new Concatenation( new Singleton("0"), new Repetition(new Range('0', '7'), 1, INFINITY) ) ))), new Repetition(new Match("lL"), 0, 1) )); /** * 19.3 Terminals from section 3.10.2: Floating-Point Literal */ put("FLOATING_POINT_LITERAL", new Union( /** * [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]? */ new Concatenation( new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( new Singleton("."), new Concatenation( new Repetition(PosixClass.digit(), 0, INFINITY), new Concatenation( new Repetition(new Concatenation( new Match("eE"), new Concatenation( new Repetition(new Match("-+"), 0, 1), new Repetition(PosixClass.digit(), 1, INFINITY) )), 0, 1), new Repetition(new Match("fFdD"), 0, 1) )))), new Union( /** * \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]? */ new Concatenation( new Singleton("."), new Concatenation( new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( new Repetition(new Concatenation( new Match("eE"), new Concatenation( new Repetition(new Match("-+"), 0, 1), new Repetition(PosixClass.digit(), 1, INFINITY) )), 0, 1), new Repetition(new Match("fFdD"), 0, 1) ))), new Union( /** * [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]? */ new Concatenation( new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( new Match("eE"), new Concatenation( new Repetition(new Match("-+"), 0, 1), new Concatenation( new Repetition(PosixClass.digit(), 1, INFINITY), new Repetition(new Match("fFdD"), 0, 1) )))), /** * [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD] */ new Concatenation( new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( new Repetition(new Concatenation( new Match("eE"), new Concatenation( new Repetition(new Match("-+"), 0, 1), new Repetition(PosixClass.digit(), 1, INFINITY) )), 0, 1), new Match("fFdD") )) )))); /** * 19.3 Terminals from section 3.10.3: Boolean Literal */ put("BOOLEAN_LITERAL", new Union( new Singleton("true"), new Singleton("false") )); /** * 19.3 Terminals from section 3.10.4: Character Literal */ put("CHARACTER_LITERAL", new Concatenation( new Singleton("'"), new Concatenation( new Union( /** * Single Character: [^\r\n'\\] */ new NonMatch("\r\n'\\"), /** * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2}) */ new Concatenation( new Singleton("\\"), new Union( new Match("btnfr\"'\\"), new Concatenation( new Repetition(new Range('0', '3'), 0, 1), new Repetition(new Range('0', '7'), 1, 2) ) ) ) ), new Singleton("'") ))); /** * 19.3 Terminals from section 3.10.5: String Literal */ put("STRING_LITERAL", new Concatenation( new Singleton("\""), new Concatenation( new Repetition( new Union( /** * Single Character: [^\r\n"\\] */ new NonMatch("\r\n\"\\"), /** * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2}) */ new Concatenation( new Singleton("\\"), new Union( new Match("btnfr\"'\\"), new Concatenation( new Repetition(new Range('0', '3'), 0, 1), new Repetition(new Range('0', '7'), 1, 2) ) ) ) ), 0, INFINITY ), new Singleton("\"") ))); /** * 19.3 Terminals section 3.10.7: Null Literal */ put("NULL_LITERAL", new Singleton("null")); // OK, it seems we have to add some more stuff... //put("OTHER1", new Match(";{}=,<>[]().+-:|&!")); //put("OTHER1", new NonMatch("")); // catch anything, one character at a time put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time } } // class Java20 }
Began life as a copy of #651
download show line numbers debug dex old transpilations
Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1000306 |
Snippet name: | JSON tokenizer (abandoned) |
Eternal ID of this version: | #1000306/1 |
Text MD5: | 0b7580f93160040e20c30bbbd9290d03 |
Author: | stefan |
Category: | javax |
Type: | JavaX (input.txt to output.txt) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2015-06-28 18:43:47 |
Source code size: | 8808 bytes / 338 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 802 / 576 |
Referenced in: | [show references] |