!636 //1000300 // Lexicon //1000515 // Lexicon, fixing !quicknew class JavaTok { static String join(List cnc) { new StringBuilder buf; for (String s : cnc) buf.append(s); return buf.toString(); } static List split(String src) { Java20 lex = new Java20(); src = src.replace("\r\n", "\n"); LineNumberReader source = new LineNumberReader(new StringReader(src)); int lineNr = source.getLineNumber()+1; List list = new ArrayList(); try { for (Object a; (a = lex.grab(source)) != lex.$;) { String word = lex.word(); String q = quote(word); //System.out.println("grabbed at line " + lineNr + ": " + a + " " + q); lineNr = source.getLineNumber()+1; T t = new T(a, word); boolean isSpace = t.isSpace(); if (isSpace && list.size() > 0 && list.get(list.size()-1).isSpace()) list.get(list.size()-1).word += word; // merge spaces else list.add(t); } } catch (Lexicon.Exception e) { throw new RuntimeException(e); } List cnc = new ArrayList(); for (int i = 0; i < list.size(); ) { T t = list.get(i); boolean shouldBeSpace = (cnc.size() % 2) == 0; boolean isSpace = t.isSpace(); if (shouldBeSpace == isSpace) { cnc.add(t.word); ++i; } else if (shouldBeSpace) cnc.add(""); else { System.out.println(cncToLines(cnc)); throw new RuntimeException("TILT at " + cnc.size() + ": " + quote(t.word)); } } if ((cnc.size() % 2) == 0) cnc.add(""); return cnc; } static class T { Object a; String word; T(Object a, String word) { this.a = a; this.word = word; } boolean isSpace() { return a.equals("WHITE_SPACE") || a.equals("COMMENT"); } } static String cncToLines(List cnc) { StringBuilder out = new StringBuilder(); for (String token : cnc) out.append(quote(token) + "\n"); return out.toString(); } public static String quote(String s) { if (s == null) return "null"; return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\""; } static class Java20 extends Lexicon { Java20() { /** * Grammar for Java 2.0. * * Nonterminal - first letter uppercase * TERMINAL - all letters uppercase * keyword - all letters lowercase */ int INFINITY = -1; /** * 19.3 Terminals from section 3.6: White Space: [[:space:]] */ put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY)); /** * 19.3 Terminals from section 3.7: Comment */ put("COMMENT", new Union( // // Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/ // new Concatenation( new Singleton("/*"), new Concatenation( new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation( new Repetition( new Concatenation( new Singleton("*"), new Repetition(new Concatenation( new NonMatch("*/"), new Repetition(new NonMatch("*"), 0, INFINITY) ), 0, 1) ), 0, INFINITY ), new Singleton("*/") ))), new Union( /** * End Of Line Comment: //[^\n]*\n */ new Concatenation( new Singleton("//"), new Concatenation( new Repetition(new NonMatch("\n"), 0, INFINITY), new Singleton("\n") )), // // Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/ // new Concatenation( new Singleton("/**"), new Concatenation( new Repetition( new Concatenation( new Repetition(new Concatenation( new NonMatch("*/"), new Repetition(new NonMatch("*"), 0, INFINITY) ), 0, 1), new Singleton("*") ), 0, INFINITY ), new Singleton("/") )) ))); put("IDENTIFIER", new Concatenation( new Union( PosixClass.alpha(), new Match("_$") ), new Repetition( new Union( PosixClass.alnum(), new Match("_$") ), 0, INFINITY ) )); /** * 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar) */ put("KEYWORD", new Union( new Singleton("const"), new Singleton("goto") )); /** * 19.3 Terminals from section 3.10.1: Integer Literal */ put("INTEGER_LITERAL", new Concatenation( new Union( /** * Decimal Integer Literal: 0|[1-9][[:digit:]]* */ new Singleton("0"), new Union( new Concatenation( new Range('1', '9'), new Repetition(PosixClass.digit(), 0, INFINITY) ), new Union( /** * Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+ */ new Concatenation( new Singleton("0"), new Concatenation( new Match("xX"), new Repetition(PosixClass.xdigit(), 1, INFINITY) )), /** * Octal Integer Literal: 0[0-7]+ */ new Concatenation( new Singleton("0"), new Repetition(new Range('0', '7'), 1, INFINITY) ) ))), new Repetition(new Match("lL"), 0, 1) )); /** * 19.3 Terminals from section 3.10.2: Floating-Point Literal */ put("FLOATING_POINT_LITERAL", new Union( /** * [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]? */ new Concatenation( new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( new Singleton("."), new Concatenation( new Repetition(PosixClass.digit(), 0, INFINITY), new Concatenation( new Repetition(new Concatenation( new Match("eE"), new Concatenation( new Repetition(new Match("-+"), 0, 1), new Repetition(PosixClass.digit(), 1, INFINITY) )), 0, 1), new Repetition(new Match("fFdD"), 0, 1) )))), new Union( /** * \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]? */ new Concatenation( new Singleton("."), new Concatenation( new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( new Repetition(new Concatenation( new Match("eE"), new Concatenation( new Repetition(new Match("-+"), 0, 1), new Repetition(PosixClass.digit(), 1, INFINITY) )), 0, 1), new Repetition(new Match("fFdD"), 0, 1) ))), new Union( /** * [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]? */ new Concatenation( new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( new Match("eE"), new Concatenation( new Repetition(new Match("-+"), 0, 1), new Concatenation( new Repetition(PosixClass.digit(), 1, INFINITY), new Repetition(new Match("fFdD"), 0, 1) )))), /** * [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD] */ new Concatenation( new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( new Repetition(new Concatenation( new Match("eE"), new Concatenation( new Repetition(new Match("-+"), 0, 1), new Repetition(PosixClass.digit(), 1, INFINITY) )), 0, 1), new Match("fFdD") )) )))); /** * 19.3 Terminals from section 3.10.3: Boolean Literal */ put("BOOLEAN_LITERAL", new Union( new Singleton("true"), new Singleton("false") )); /** * 19.3 Terminals from section 3.10.4: Character Literal */ put("CHARACTER_LITERAL", new Concatenation( new Singleton("'"), new Concatenation( new Union( /** * Single Character: [^\r\n'\\] */ new NonMatch("\r\n'\\"), /** * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2}) */ new Concatenation( new Singleton("\\"), new Union( new Match("btnfr\"'\\"), new Concatenation( new Repetition(new Range('0', '3'), 0, 1), new Repetition(new Range('0', '7'), 1, 2) ) ) ) ), new Singleton("'") ))); put("MULTILINE_LITERAL", new Concatenation( new Singleton("[["), new Concatenation( new Repetition( new Union( new NonMatch("]"), new Concatenation( new Singleton("]"), new NonMatch("]")) ), 0, INFINITY ), new Singleton("]]") ))); put("MULTILINE_LITERAL2", new Concatenation( new Singleton("[=["), new Concatenation( new Repetition( new Union( new NonMatch("]"), new Concatenation(new Singleton("]"), new Union( new NonMatch("="), new Concatenation(new Singleton("="), new NonMatch("]")))) ), 0, INFINITY ), new Singleton("]=]") ))); /** * 19.3 Terminals from section 3.10.5: String Literal */ put("STRING_LITERAL", new Concatenation( new Singleton("\""), new Concatenation( new Repetition( new Union( /** * Single Character: [^\r\n"\\] */ new NonMatch("\r\n\"\\"), /** * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2}) */ new Concatenation( new Singleton("\\"), new Union( new Match("btnfr\"'\\"), new Union( new Concatenation( new Repetition(new Range('0', '3'), 0, 1), new Repetition(new Range('0', '7'), 1, 2) ), new Concatenation( new Singleton("u"), new Repetition(new Match("0123456789abcdefABCDEF"), 4, 4) ) ) ) ) ), 0, INFINITY ), new Singleton("\"") ))); /** * 19.3 Terminals section 3.10.7: Null Literal */ put("NULL_LITERAL", new Singleton("null")); // OK, it seems we have to add some more stuff... //put("OTHER1", new Match(";{}=,<>[]().+-:|&!")); //put("OTHER1", new NonMatch("")); // catch anything, one character at a time put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time } } // class Java20 } !include #1000300 // Lexicon