import java.security.NoSuchAlgorithmException; import java.security.MessageDigest; import java.lang.reflect.*; import java.net.*; import java.io.*; import javax.swing.*; import java.util.regex.*; import java.util.*; public class main { public static void main(String[] args) throws Exception { String s = loadMainJava(); // replace L with List // and L with List List tok = JavaTok.split(s); for (int i = 1; i+6 < tok.size(); i += 2) if (tok.get(i).equals("L") && tok.get(i+2).equals("<") && tok.get(i+6).equals(">")) { tok.set(i, "List"); if (tok.get(i+4).equals("S")) tok.set(i+4, "String"); } s = JavaTok.join(tok); saveMainJava(s); } static String loadMainJava() throws IOException { return loadTextFile("input/main.java", ""); } static void saveMainJava(String s) throws IOException { saveTextFile("output/main.java", s); } /** writes safely (to temp file, then rename) */ public static void saveTextFile(String fileName, String contents) throws IOException { File file = new File(fileName); File parentFile = file.getParentFile(); if (parentFile != null) parentFile.mkdirs(); String tempFileName = fileName + "_temp"; FileOutputStream fileOutputStream = new FileOutputStream(tempFileName); OutputStreamWriter outputStreamWriter = new OutputStreamWriter(fileOutputStream, "UTF-8"); PrintWriter printWriter = new PrintWriter(outputStreamWriter); printWriter.print(contents); printWriter.close(); if (file.exists() && !file.delete()) throw new IOException("Can't delete " + fileName); if (!new File(tempFileName).renameTo(file)) throw new IOException("Can't rename " + tempFileName + " to " + fileName); } public static String loadTextFile(String fileName, String defaultContents) throws IOException { if (!new File(fileName).exists()) return defaultContents; FileInputStream fileInputStream = new FileInputStream(fileName); InputStreamReader inputStreamReader = new InputStreamReader(fileInputStream, "UTF-8"); return loadTextFile(inputStreamReader); } public static String loadTextFile(Reader reader) throws IOException { StringBuilder builder = new StringBuilder(); try { BufferedReader bufferedReader = new BufferedReader(reader); String line; while ((line = bufferedReader.readLine()) != null) builder.append(line).append('\n'); } finally { reader.close(); } return builder.length() == 0 ? "" : builder.substring(0, builder.length()-1); } } class JavaTok { static String join(List cnc) { StringBuilder buf = new StringBuilder(); for (String s : cnc) buf.append(s); return buf.toString(); } static List split(String src) { Java20 lex = new Java20(); src = src.replace("\r\n", "\n"); LineNumberReader source = new LineNumberReader(new StringReader(src)); int lineNr = source.getLineNumber()+1; List list = new ArrayList(); try { for (Object a; (a = lex.grab(source)) != lex.$;) { String word = lex.word(); String q = quote(word); //System.out.println("grabbed at line " + lineNr + ": " + a + " " + q); lineNr = source.getLineNumber()+1; T t = new T(a, word); boolean isSpace = t.isSpace(); if (isSpace && list.size() > 0 && list.get(list.size()-1).isSpace()) list.get(list.size()-1).word += word; // merge spaces else list.add(t); } } catch (Lexicon.Exception e) { throw new RuntimeException(e); } List cnc = new ArrayList(); for (int i = 0; i < list.size(); ) { T t = list.get(i); boolean shouldBeSpace = (cnc.size() % 2) == 0; boolean isSpace = t.isSpace(); if (shouldBeSpace == isSpace) { cnc.add(t.word); ++i; } else if (shouldBeSpace) cnc.add(""); else { System.out.println(cncToLines(cnc)); throw new RuntimeException("TILT at " + cnc.size() + ": " + quote(t.word)); } } if ((cnc.size() % 2) == 0) cnc.add(""); return cnc; } static class T { Object a; String word; T(Object a, String word) { this.a = a; this.word = word; } boolean isSpace() { return a.equals("WHITE_SPACE") || a.equals("COMMENT"); } } static String cncToLines(List cnc) { StringBuilder out = new StringBuilder(); for (String token : cnc) out.append(quote(token) + "\n"); return out.toString(); } public static String quote(String s) { if (s == null) return "null"; return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\""; } static class Java20 extends Lexicon { Java20() { /** * Grammar for Java 2.0. * * Nonterminal - first letter uppercase * TERMINAL - all letters uppercase * keyword - all letters lowercase */ int INFINITY = -1; /** * 19.3 Terminals from section 3.6: White Space: [[:space:]] */ put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY)); /** * 19.3 Terminals from section 3.7: Comment */ put("COMMENT", new Union( // // Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/ // new Concatenation( new Singleton("/*"), new Concatenation( new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation( new Repetition( new Concatenation( new Singleton("*"), new Repetition(new Concatenation( new NonMatch("*/"), new Repetition(new NonMatch("*"), 0, INFINITY) ), 0, 1) ), 0, INFINITY ), new Singleton("*/") ))), new Union( /** * End Of Line Comment: //[^\n]*\n */ new Concatenation( new Singleton("//"), new Concatenation( new Repetition(new NonMatch("\n"), 0, INFINITY), new Singleton("\n") )), // // Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/ // new Concatenation( new Singleton("/**"), new Concatenation( new Repetition( new Concatenation( new Repetition(new Concatenation( new NonMatch("*/"), new Repetition(new NonMatch("*"), 0, INFINITY) ), 0, 1), new Singleton("*") ), 0, INFINITY ), new Singleton("/") )) ))); put("IDENTIFIER", new Concatenation( new Union( PosixClass.alpha(), new Match("_$") ), new Repetition( new Union( PosixClass.alnum(), new Match("_$") ), 0, INFINITY ) )); /** * 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar) */ put("KEYWORD", new Union( new Singleton("const"), new Singleton("goto") )); /** * 19.3 Terminals from section 3.10.1: Integer Literal */ put("INTEGER_LITERAL", new Concatenation( new Union( /** * Decimal Integer Literal: 0|[1-9][[:digit:]]* */ new Singleton("0"), new Union( new Concatenation( new Range('1', '9'), new Repetition(PosixClass.digit(), 0, INFINITY) ), new Union( /** * Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+ */ new Concatenation( new Singleton("0"), new Concatenation( new Match("xX"), new Repetition(PosixClass.xdigit(), 1, INFINITY) )), /** * Octal Integer Literal: 0[0-7]+ */ new Concatenation( new Singleton("0"), new Repetition(new Range('0', '7'), 1, INFINITY) ) ))), new Repetition(new Match("lL"), 0, 1) )); /** * 19.3 Terminals from section 3.10.2: Floating-Point Literal */ put("FLOATING_POINT_LITERAL", new Union( /** * [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]? */ new Concatenation( new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( new Singleton("."), new Concatenation( new Repetition(PosixClass.digit(), 0, INFINITY), new Concatenation( new Repetition(new Concatenation( new Match("eE"), new Concatenation( new Repetition(new Match("-+"), 0, 1), new Repetition(PosixClass.digit(), 1, INFINITY) )), 0, 1), new Repetition(new Match("fFdD"), 0, 1) )))), new Union( /** * \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]? */ new Concatenation( new Singleton("."), new Concatenation( new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( new Repetition(new Concatenation( new Match("eE"), new Concatenation( new Repetition(new Match("-+"), 0, 1), new Repetition(PosixClass.digit(), 1, INFINITY) )), 0, 1), new Repetition(new Match("fFdD"), 0, 1) ))), new Union( /** * [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]? */ new Concatenation( new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( new Match("eE"), new Concatenation( new Repetition(new Match("-+"), 0, 1), new Concatenation( new Repetition(PosixClass.digit(), 1, INFINITY), new Repetition(new Match("fFdD"), 0, 1) )))), /** * [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD] */ new Concatenation( new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation( new Repetition(new Concatenation( new Match("eE"), new Concatenation( new Repetition(new Match("-+"), 0, 1), new Repetition(PosixClass.digit(), 1, INFINITY) )), 0, 1), new Match("fFdD") )) )))); /** * 19.3 Terminals from section 3.10.3: Boolean Literal */ put("BOOLEAN_LITERAL", new Union( new Singleton("true"), new Singleton("false") )); /** * 19.3 Terminals from section 3.10.4: Character Literal */ put("CHARACTER_LITERAL", new Concatenation( new Singleton("'"), new Concatenation( new Union( /** * Single Character: [^\r\n'\\] */ new NonMatch("\r\n'\\"), /** * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2}) */ new Concatenation( new Singleton("\\"), new Union( new Match("btnfr\"'\\"), new Concatenation( new Repetition(new Range('0', '3'), 0, 1), new Repetition(new Range('0', '7'), 1, 2) ) ) ) ), new Singleton("'") ))); put("MULTILINE_LITERAL", new Concatenation( new Singleton("[["), new Concatenation( new Repetition( new Union( new NonMatch("]"), new Concatenation( new Singleton("]"), new NonMatch("]")) ), 0, INFINITY ), new Singleton("]]") ))); /** * 19.3 Terminals from section 3.10.5: String Literal */ put("STRING_LITERAL", new Concatenation( new Singleton("\""), new Concatenation( new Repetition( new Union( /** * Single Character: [^\r\n"\\] */ new NonMatch("\r\n\"\\"), /** * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2}) */ new Concatenation( new Singleton("\\"), new Union( new Match("btnfr\"'\\"), new Union( new Concatenation( new Repetition(new Range('0', '3'), 0, 1), new Repetition(new Range('0', '7'), 1, 2) ), new Concatenation( new Singleton("u"), new Repetition(new Match("0123456789abcdefABCDEF"), 4, 4) ) ) ) ) ), 0, INFINITY ), new Singleton("\"") ))); /** * 19.3 Terminals section 3.10.7: Null Literal */ put("NULL_LITERAL", new Singleton("null")); // OK, it seems we have to add some more stuff... //put("OTHER1", new Match(";{}=,<>[]().+-:|&!")); //put("OTHER1", new NonMatch("")); // catch anything, one character at a time put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time } } // class Java20 } /** *

This class implements a {@link Lexicon}.

* * @version 1.3 * @author © 1999-2009 Craig A. Rich <carich@csupomona.edu> */ class Lexicon { //Q /** *

The number of lexical NFA states constructed.

*/ private static int QSize = 0; /** *

Creates a new state in the lexical NFA.

* * @return a new state in the lexical NFA. */ private static Integer s() { return ++QSize; } //delta /** *

The transition relation of the lexical NFA.

*/ private static final Stack> delta = new Stack>(); /** *

Puts a transition into the lexical NFA.

* * @param s the state from which the transition is made. * @param A the Alphabet on which the transition is made. * @param r the state to which the transition is made. */ private static void put(Integer s, Alphabet A, Integer r) { if (Math.max(s,r) >= delta.size()) delta.setSize(Math.max(s,r)+1); Stack pairs = delta.get(s); if (pairs == null) delta.set(s, pairs = new Stack()); pairs.push(A); pairs.push(r); } //Set /** *

This class implements a {@link Lexicon.Set Set}.

* * @version 1.3 * @author © 1999-2009 Craig A. Rich <carich@csupomona.edu> * @param the element type. */ static class Set extends Stack { /** *

The null exclusion indicator. If true, add methods will not add null to this Set.

*/ private final boolean excludeNull; /** *

Constructs a Set with an initial capacity.

* * @param capacity the initial capacity. The magnitude of capacity is the initial capacity. The null exclusion indicator is initialized to true if capacity is negative. */ Set(int capacity) { super(); ensureCapacity(Math.abs(capacity)); excludeNull = (capacity < 0); } /** *

Adds an element to this Set. The element is not added if it occurs in this Set or it is null and the null exclusion indicator is true. The capacity is expanded if necessary.

* * @param element the element to add to this Set. * @return true if this Set is changed; false otherwise. */ public boolean add(E element) { if (excludeNull && element == null || contains(element)) return false; push(element); return true; } /** *

Adds a Set of elements to this Set. An element is not added if it occurs in this Set or it is null and the null exclusion indicator is true. The capacity is expanded if necessary.

* * @param index the index in S beyond which elements are added. * @param S the Set to add to this Set. * @return true if this Set is changed; false otherwise. */ boolean add(int index, Set S) { if (S == null) return false; boolean push = isEmpty(); boolean add = false; for (int i = index; i < S.size(); i++) { E element = S.get(i); if (!(excludeNull && element == null)) if (push) { push(element); add = true; } else if (add(element)) add = true; } return add; } /** *

Adds a Set of elements to this Set. An element is not added if it occurs in this Set or it is null and the null exclusion indicator is true. The capacity is expanded if necessary.

* * @param S the Set to add to this Set. * @return true if this Set is changed; false otherwise. */ boolean add(Set S) { return add(0, S); } public String toString() { StringBuffer result = new StringBuffer(80); result.append('{'); for (int i = 0; i < size(); i++) { if (i > 0) result.append(' '); result.append(get(i)); } result.append('}'); return result.toString(); } //Set } //I /** *

The initial states of the lexical NFA. When empty, there is a need to compute the current initial states. It is computed only on demand created by {@link #initial()}.

*/ private final Set I; //F /** *

The final states of the lexical NFA. A final state is mapped to the terminal it accepts in this Lexicon. When empty, there is a need to compute current final states. It is computed only on demand created by {@link #initial()}.

*/ private final Map F; //Lexicon.transition /** *

Computes a transition using the lexical NFA.

* * @param S the states from which the transition is made. * @param a the character on which the transition is made. * @param R the states to which the transition is made. * @return the states to which the transition is made. */ private static Set transition(Set S, char a, Set R) { R.clear(); for (Integer s : S) { Stack pairs = delta.get(s); if (pairs != null) for (int k = 0; k < pairs.size(); k += 2) { Alphabet A = (Alphabet)pairs.get(k); if (A != null) { Integer r = (Integer)pairs.get(k+1); if (A.contains(a)) R.add(r); } } } return R; } //Lexicon.closure /** *

Computes a reflexive transitive closure under empty transition using the lexical NFA. The closure is computed in place by a breadth-first search expanding S.

* * @param S the states whose reflexive transitive closure is computed under empty transition. * @return the reflexive transitive closure of S under empty transition. */ private static Set closure(Set S) { for (int i = 0; i < S.size(); i++) { Integer s = S.get(i); Stack pairs = delta.get(s); if (pairs != null) for (int k = 0; k < pairs.size(); k += 2) { Alphabet A = (Alphabet)pairs.get(k); if (A == null) { Integer r = (Integer)pairs.get(k+1); S.add(r); } } } return S; } //Expression /** *

This class implements an {@link Lexicon.Expression Expression} expressing a regular language.

* * @version 1.3 * @author © 1999-2009 Craig A. Rich <carich@csupomona.edu> */ abstract public static class Expression implements Cloneable { /** *

The initial state of the NFA constructed from this Expression.

*/ Integer i; /** *

The final state of the NFA constructed from this Expression.

*/ Integer f; /** *

Creates a clone of this Expression, and replicates the NFA constructed from this Expression.

* * @return a clone of this Expression. */ abstract public Object clone(); } //Alphabet /** *

This class implements an {@link Lexicon.Alphabet Alphabet} of character symbols.

* * @version 1.3 * @author © 1999-2009 Craig A. Rich <carich@csupomona.edu> */ abstract public static class Alphabet extends Expression { /** *

Indicates whether a character occurs in this Alphabet.

* * @param a the character whose status is requested. * @return true if a occurs in this Alphabet; false otherwise. */ abstract boolean contains(char a); } //Match /** *

This class implements an {@link Lexicon.Alphabet Alphabet} containing some characters.

* * @version 1.3 * @author © 1999-2009 Craig A. Rich <carich@csupomona.edu> */ public static class Match extends Alphabet { /** *

The {@link Character} or {@link String} representing this Alphabet.

*/ final Object A; /** *

Constructs an Alphabet containing some characters, and builds the NFA constructed from this Expression.

* * @param i the initial state of the NFA constructed. * @param A the {@link Character} or {@link String} of characters in this Alphabet. * @param f the final state of the NFA constructed. */ private Match(Integer i, Object A, Integer f) { this.A = A; put(this.i = i, this, this.f = f); } /** *

Constructs an Alphabet containing one character, and builds the NFA constructed from this Expression.

* * @param i the initial state of the NFA constructed. * @param a the character in this Alphabet. * @param f the final state of the NFA constructed. */ private Match(Integer i, char a, Integer f) { this(i, new Character(a), f); } /** *

Constructs an Alphabet containing one character, and builds the NFA constructed from this Expression.

* * @param a the character in this Alphabet. */ public Match(char a) { this(s(), a, s()); } /** *

Constructs an Alphabet containing some characters, and builds the NFA constructed from this Expression.

* * @param A the {@link Character} or {@link String} of characters in this Alphabet. */ public Match(Object A) { this(s(), A, s()); } /** *

Indicates whether a character occurs in this Alphabet.

* * @param a the character whose status is requested. * @return true if a occurs in this Alphabet; false otherwise. */ boolean contains(char a) { if (A instanceof Character) return (Character)A == a; if (A instanceof String) return ((String)A).indexOf(a) != -1; if (A instanceof Stack) for (Alphabet alphabet : (Stack)A) if (alphabet.contains(a)) return true; return false; } /** *

Creates a clone of this Alphabet, and replicates the NFA constructed from this Expression.

* * @return a clone of this Alphabet. */ public Object clone() { return new Match(A); } } //NonMatch /** *

This class implements an {@link Lexicon.Alphabet Alphabet} containing all except some characters.

* * @version 1.3 * @author © 1999-2009 Craig A. Rich <carich@csupomona.edu> */ public static class NonMatch extends Match { /** *

Constructs an Alphabet containing all characters except one, and builds the NFA constructed from this Expression.

* * @param a the character not in this Alphabet. */ public NonMatch(char a) { super(a); } /** *

Constructs an Alphabet containing all characters except some, and builds the NFA constructed from this Expression.

* * @param A the {@link Character} or {@link String} of characters not in this Alphabet. */ public NonMatch(Object A) { super(A); } /** *

Indicates whether a character occurs in this Alphabet.

* * @param a the character whose status is requested. * @return true if a occurs in this Alphabet; false otherwise. */ boolean contains(char a) { return a != (char)-1 && !super.contains(a); } /** *

Creates a clone of this Alphabet, and replicates the NFA constructed from this Expression.

* * @return a clone of this Alphabet. */ public Object clone() { return new NonMatch(A); } } //Range /** *

This class implements an {@link Lexicon.Alphabet Alphabet} containing the characters in a range.

* * @version 1.3 * @author © 1999-2009 Craig A. Rich <carich@csupomona.edu> */ public static class Range extends Alphabet { /** *

The first character in the range.

*/ private final char a1; /** *

The last character in the range.

*/ private final char a2; /** *

Constructs an Alphabet containing the characters in a range, and builds the NFA constructed from this Expression.

* * @param a1 the first character in the range. * @param a2 the last character in the range. */ public Range(char a1, char a2) { this.a1 = a1; this.a2 = a2; put(i = s(), this, f = s()); } /** *

Indicates whether a character occurs in this Alphabet.

* * @param a the character whose status is requested. * @return true if a occurs in this Alphabet; false otherwise. */ boolean contains(char a) { return a1 <= a && a <= a2; } /** *

Creates a clone of this Alphabet, and replicates the NFA constructed from this Expression.

* * @return a clone of this Alphabet. */ public Object clone() { return new Range(a1, a2); } } /** *

This class implements an {@link Lexicon.Alphabet Alphabet} containing the characters in a POSIX character class.

* * @version 1.3 * @author © 1999-2009 Craig A. Rich <carich@csupomona.edu> */ public static class PosixClass extends Alphabet { /** *

The bit mask representing this PosixClass.

*/ private final int posixClass; /** *

Constructs an Alphabet containing the characters in a POSIX character class, and builds the NFA constructed from this Expression.

* * @param posixClass the bit mask representing this PosixClass. */ private PosixClass(int posixClass) { this.posixClass = posixClass; put(i = s(), this, f = s()); } /** *

Creates an Alphabet containing the uppercase alphabetic characters.

* * @return an Alphabet containing the uppercase alphabetic characters. */ public static PosixClass upper() { return new PosixClass(0x0001); } /** *

Creates an Alphabet containing the lowercase alphabetic characters.

* * @return an Alphabet containing the lowercase alphabetic characters. */ public static PosixClass lower() { return new PosixClass(0x0002); } /** *

Creates an Alphabet containing the alphabetic characters.

* * @return an Alphabet containing the alphabetic characters. */ public static PosixClass alpha() { return new PosixClass(0x0004); } /** *

Creates an Alphabet containing the decimal digit characters.

* * @return an Alphabet containing the decimal digit characters. */ public static PosixClass digit() { return new PosixClass(0x0008); } /** *

Creates an Alphabet containing the hexadecimal digit characters.

* * @return an Alphabet containing the hexadecimal digit characters. */ public static PosixClass xdigit() { return new PosixClass(0x0010); } /** *

Creates an Alphabet containing the alphanumeric characters.

* * @return an Alphabet containing the alphanumeric characters. */ public static PosixClass alnum() { return new PosixClass(0x0020); } /** *

Creates an Alphabet containing the punctuation characters.

* * @return an Alphabet containing the punctuation characters. */ public static PosixClass punct() { return new PosixClass(0x0040); } /** *

Creates an Alphabet containing the graphical characters.

* * @return an Alphabet containing the graphical characters. */ public static PosixClass graph() { return new PosixClass(0x0080); } /** *

Creates an Alphabet containing the printable characters.

* * @return an Alphabet containing the printable characters. */ public static PosixClass print() { return new PosixClass(0x0100); } /** *

Creates an Alphabet containing the blank characters.

* * @return an Alphabet containing the blank characters. */ public static PosixClass blank() { return new PosixClass(0x0200); } /** *

Creates an Alphabet containing the space characters.

* * @return an Alphabet containing the space characters. */ public static PosixClass space() { return new PosixClass(0x0400); } /** *

Creates an Alphabet containing the control characters.

* * @return an Alphabet containing the control characters. */ public static PosixClass cntrl() { return new PosixClass(0x0800); } /** *

Indicates whether a character occurs in this Alphabet.

* * @param a the character whose status is requested. * @return true if a occurs in this Alphabet; false otherwise. */ boolean contains(char a) { int UPPER = 0x0001; int LOWER = 0x0002; int ALPHA = 0x0004; int DIGIT = 0x0008; int XDIGIT = 0x0010; int ALNUM = 0x0020; int PUNCT = 0x0040; int GRAPH = 0x0080; int PRINT = 0x0100; int BLANK = 0x0200; int SPACE = 0x0400; int CNTRL = 0x0800; int classes = 0; switch (Character.getType(a)) { default: break; case Character.UPPERCASE_LETTER: classes |= UPPER | ALPHA | (('A' <= a && a <= 'F') ? XDIGIT : 0) | ALNUM | GRAPH | PRINT; break; case Character.LOWERCASE_LETTER: classes |= LOWER | ALPHA | (('a' <= a && a <= 'f') ? XDIGIT : 0) | ALNUM | GRAPH | PRINT; break; case Character.TITLECASE_LETTER: case Character.MODIFIER_LETTER: case Character.OTHER_LETTER: classes |= ALPHA | ALNUM | GRAPH | PRINT; break; case Character.NON_SPACING_MARK: case Character.COMBINING_SPACING_MARK: case Character.ENCLOSING_MARK: classes |= PUNCT | GRAPH | PRINT; break; case Character.DECIMAL_DIGIT_NUMBER: classes |= DIGIT | XDIGIT | ALNUM | GRAPH | PRINT; break; case Character.LETTER_NUMBER: case Character.OTHER_NUMBER: classes |= ALNUM | GRAPH | PRINT; break; case Character.CONNECTOR_PUNCTUATION: case Character.DASH_PUNCTUATION: case Character.START_PUNCTUATION: case Character.END_PUNCTUATION: case Character.INITIAL_QUOTE_PUNCTUATION: case Character.FINAL_QUOTE_PUNCTUATION: case Character.OTHER_PUNCTUATION: case Character.MATH_SYMBOL: case Character.CURRENCY_SYMBOL: case Character.MODIFIER_SYMBOL: case Character.OTHER_SYMBOL: classes |= PUNCT | GRAPH | PRINT; break; case Character.SPACE_SEPARATOR: classes |= PRINT | BLANK | SPACE; break; case Character.LINE_SEPARATOR: case Character.PARAGRAPH_SEPARATOR: break; case Character.CONTROL: classes |= ((a == '\t') ? BLANK : 0) | ((a == '\t' || a == '\n' || a == '\013' || a == '\f' || a == '\r') ? SPACE : 0) | CNTRL; break; case Character.FORMAT: case Character.SURROGATE: case Character.PRIVATE_USE: case Character.UNASSIGNED: break; } return (classes & posixClass) != 0; } /** *

Creates a clone of this Alphabet, and replicates the NFA constructed from this Expression.

* * @return a clone of this Alphabet. */ public Object clone() { return new PosixClass(posixClass); } } //UnicodeCategory /** *

This class implements an {@link Lexicon.Alphabet Alphabet} containing the characters in a Unicode general category.

* * @version 1.3 * @author © 1999-2009 Craig A. Rich <carich@csupomona.edu> */ public static class UnicodeCategory extends Alphabet { /** *

The byte representing the Unicode general category.

*/ private final byte category; /** *

Constructs an Alphabet containing the characters in a Unicode general category, and builds the NFA constructed from this Expression. The class {@link Character} defines byte constants representing each of the Unicode general categories.

* * @param category The byte representing the Unicode general category. * @see Character */ public UnicodeCategory(byte category) { this.category = category; put(i = s(), this, f = s()); } /** *

Indicates whether a character occurs in this Alphabet.

* * @param a the character whose status is requested. * @return true if a occurs in this Alphabet; false otherwise. */ boolean contains(char a) { return Character.getType(a) == category; } /** *

Creates a clone of this Alphabet, and replicates the NFA constructed from this Expression.

* * @return a clone of this Alphabet. */ public Object clone() { return new UnicodeCategory(category); } } //Repetition /** *

This class implements an {@link Lexicon.Expression Expression} expressing the repetition of a regular language.

* * @version 1.3 * @author © 1999-2009 Craig A. Rich <carich@csupomona.edu> */ public static class Repetition extends Expression { /** *

The operand Expression.

*/ private final Expression e1; /** *

The minimum number of times e1 is repeated.

*/ private final int min; /** *

The maximum number of times e1 is repeated.

*/ private final int max; /** *

Constructs an Expression expressing the repetition of a regular language, and builds the NFA constructed from this Expression. Large finite values for the minimum or maximum cause the NFA constructed from the operand Expression to be copied many times, resulting in a space-inefficient NFA.

* * @param e1 the operand Expression. * @param min the minimum number of times e1 is repeated. If negative, it is assumed to be zero. * @param max the maximum number of times e1 is repeated. If negative, it is assumed to be infinity. */ public Repetition(Expression e1, int min, int max) { this.e1 = e1 = (Expression)e1.clone(); this.min = min = Math.max(min, 0); this.max = max; i = (min > 0) ? e1.i : s(); f = (min > 0) ? e1.f : i; if (min == 0 && max < 0) { put(i, null, e1.i); put(e1.f, null, i); } else { for (int k = 2; k <= min; k++) { e1 = (Expression)e1.clone(); put(f, null, e1.i); f = e1.f; } if (max > min) { Integer tail = f; put(tail, null, f = s()); for (int k = min+1; k <= max; k++) { if (k > 1) e1 = (Expression)e1.clone(); put(tail, null, e1.i); put(tail = e1.f, null, f); } } else if (max < 0) put(f, null, e1.i); } } /** *

Creates a clone of this Expression, and replicates the NFA constructed from this Expression.

* * @return a clone of this Expression. */ public Object clone() { return new Repetition(e1, min, max); } } //Concatenation /** *

This class implements an {@link Lexicon.Expression Expression} expressing the concatenation of two regular languages.

* * @version 1.3 * @author © 1999-2009 Craig A. Rich <carich@csupomona.edu> */ public static class Concatenation extends Expression { /** *

The left operand Expression.

*/ private final Expression e1; /** *

The right operand Expression.

*/ private final Expression e2; /** *

Constructs an Expression expressing the concatenation of two regular languages, and builds the NFA constructed from this Expression.

* * @param e1 the left operand Expression. * @param e2 the right operand Expression. */ public Concatenation(Expression e1, Expression e2) { this.e1 = e1 = (Expression)e1.clone(); this.e2 = e2 = (Expression)e2.clone(); i = e1.i; f = e2.f; put(e1.f, null, e2.i); } /** *

Creates a clone of this Expression, and replicates the NFA constructed from this Expression.

* * @return a clone of this Expression. */ public Object clone() { return new Concatenation(e1, e2); } } //Singleton /** *

This class implements an {@link Lexicon.Expression Expression} expressing a singleton language.

* * @version 1.3 * @author © 1999-2009 Craig A. Rich <carich@csupomona.edu> */ public static class Singleton extends Expression { /** *

The string whose singleton language is expressed.

*/ private final String x; /** *

Constructs an Expression expressing a singleton language, and builds the NFA constructed from this Expression.

* * @param x the string whose singleton language is expressed. */ public Singleton(String x) { this.x = x; f = i = s(); for (char c : x.toCharArray()) new Match(f, c, f = s()); } /** *

Creates a clone of this Expression, and replicates the NFA constructed from this Expression.

* * @return a clone of this Expression. */ public Object clone() { return new Singleton(x); } } //Union /** *

This class implements an {@link Lexicon.Expression Expression} expressing the union of two regular languages.

* * @version 1.3 * @author © 1999-2009 Craig A. Rich <carich@csupomona.edu> */ public static class Union extends Expression { /** *

The left operand Expression.

*/ private final Expression e1; /** *

The right operand Expression.

*/ private final Expression e2; /** *

Constructs an Expression expressing the union of two regular languages, and builds the NFA constructed from this Expression.

* * @param e1 the left operand Expression. * @param e2 the right operand Expression. */ public Union(Expression e1, Expression e2) { this.e1 = e1 = (Expression)e1.clone(); this.e2 = e2 = (Expression)e2.clone(); i = s(); f = s(); put(i, null, e1.i); put(e1.f, null, f); put(i, null, e2.i); put(e2.f, null, f); } /** *

Creates a clone of this Expression, and replicates the NFA constructed from this Expression.

* * @return a clone of this Expression. */ public Object clone() { return new Union(e1, e2); } } /** *

The mapping representing this Lexicon. A terminal is mapped to the initial state of the NFA constructed from the associated Expression.

*/ private final Map E; /** *

Puts a terminal and associated Expression into this Lexicon. The Expression supersedes any previously associated with the terminal.

* * @param a the terminal to add to this Lexicon. * @param e the Expression associated with terminal a. When grabbing, the language expressed by e matches a. */ public void put(Object a, Expression e) { E.put(a, e); I.clear(); F.clear(); } /** *

Indicates whether a symbol is a terminal in this Lexicon.

* * @param a the symbol whose status is requested. * @return true if a is a terminal in this Lexicon; false otherwise. */ boolean terminal(Object a) { return E.containsKey(a); } //Lexicon() /** *

The terminal matched by the character at the end of a source stream.

* @since 1.1, renames END_OF_SOURCE in version 1.0. */ protected static final Object $ = new String("$"); /** *

The Alphabet containing the character at the end of a source stream.

*/ private static final Expression $_EXPRESSION = new Match((char)-1); /** *

Constructs an empty Lexicon.

*/ protected Lexicon() { E = new HashMap(500); I = new Set(-200); F = new HashMap(500); put($, $_EXPRESSION); } /** *

Constructs a Lexicon that is a shallow copy of lexicon. The fields of the new Lexicon refer to the same elements as those in lexicon.

* * * @param lexicon the Lexicon to copy. */ Lexicon(Lexicon lexicon) {/*debug*/ debug = lexicon.debug;/*off*/ E = lexicon.E; I = lexicon.I; F = lexicon.F; } //Lexicon.initial /** *

Returns the initial states of the lexical NFA.

* * @return {@link #I}, computing it and {@link #F} if there is a need to compute the current initial states and final states. */ private Set initial() { if (I.isEmpty()) { for (Object a : E.keySet()) { Expression e = E.get(a); I.add(e.i); F.put(e.f, a); } closure(I); } return I; } //accept /** *

Computes the current final state, if any, in the lexical NFA.

* * @param S the current states. * @return the maximum final state in S. Returns null if S contains no final states. */ private Integer accept(Set S) { Integer f = null; for (Integer s : S) if (F.containsKey(s)) if (f == null || f < s) f = s; return f; } /** *

This class implements an {@link Lexicon.Exception Exception}.

* * @version 1.3 * @author © 1999-2009 Craig A. Rich <carich@csupomona.edu> */ public class Exception extends java.lang.Exception { /** *

The extended error message.

*/ private StringBuffer message; /** *

Constructs an Exception with a message.

* * @param message the error message. */ public Exception(String message) { super(message); } /** *

Returns the error message.

* * @return the error message. */ public String getMessage() { return (message == null) ? super.getMessage() : message.toString(); } /** *

Extends the error message in this Exception. The extended message includes the line number, message and source characters following the error.

* * @param source the source character stream. * @return this Exception with an extended message. */ Exception extend(LineNumberReader source) { if (message == null) message = new StringBuffer(132); else message.setLength(0); message.append("line "); message.append(source.getLineNumber()+1); message.append(": "); message.append(super.getMessage()); message.append(System.getProperty("line.separator")); message.append("..."); message.append(word()); try { String rest = source.readLine(); if (rest != null) message.append(rest); } catch (IOException exception) {} message.append(System.getProperty("line.separator")); message.append(" ^"); return this; } } //Lexicon.grab /** *

The states through which the lexical NFA transitions.

*/ private final Set[] R = (Set[])new Set[]{new Set(-200), new Set(-200)}; /** *

The StringBuffer containing the word most recently grabbed.

*/ private final StringBuffer w = new StringBuffer(4000); /** *

Grabs a terminal from a source character stream using this Lexicon. The variable returned by {@link #word()} is set to the longest nonempty prefix of the remaining source characters matching an Expression in this Lexicon. If no nonempty prefix matches an Expression, a Lexicon.Exception is thrown. If the longest matching prefix matches more than one Expression, the terminal associated with the Expression most recently constructed is returned. Blocks until a character is available, an I/O error occurs, or the end of the source stream is reached.

* * @param source the source character stream. * @return the terminal grabbed from source. * @throws Lexicon.Exception if an I/O or lexical error occurs. */ protected Object grab(LineNumberReader source) throws Exception { Set S = initial(); w.setLength(0); int wLength = 0; Object b = null; try { source.mark(w.capacity()); do { int a = source.read(); S = closure(transition(S, (char)a, R[w.length() % 2])); if (S.isEmpty()) break; if (a != -1) w.append((char)a); else w.append($); Integer f = accept(S); if (f != null) { wLength = w.length(); b = F.get(f); source.mark(w.capacity()); } } while (b != $); w.setLength(wLength); source.reset(); } catch (IOException exception) { throw new Exception(exception.getMessage()); } if (wLength == 0) throw new Exception("lexical error").extend(source); return b; } /** *

Returns the word most recently grabbed using this Lexicon.

* * @return the word most recently grabbed by {@link #grab(java.io.LineNumberReader) grab(source)}. */ protected String word() { return w.substring(0); } //Lexicon.interpret /** *

Repeatedly invokes {@link #grab(java.io.LineNumberReader) grab(source)} until the end of the source stream reached. Blocks until a character is available, or an I/O error occurs. This method is overridden by Grammar and its parser subclasses, so it is only invoked when this Lexicon has not been extended into a Grammar or parser.

* * @param source the source character stream. * @return the ParseTree constructed by interpreting source. A Lexicon always returns null. * @throws Lexicon.Exception if an I/O or lexical error occurs. */ Object interpret(LineNumberReader source) throws Exception {/*debug*/ if ((debug & TERMINALS) > 0) System.out.println( "----terminals\n\t" + E.keySet().toString().replaceFirst("\\[", "{").replaceAll(", ", " ").replaceFirst("\\]$", "}\n----------"));/*off*/ for (Object a; (a = grab(source)) != $;)/*debug*/ if ((debug & LEXICAL) > 0) System.out.println( a + (!a.equals(word()) ? " " + word() : ""))/*off*/ ; return null; } /** *

Interprets a source character stream using this Lexicon.

* * @param source the source character stream. * @return the ParseTree constructed by interpreting source. * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. */ public Object interpret(Reader source) throws Exception { return interpret(new LineNumberReader(source)); } /** *

Interprets a source string using this Lexicon.

* * @param source the source string. * @return the ParseTree constructed by interpreting source. * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. */ public Object interpret(String source) throws Exception { return interpret(new StringReader(source)); } /** *

Interprets a source byte stream using this Lexicon.

* * @param source the source byte stream. * @return the ParseTree constructed by interpreting source. * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. */ public Object interpret(InputStream source) throws Exception { return interpret(new InputStreamReader(source)); } /** *

Interprets the standard input stream using this Lexicon.

* * @return the ParseTree constructed by interpreting the standard input stream. * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. */ public Object interpret() throws Exception { return interpret(System.in); } /** *

Interprets a source file using this Lexicon.

* * @param source the source file. * @return the ParseTree constructed by interpreting source. * @throws FileNotFoundException if the source file cannot be found. * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. */ public Object interpret(File source) throws FileNotFoundException, Exception { return interpret(new FileReader(source)); } /** *

Interprets a source pipe using this Lexicon.

* * @param source the source pipe. * @return the ParseTree constructed by interpreting source. * @throws IOException if the source pipe cannot be connected. * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. */ public Object interpret(PipedWriter source) throws IOException, Exception { return interpret(new PipedReader(source)); } //Lexicon.interpret(arguments) /** *

The debug switches, initially zero. The following bits enable debugging to standard output:

*
*
0x01 = TERMINALS
*
Print the set of terminals before lexical analysis
*
0x02 = LEXICAL
*
Print terminals and associated words grabbed during lexical analysis
*
0x04 = FIRST_FOLLOW
*
Print first and follow sets precomputed during syntax analysis
*
0x08 = SYNTAX
*
Print parsing decisions made during syntax analysis
*
0x10 = CONFLICT
*
Print parsing conflicts encountered during syntax analysis
*
0x20 = PARSE_TREE
*
Print each ParseTree produced by syntax analysis
*
* @since 1.1 */ protected int debug = 0; /** *

{@link #debug debug} switch constant enabling printing the set of terminals before lexical analysis.

* @since 1.1 */ protected static final int TERMINALS = 0x01; /** *

{@link #debug debug} switch constant enabling printing terminals and associated words grabbed during lexical analysis.

* @since 1.1 */ protected static final int LEXICAL = 0x02; /** *

{@link #debug debug} switch constant enabling all debugging.

* @since 1.1 */ protected static final int VERBOSE = 0xFF; }