/** * <p>This class implements a {@link Lexicon}.</p> * * @version 1.3 * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> */ public class Lexicon { //Q /** * <p>The number of lexical NFA states constructed.</p> */ private static int QSize = 0; /** * <p>Creates a new state in the lexical NFA.</p> * * @return a new state in the lexical NFA. */ private static Integer s() { return ++QSize; } //delta /** * <p>The transition relation of the lexical NFA.</p> */ private static final Stack<Stack<Object>> delta = new Stack<Stack<Object>>(); /** * <p>Puts a transition into the lexical NFA.</p> * * @param s the state from which the transition is made. * @param A the <code>Alphabet</code> on which the transition is made. * @param r the state to which the transition is made. */ private static void put(Integer s, Alphabet A, Integer r) { if (Math.max(s,r) >= delta.size()) delta.setSize(Math.max(s,r)+1); Stack<Object> pairs = delta.get(s); if (pairs == null) delta.set(s, pairs = new Stack<Object>()); pairs.push(A); pairs.push(r); } //Set /** * <p>This class implements a {@link Lexicon.Set <code>Set</code>}.</p> * * @version 1.3 * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> * @param <E> the element type. */ static class Set<E> extends Stack<E> { /** * <p>The null exclusion indicator. If <code>true</code>, <code>add</code> methods will not add <code>null</code> to this <code>Set</code>.</p> */ private final boolean excludeNull; /** * <p>Constructs a <code>Set</code> with an initial capacity.</p> * * @param capacity the initial capacity. The magnitude of <code>capacity</code> is the initial capacity. The null exclusion indicator is initialized to <code>true</code> if <code>capacity</code> is negative. */ Set(int capacity) { super(); ensureCapacity(Math.abs(capacity)); excludeNull = (capacity < 0); } /** * <p>Adds an element to this <code>Set</code>. The element is not added if it occurs in this <code>Set</code> or it is <code>null</code> and the null exclusion indicator is <code>true</code>. The capacity is expanded if necessary.</p> * * @param element the element to add to this <code>Set</code>. * @return <code>true</code> if this <code>Set</code> is changed; <code>false</code> otherwise. */ public boolean add(E element) { if (excludeNull && element == null || contains(element)) return false; push(element); return true; } /** * <p>Adds a <code>Set</code> of elements to this <code>Set</code>. An element is not added if it occurs in this <code>Set</code> or it is <code>null</code> and the null exclusion indicator is <code>true</code>. The capacity is expanded if necessary.</p> * * @param index the index in <code>S</code> beyond which elements are added. * @param S the <code>Set</code> to add to this <code>Set</code>. * @return <code>true</code> if this <code>Set</code> is changed; <code>false</code> otherwise. */ boolean add(int index, Set<E> S) { if (S == null) return false; boolean push = isEmpty(); boolean add = false; for (int i = index; i < S.size(); i++) { E element = S.get(i); if (!(excludeNull && element == null)) if (push) { push(element); add = true; } else if (add(element)) add = true; } return add; } /** * <p>Adds a <code>Set</code> of elements to this <code>Set</code>. An element is not added if it occurs in this <code>Set</code> or it is <code>null</code> and the null exclusion indicator is <code>true</code>. The capacity is expanded if necessary.</p> * * @param S the <code>Set</code> to add to this <code>Set</code>. * @return <code>true</code> if this <code>Set</code> is changed; <code>false</code> otherwise. */ boolean add(Set<E> S) { return add(0, S); } public String toString() { StringBuffer result = new StringBuffer(80); result.append('{'); for (int i = 0; i < size(); i++) { if (i > 0) result.append(' '); result.append(get(i)); } result.append('}'); return result.toString(); } //Set } //I /** * <p>The initial states of the lexical NFA. When empty, there is a need to compute the current initial states. It is computed only on demand created by {@link #initial()}.</p> */ private final Set<Integer> I; //F /** * <p>The final states of the lexical NFA. A final state is mapped to the terminal it accepts in this <code>Lexicon</code>. When empty, there is a need to compute current final states. It is computed only on demand created by {@link #initial()}.</p> */ private final Map<Integer, Object> F; //Lexicon.transition /** * <p>Computes a transition using the lexical NFA.</p> * * @param S the states from which the transition is made. * @param a the character on which the transition is made. * @param R the states to which the transition is made. * @return the states to which the transition is made. */ private static Set<Integer> transition(Set<Integer> S, char a, Set<Integer> R) { R.clear(); for (Integer s : S) { Stack<Object> pairs = delta.get(s); if (pairs != null) for (int k = 0; k < pairs.size(); k += 2) { Alphabet A = (Alphabet)pairs.get(k); if (A != null) { Integer r = (Integer)pairs.get(k+1); if (A.contains(a)) R.add(r); } } } return R; } //Lexicon.closure /** * <p>Computes a reflexive transitive closure under empty transition using the lexical NFA. The closure is computed in place by a breadth-first search expanding <code>S</code>.</p> * * @param S the states whose reflexive transitive closure is computed under empty transition. * @return the reflexive transitive closure of <code>S</code> under empty transition. */ private static Set<Integer> closure(Set<Integer> S) { for (int i = 0; i < S.size(); i++) { Integer s = S.get(i); Stack<Object> pairs = delta.get(s); if (pairs != null) for (int k = 0; k < pairs.size(); k += 2) { Alphabet A = (Alphabet)pairs.get(k); if (A == null) { Integer r = (Integer)pairs.get(k+1); S.add(r); } } } return S; } //Expression /** * <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing a regular language.</p> * * @version 1.3 * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> */ abstract public static class Expression implements Cloneable { /** * <p>The initial state of the NFA constructed from this <code>Expression</code>.</p> */ Integer i; /** * <p>The final state of the NFA constructed from this <code>Expression</code>.</p> */ Integer f; /** * <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> * * @return a clone of this <code>Expression</code>. */ abstract public Object clone(); } //Alphabet /** * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} of character symbols.</p> * * @version 1.3 * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> */ abstract public static class Alphabet extends Expression { /** * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> * * @param a the character whose status is requested. * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. */ abstract boolean contains(char a); } //Match /** * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing some characters.</p> * * @version 1.3 * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> */ public static class Match extends Alphabet { /** * <p>The {@link Character} or {@link String} representing this <code>Alphabet</code>.</p> */ final Object A; /** * <p>Constructs an <code>Alphabet</code> containing some characters, and builds the NFA constructed from this <code>Expression</code>.</p> * * @param i the initial state of the NFA constructed. * @param A the {@link Character} or {@link String} of characters in this <code>Alphabet</code>. * @param f the final state of the NFA constructed. */ private Match(Integer i, Object A, Integer f) { this.A = A; put(this.i = i, this, this.f = f); } /** * <p>Constructs an <code>Alphabet</code> containing one character, and builds the NFA constructed from this <code>Expression</code>.</p> * * @param i the initial state of the NFA constructed. * @param a the character in this <code>Alphabet</code>. * @param f the final state of the NFA constructed. */ private Match(Integer i, char a, Integer f) { this(i, new Character(a), f); } /** * <p>Constructs an <code>Alphabet</code> containing one character, and builds the NFA constructed from this <code>Expression</code>.</p> * * @param a the character in this <code>Alphabet</code>. */ public Match(char a) { this(s(), a, s()); } /** * <p>Constructs an <code>Alphabet</code> containing some characters, and builds the NFA constructed from this <code>Expression</code>.</p> * * @param A the {@link Character} or {@link String} of characters in this <code>Alphabet</code>. */ public Match(Object A) { this(s(), A, s()); } /** * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> * * @param a the character whose status is requested. * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. */ boolean contains(char a) { if (A instanceof Character) return (Character)A == a; if (A instanceof String) return ((String)A).indexOf(a) != -1; if (A instanceof Stack<?>) for (Alphabet alphabet : (Stack<Alphabet>)A) if (alphabet.contains(a)) return true; return false; } /** * <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> * * @return a clone of this <code>Alphabet</code>. */ public Object clone() { return new Match(A); } } //NonMatch /** * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing all except some characters.</p> * * @version 1.3 * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> */ public static class NonMatch extends Match { /** * <p>Constructs an <code>Alphabet</code> containing all characters except one, and builds the NFA constructed from this <code>Expression</code>.</p> * * @param a the character not in this <code>Alphabet</code>. */ public NonMatch(char a) { super(a); } /** * <p>Constructs an <code>Alphabet</code> containing all characters except some, and builds the NFA constructed from this <code>Expression</code>.</p> * * @param A the {@link Character} or {@link String} of characters not in this <code>Alphabet</code>. */ public NonMatch(Object A) { super(A); } /** * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> * * @param a the character whose status is requested. * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. */ boolean contains(char a) { return a != (char)-1 && !super.contains(a); } /** * <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> * * @return a clone of this <code>Alphabet</code>. */ public Object clone() { return new NonMatch(A); } } //Range /** * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing the characters in a range.</p> * * @version 1.3 * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> */ public static class Range extends Alphabet { /** * <p>The first character in the range.</p> */ private final char a1; /** * <p>The last character in the range.</p> */ private final char a2; /** * <p>Constructs an <code>Alphabet</code> containing the characters in a range, and builds the NFA constructed from this <code>Expression</code>.</p> * * @param a1 the first character in the range. * @param a2 the last character in the range. */ public Range(char a1, char a2) { this.a1 = a1; this.a2 = a2; put(i = s(), this, f = s()); } /** * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> * * @param a the character whose status is requested. * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. */ boolean contains(char a) { return a1 <= a && a <= a2; } /** * <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> * * @return a clone of this <code>Alphabet</code>. */ public Object clone() { return new Range(a1, a2); } } /** * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing the characters in a POSIX character class.</p> * * @version 1.3 * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> */ public static class PosixClass extends Alphabet { /** * <p>The bit mask representing this <code>PosixClass</code>.</p> */ private final int posixClass; /** * <p>Constructs an <code>Alphabet</code> containing the characters in a POSIX character class, and builds the NFA constructed from this <code>Expression</code>.</p> * * @param posixClass the bit mask representing this <code>PosixClass</code>. */ private PosixClass(int posixClass) { this.posixClass = posixClass; put(i = s(), this, f = s()); } /** * <p>Creates an <code>Alphabet</code> containing the uppercase alphabetic characters.</p> * * @return an <code>Alphabet</code> containing the uppercase alphabetic characters. */ public static PosixClass upper() { return new PosixClass(0x0001); } /** * <p>Creates an <code>Alphabet</code> containing the lowercase alphabetic characters.</p> * * @return an <code>Alphabet</code> containing the lowercase alphabetic characters. */ public static PosixClass lower() { return new PosixClass(0x0002); } /** * <p>Creates an <code>Alphabet</code> containing the alphabetic characters.</p> * * @return an <code>Alphabet</code> containing the alphabetic characters. */ public static PosixClass alpha() { return new PosixClass(0x0004); } /** * <p>Creates an <code>Alphabet</code> containing the decimal digit characters.</p> * * @return an <code>Alphabet</code> containing the decimal digit characters. */ public static PosixClass digit() { return new PosixClass(0x0008); } /** * <p>Creates an <code>Alphabet</code> containing the hexadecimal digit characters.</p> * * @return an <code>Alphabet</code> containing the hexadecimal digit characters. */ public static PosixClass xdigit() { return new PosixClass(0x0010); } /** * <p>Creates an <code>Alphabet</code> containing the alphanumeric characters.</p> * * @return an <code>Alphabet</code> containing the alphanumeric characters. */ public static PosixClass alnum() { return new PosixClass(0x0020); } /** * <p>Creates an <code>Alphabet</code> containing the punctuation characters.</p> * * @return an <code>Alphabet</code> containing the punctuation characters. */ public static PosixClass punct() { return new PosixClass(0x0040); } /** * <p>Creates an <code>Alphabet</code> containing the graphical characters.</p> * * @return an <code>Alphabet</code> containing the graphical characters. */ public static PosixClass graph() { return new PosixClass(0x0080); } /** * <p>Creates an <code>Alphabet</code> containing the printable characters.</p> * * @return an <code>Alphabet</code> containing the printable characters. */ public static PosixClass print() { return new PosixClass(0x0100); } /** * <p>Creates an <code>Alphabet</code> containing the blank characters.</p> * * @return an <code>Alphabet</code> containing the blank characters. */ public static PosixClass blank() { return new PosixClass(0x0200); } /** * <p>Creates an <code>Alphabet</code> containing the space characters.</p> * * @return an <code>Alphabet</code> containing the space characters. */ public static PosixClass space() { return new PosixClass(0x0400); } /** * <p>Creates an <code>Alphabet</code> containing the control characters.</p> * * @return an <code>Alphabet</code> containing the control characters. */ public static PosixClass cntrl() { return new PosixClass(0x0800); } /** * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> * * @param a the character whose status is requested. * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. */ boolean contains(char a) { int UPPER = 0x0001; int LOWER = 0x0002; int ALPHA = 0x0004; int DIGIT = 0x0008; int XDIGIT = 0x0010; int ALNUM = 0x0020; int PUNCT = 0x0040; int GRAPH = 0x0080; int PRINT = 0x0100; int BLANK = 0x0200; int SPACE = 0x0400; int CNTRL = 0x0800; int classes = 0; switch (Character.getType(a)) { default: break; case Character.UPPERCASE_LETTER: classes |= UPPER | ALPHA | (('A' <= a && a <= 'F') ? XDIGIT : 0) | ALNUM | GRAPH | PRINT; break; case Character.LOWERCASE_LETTER: classes |= LOWER | ALPHA | (('a' <= a && a <= 'f') ? XDIGIT : 0) | ALNUM | GRAPH | PRINT; break; case Character.TITLECASE_LETTER: case Character.MODIFIER_LETTER: case Character.OTHER_LETTER: classes |= ALPHA | ALNUM | GRAPH | PRINT; break; case Character.NON_SPACING_MARK: case Character.COMBINING_SPACING_MARK: case Character.ENCLOSING_MARK: classes |= PUNCT | GRAPH | PRINT; break; case Character.DECIMAL_DIGIT_NUMBER: classes |= DIGIT | XDIGIT | ALNUM | GRAPH | PRINT; break; case Character.LETTER_NUMBER: case Character.OTHER_NUMBER: classes |= ALNUM | GRAPH | PRINT; break; case Character.CONNECTOR_PUNCTUATION: case Character.DASH_PUNCTUATION: case Character.START_PUNCTUATION: case Character.END_PUNCTUATION: case Character.INITIAL_QUOTE_PUNCTUATION: case Character.FINAL_QUOTE_PUNCTUATION: case Character.OTHER_PUNCTUATION: case Character.MATH_SYMBOL: case Character.CURRENCY_SYMBOL: case Character.MODIFIER_SYMBOL: case Character.OTHER_SYMBOL: classes |= PUNCT | GRAPH | PRINT; break; case Character.SPACE_SEPARATOR: classes |= PRINT | BLANK | SPACE; break; case Character.LINE_SEPARATOR: case Character.PARAGRAPH_SEPARATOR: break; case Character.CONTROL: classes |= ((a == '\t') ? BLANK : 0) | ((a == '\t' || a == '\n' || a == '\013' || a == '\f' || a == '\r') ? SPACE : 0) | CNTRL; break; case Character.FORMAT: case Character.SURROGATE: case Character.PRIVATE_USE: case Character.UNASSIGNED: break; } return (classes & posixClass) != 0; } /** * <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> * * @return a clone of this <code>Alphabet</code>. */ public Object clone() { return new PosixClass(posixClass); } } //UnicodeCategory /** * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing the characters in a Unicode general category.</p> * * @version 1.3 * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> */ public static class UnicodeCategory extends Alphabet { /** * <p>The byte representing the Unicode general category.</p> */ private final byte category; /** * <p>Constructs an <code>Alphabet</code> containing the characters in a Unicode general category, and builds the NFA constructed from this <code>Expression</code>. The class {@link Character} defines byte constants representing each of the Unicode general categories.</p> * * @param category The byte representing the Unicode general category. * @see Character */ public UnicodeCategory(byte category) { this.category = category; put(i = s(), this, f = s()); } /** * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> * * @param a the character whose status is requested. * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. */ boolean contains(char a) { return Character.getType(a) == category; } /** * <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> * * @return a clone of this <code>Alphabet</code>. */ public Object clone() { return new UnicodeCategory(category); } } //Repetition /** * <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing the repetition of a regular language.</p> * * @version 1.3 * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> */ public static class Repetition extends Expression { /** * <p>The operand <code>Expression</code>.</p> */ private final Expression e1; /** * <p>The minimum number of times <code>e1</code> is repeated.</p> */ private final int min; /** * <p>The maximum number of times <code>e1</code> is repeated.</p> */ private final int max; /** * <p>Constructs an <code>Expression</code> expressing the repetition of a regular language, and builds the NFA constructed from this <code>Expression</code>. Large finite values for the minimum or maximum cause the NFA constructed from the operand <code>Expression</code> to be copied many times, resulting in a space-inefficient NFA.</p> * * @param e1 the operand <code>Expression</code>. * @param min the minimum number of times <code>e1</code> is repeated. If negative, it is assumed to be zero. * @param max the maximum number of times <code>e1</code> is repeated. If negative, it is assumed to be infinity. */ public Repetition(Expression e1, int min, int max) { this.e1 = e1 = (Expression)e1.clone(); this.min = min = Math.max(min, 0); this.max = max; i = (min > 0) ? e1.i : s(); f = (min > 0) ? e1.f : i; if (min == 0 && max < 0) { put(i, null, e1.i); put(e1.f, null, i); } else { for (int k = 2; k <= min; k++) { e1 = (Expression)e1.clone(); put(f, null, e1.i); f = e1.f; } if (max > min) { Integer tail = f; put(tail, null, f = s()); for (int k = min+1; k <= max; k++) { if (k > 1) e1 = (Expression)e1.clone(); put(tail, null, e1.i); put(tail = e1.f, null, f); } } else if (max < 0) put(f, null, e1.i); } } /** * <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> * * @return a clone of this <code>Expression</code>. */ public Object clone() { return new Repetition(e1, min, max); } } //Concatenation /** * <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing the concatenation of two regular languages.</p> * * @version 1.3 * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> */ public static class Concatenation extends Expression { /** * <p>The left operand <code>Expression</code>.</p> */ private final Expression e1; /** * <p>The right operand <code>Expression</code>.</p> */ private final Expression e2; /** * <p>Constructs an <code>Expression</code> expressing the concatenation of two regular languages, and builds the NFA constructed from this <code>Expression</code>.</p> * * @param e1 the left operand <code>Expression</code>. * @param e2 the right operand <code>Expression</code>. */ public Concatenation(Expression e1, Expression e2) { this.e1 = e1 = (Expression)e1.clone(); this.e2 = e2 = (Expression)e2.clone(); i = e1.i; f = e2.f; put(e1.f, null, e2.i); } /** * <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> * * @return a clone of this <code>Expression</code>. */ public Object clone() { return new Concatenation(e1, e2); } } //Singleton /** * <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing a singleton language.</p> * * @version 1.3 * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> */ public static class Singleton extends Expression { /** * <p>The string whose singleton language is expressed.</p> */ private final String x; /** * <p>Constructs an <code>Expression</code> expressing a singleton language, and builds the NFA constructed from this <code>Expression</code>.</p> * * @param x the string whose singleton language is expressed. */ public Singleton(String x) { this.x = x; f = i = s(); for (char c : x.toCharArray()) new Match(f, c, f = s()); } /** * <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> * * @return a clone of this <code>Expression</code>. */ public Object clone() { return new Singleton(x); } } //Union /** * <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing the union of two regular languages.</p> * * @version 1.3 * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> */ public static class Union extends Expression { /** * <p>The left operand <code>Expression</code>.</p> */ private final Expression e1; /** * <p>The right operand <code>Expression</code>.</p> */ private final Expression e2; /** * <p>Constructs an <code>Expression</code> expressing the union of two regular languages, and builds the NFA constructed from this <code>Expression</code>.</p> * * @param e1 the left operand <code>Expression</code>. * @param e2 the right operand <code>Expression</code>. */ public Union(Expression e1, Expression e2) { this.e1 = e1 = (Expression)e1.clone(); this.e2 = e2 = (Expression)e2.clone(); i = s(); f = s(); put(i, null, e1.i); put(e1.f, null, f); put(i, null, e2.i); put(e2.f, null, f); } /** * <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> * * @return a clone of this <code>Expression</code>. */ public Object clone() { return new Union(e1, e2); } } //expression(ere) /** * <p>Creates an <code>Expression</code> by interpreting a POSIX extended regular expression (ERE), as used in egrep. The syntax and semantics for EREs is formally specified by the <a href="../../../src/gi/ERE.java">ERE <code>Grammar</code></a>. Provides a convenient method for constructing an <code>Expression</code>, at the cost of an LR(1) parse. Implementations seeking maximum speed should avoid this method and use explicit <code>Expression</code> subclass constructors; for example,</p> * <blockquote><code>new Union(new NonMatch("0"), new Singleton("foo"))</code></blockquote> * instead of * <blockquote><code>expression("[^0]|foo")</code></blockquote> * * @param ere the POSIX extended regular expression (ERE) to interpret. * @return the <code>Expression</code> constructed by interpreting <code>ere</code>. * @throws Lexicon.Exception if an ERE syntax error occurs. */ public static Expression expression(String ere) throws Exception { return ERE.expression(ere); } //E /** * <p>The mapping representing this <code>Lexicon</code>. A terminal is mapped to the initial state of the NFA constructed from the associated <code>Expression</code>.</p> */ private final Map<Object, Expression> E; /** * <p>Puts a terminal and associated <code>Expression</code> into this <code>Lexicon</code>. The <code>Expression</code> supersedes any previously associated with the terminal.</p> * * @param a the terminal to add to this <code>Lexicon</code>. * @param e the <code>Expression</code> associated with terminal <code>a</code>. When grabbing, the language expressed by <code>e</code> matches <code>a</code>. */ public void put(Object a, Expression e) { E.put(a, e); I.clear(); F.clear(); } /** * <p>Indicates whether a symbol is a terminal in this <code>Lexicon</code>.</p> * * @param a the symbol whose status is requested. * @return <code>true</code> if <code>a</code> is a terminal in this <code>Lexicon</code>; <code>false</code> otherwise. */ boolean terminal(Object a) { return E.containsKey(a); } //Lexicon() /** * <p>The terminal matched by the character at the end of a source stream.</p> * @since 1.1, renames <code>END_OF_SOURCE</code> in version 1.0. */ protected static final Object $ = new String("$"); /** * <p>The <code>Alphabet</code> containing the character at the end of a source stream.</p> */ private static final Expression $_EXPRESSION = new Match((char)-1); /** * <p>Constructs an empty <code>Lexicon</code>.</p> */ protected Lexicon() { E = new HashMap<Object, Expression>(500); I = new Set<Integer>(-200); F = new HashMap<Integer, Object>(500); put($, $_EXPRESSION); } /** * <p>Constructs a <code>Lexicon</code> that is a shallow copy of <code>lexicon</code>. The fields of the new <code>Lexicon</code> refer to the same elements as those in <code>lexicon</code>.</p> * * * @param lexicon the <code>Lexicon</code> to copy. */ Lexicon(Lexicon lexicon) {/*debug*/ debug = lexicon.debug;/*off*/ E = lexicon.E; I = lexicon.I; F = lexicon.F; } //Lexicon.initial /** * <p>Returns the initial states of the lexical NFA.</p> * * @return {@link #I}, computing it and {@link #F} if there is a need to compute the current initial states and final states. */ private Set<Integer> initial() { if (I.isEmpty()) { for (Object a : E.keySet()) { Expression e = E.get(a); I.add(e.i); F.put(e.f, a); } closure(I); } return I; } //accept /** * <p>Computes the current final state, if any, in the lexical NFA.</p> * * @param S the current states. * @return the maximum final state in <code>S</code>. Returns <code>null</code> if <code>S</code> contains no final states. */ private Integer accept(Set<Integer> S) { Integer f = null; for (Integer s : S) if (F.containsKey(s)) if (f == null || f < s) f = s; return f; } /** * <p>This class implements an {@link Lexicon.Exception <code>Exception</code>}.</p> * * @version 1.3 * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> */ public class Exception extends java.lang.Exception { /** * <p>The extended error message.</p> */ private StringBuffer message; /** * <p>Constructs an <code>Exception</code> with a message.</p> * * @param message the error message. */ public Exception(String message) { super(message); } /** * <p>Returns the error message.</p> * * @return the error message. */ public String getMessage() { return (message == null) ? super.getMessage() : message.toString(); } /** * <p>Extends the error message in this <code>Exception</code>. The extended message includes the line number, message and source characters following the error.</p> * * @param source the source character stream. * @return this <code>Exception</code> with an extended message. */ Exception extend(LineNumberReader source) { if (message == null) message = new StringBuffer(132); else message.setLength(0); message.append("line "); message.append(source.getLineNumber()+1); message.append(": "); message.append(super.getMessage()); message.append(System.getProperty("line.separator")); message.append("..."); message.append(word()); try { String rest = source.readLine(); if (rest != null) message.append(rest); } catch (IOException exception) {} message.append(System.getProperty("line.separator")); message.append(" ^"); return this; } } //Lexicon.grab /** * <p>The states through which the lexical NFA transitions.</p> */ private final Set<Integer>[] R = (Set<Integer>[])new Set<?>[]{new Set<Integer>(-200), new Set<Integer>(-200)}; /** * <p>The <code>StringBuffer</code> containing the word most recently grabbed.</p> */ private final StringBuffer w = new StringBuffer(4000); /** * <p>Grabs a terminal from a source character stream using this <code>Lexicon</code>. The variable returned by {@link #word()} is set to the longest nonempty prefix of the remaining source characters matching an <code>Expression</code> in this <code>Lexicon</code>. If no nonempty prefix matches an <code>Expression</code>, a <code>Lexicon.Exception</code> is thrown. If the longest matching prefix matches more than one <code>Expression</code>, the terminal associated with the <code>Expression</code> most recently constructed is returned. Blocks until a character is available, an I/O error occurs, or the end of the source stream is reached.</p> * * @param source the source character stream. * @return the terminal grabbed from <code>source</code>. * @throws Lexicon.Exception if an I/O or lexical error occurs. */ protected Object grab(LineNumberReader source) throws Exception { Set<Integer> S = initial(); w.setLength(0); int wLength = 0; Object b = null; try { source.mark(w.capacity()); do { int a = source.read(); S = closure(transition(S, (char)a, R[w.length() % 2])); if (S.isEmpty()) break; if (a != -1) w.append((char)a); else w.append($); Integer f = accept(S); if (f != null) { wLength = w.length(); b = F.get(f); source.mark(w.capacity()); } } while (b != $); w.setLength(wLength); source.reset(); } catch (IOException exception) { throw new Exception(exception.getMessage()); } if (wLength == 0) throw new Exception("lexical error").extend(source); return b; } /** * <p>Returns the word most recently grabbed using this <code>Lexicon</code>.</p> * * @return the word most recently grabbed by {@link #grab(java.io.LineNumberReader) <code>grab(source)</code>}. */ protected String word() { return w.substring(0); } //Lexicon.interpret /** * <p>Repeatedly invokes {@link #grab(java.io.LineNumberReader) <code>grab(source)</code>} until the end of the source stream reached. Blocks until a character is available, or an I/O error occurs. This method is overridden by <code>Grammar</code> and its parser subclasses, so it is only invoked when this <code>Lexicon</code> has not been extended into a <code>Grammar</code> or parser.</p> * * @param source the source character stream. * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. A <code>Lexicon</code> always returns null. * @throws Lexicon.Exception if an I/O or lexical error occurs. */ Object interpret(LineNumberReader source) throws Exception {/*debug*/ if ((debug & TERMINALS) > 0) System.out.println( "----terminals\n\t" + E.keySet().toString().replaceFirst("\\[", "{").replaceAll(", ", " ").replaceFirst("\\]$", "}\n----------"));/*off*/ for (Object a; (a = grab(source)) != $;)/*debug*/ if ((debug & LEXICAL) > 0) System.out.println( a + (!a.equals(word()) ? " " + word() : ""))/*off*/ ; return null; } /** * <p>Interprets a source character stream using this <code>Lexicon</code>.</p> * * @param source the source character stream. * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. */ public Object interpret(Reader source) throws Exception { return interpret(new LineNumberReader(source)); } /** * <p>Interprets a source string using this <code>Lexicon</code>.</p> * * @param source the source string. * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. */ public Object interpret(String source) throws Exception { return interpret(new StringReader(source)); } /** * <p>Interprets a source byte stream using this <code>Lexicon</code>.</p> * * @param source the source byte stream. * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. */ public Object interpret(InputStream source) throws Exception { return interpret(new InputStreamReader(source)); } /** * <p>Interprets the standard input stream using this <code>Lexicon</code>.</p> * * @return the <code>ParseTree</code> constructed by interpreting the standard input stream. * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. */ public Object interpret() throws Exception { return interpret(System.in); } /** * <p>Interprets a source file using this <code>Lexicon</code>.</p> * * @param source the source file. * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. * @throws FileNotFoundException if the source file cannot be found. * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. */ public Object interpret(File source) throws FileNotFoundException, Exception { return interpret(new FileReader(source)); } /** * <p>Interprets a source pipe using this <code>Lexicon</code>.</p> * * @param source the source pipe. * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. * @throws IOException if the source pipe cannot be connected. * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. */ public Object interpret(PipedWriter source) throws IOException, Exception { return interpret(new PipedReader(source)); } //Lexicon.interpret(arguments) /** * <p>The debug switches, initially zero. The following bits enable debugging to standard output:</p> * <blockquote><dl> * <dt><code>0x01</code> = <code>TERMINALS</code></dt> * <dd>Print the set of terminals before lexical analysis</dd> * <dt><code>0x02</code> = <code>LEXICAL</code> </dt> * <dd>Print terminals and associated words grabbed during lexical analysis</dd> * <dt><code>0x04</code> = <code>FIRST_FOLLOW</code></dt> * <dd>Print first and follow sets precomputed during syntax analysis</dd> * <dt><code>0x08</code> = <code>SYNTAX</code></dt> * <dd>Print parsing decisions made during syntax analysis</dd> * <dt><code>0x10</code> = <code>CONFLICT</code></dt> * <dd>Print parsing conflicts encountered during syntax analysis</dd> * <dt><code>0x20</code> = <code>PARSE_TREE</code></dt> * <dd>Print each <code>ParseTree</code> produced by syntax analysis</dd> * </dl></blockquote> * @since 1.1 */ protected int debug = 0; /** * <p>{@link #debug <code>debug</code>} switch constant enabling printing the set of terminals before lexical analysis.</p> * @since 1.1 */ protected static final int TERMINALS = 0x01; /** * <p>{@link #debug <code>debug</code>} switch constant enabling printing terminals and associated words grabbed during lexical analysis.</p> * @since 1.1 */ protected static final int LEXICAL = 0x02; /** * <p>{@link #debug <code>debug</code>} switch constant enabling all debugging.</p> * @since 1.1 */ protected static final int VERBOSE = 0xFF; /** * <p>Lexical analysis by command-line arguments using this <code>Lexicon</code>. The first I/O or lexical error that occurs during lexical analysis is printed to the standard error stream.</p> * * @param arguments the command-line arguments controlling the interpreter. * <blockquote> * The following arguments may appear zero or more times, are processed in order, and have the following effects: * <blockquote><dl> * <dt><code>-t</code>, <code>--terminals</code></dt> * <dd>Print the set of terminals in this <code>Lexicon</code> before subsequent lexical analyses.</dd> * <dt><code>-l</code>, <code>--lexical</code></dt> * <dd>Print terminals in this <code>Lexicon</code> grabbed during subsequent lexical analyses.</dd> * <dt><code>-v</code>, <code>--verbose</code></dt> * <dd>Print maximum debugging. Equivalent to <code>-tl</code>.</dd> * <dt><code>-</code></dt> * <dd>Lexically analyze the standard input stream using this <code>Lexicon</code>.</dd> * <dt><code><var>filename</var></code></dt> * <dd>Lexically analyze source file <code><var>filename</var></code> using this <code>Lexicon</code>.</dd> * </dl></blockquote> * If no <code><var>filename</var></code> arguments are given, the standard input stream is lexically analyzed. * </blockquote> */ public void interpret(String[] arguments) { boolean sourceSpecified = false; Getopt options = new Getopt(getClass().getName(), arguments, "-tlv", new LongOpt[]{ new LongOpt("terminals", 0, null, 't'), new LongOpt("lexical", 0, null, 'l'), new LongOpt("verbose", 0, null, 'v'), } ); for (int option; (option = options.getopt()) != -1;) switch (option) { case 't': debug |= TERMINALS; break; case 'l': debug |= LEXICAL; break; case 'v': debug |= VERBOSE; break; case 1: String argument = options.getOptarg(); try { sourceSpecified = true; if (argument.equals("-")) interpret(); else interpret(new FileReader(argument)); } catch (java.lang.Exception exception) { System.err.println(exception); } } if (!sourceSpecified) try { interpret(); } catch (java.lang.Exception exception) { System.err.println(exception); } } }
Snippet is not live.
Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #2000386 |
Snippet name: | class Lexicon from |
Eternal ID of this version: | #2000386/1 |
Text MD5: | f70d6a510f2b4939174dae839d65b8e5 |
Author: | stefan |
Category: | javax |
Type: | New Tinybrain snippet |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2015-06-25 14:30:33 |
Source code size: | 43288 bytes / 1263 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 569 / 111 |
Referenced in: | [show references] |