/**
* <p>This class implements a {@link Lexicon}.</p>
*
* @version 1.3
* @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>>
*/
class Lexicon {
//Q
/**
* <p>The number of lexical NFA states constructed.</p>
*/
private static int QSize = 0;
/**
* <p>Creates a new state in the lexical NFA.</p>
*
* @return a new state in the lexical NFA.
*/
private static Integer s() {
return ++QSize;
}
//delta
/**
* <p>The transition relation of the lexical NFA.</p>
*/
private static final Stack<Stack<Object>> delta = new Stack<Stack<Object>>();
/**
* <p>Puts a transition into the lexical NFA.</p>
*
* @param s the state from which the transition is made.
* @param A the <code>Alphabet</code> on which the transition is made.
* @param r the state to which the transition is made.
*/
private static void put(Integer s, Alphabet A, Integer r) {
if (Math.max(s,r) >= delta.size()) delta.setSize(Math.max(s,r)+1);
Stack<Object> pairs = delta.get(s);
if (pairs == null) delta.set(s, pairs = new Stack<Object>());
pairs.push(A);
pairs.push(r);
}
//Set
/**
* <p>This class implements a {@link Lexicon.Set <code>Set</code>}.</p>
*
* @version 1.3
* @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>>
* @param <E> the element type.
*/
static class Set<E> extends Stack<E> {
/**
* <p>The null exclusion indicator. If <code>true</code>, <code>add</code> methods will not add <code>null</code> to this <code>Set</code>.</p>
*/
private final boolean excludeNull;
/**
* <p>Constructs a <code>Set</code> with an initial capacity.</p>
*
* @param capacity the initial capacity. The magnitude of <code>capacity</code> is the initial capacity. The null exclusion indicator is initialized to <code>true</code> if <code>capacity</code> is negative.
*/
Set(int capacity) {
super();
ensureCapacity(Math.abs(capacity));
excludeNull = (capacity < 0);
}
/**
* <p>Adds an element to this <code>Set</code>. The element is not added if it occurs in this <code>Set</code> or it is <code>null</code> and the null exclusion indicator is <code>true</code>. The capacity is expanded if necessary.</p>
*
* @param element the element to add to this <code>Set</code>.
* @return <code>true</code> if this <code>Set</code> is changed; <code>false</code> otherwise.
*/
public boolean add(E element) {
if (excludeNull && element == null || contains(element)) return false;
push(element);
return true;
}
/**
* <p>Adds a <code>Set</code> of elements to this <code>Set</code>. An element is not added if it occurs in this <code>Set</code> or it is <code>null</code> and the null exclusion indicator is <code>true</code>. The capacity is expanded if necessary.</p>
*
* @param index the index in <code>S</code> beyond which elements are added.
* @param S the <code>Set</code> to add to this <code>Set</code>.
* @return <code>true</code> if this <code>Set</code> is changed; <code>false</code> otherwise.
*/
boolean add(int index, Set<E> S) {
if (S == null) return false;
boolean push = isEmpty();
boolean add = false;
for (int i = index; i < S.size(); i++) {
E element = S.get(i);
if (!(excludeNull && element == null))
if (push) {
push(element);
add = true;
}
else if (add(element))
add = true;
}
return add;
}
/**
* <p>Adds a <code>Set</code> of elements to this <code>Set</code>. An element is not added if it occurs in this <code>Set</code> or it is <code>null</code> and the null exclusion indicator is <code>true</code>. The capacity is expanded if necessary.</p>
*
* @param S the <code>Set</code> to add to this <code>Set</code>.
* @return <code>true</code> if this <code>Set</code> is changed; <code>false</code> otherwise.
*/
boolean add(Set<E> S) {
return add(0, S);
}
public String toString() {
StringBuffer result = new StringBuffer(80);
result.append('{');
for (int i = 0; i < size(); i++) {
if (i > 0) result.append(' ');
result.append(get(i));
}
result.append('}');
return result.toString();
}
//Set
}
//I
/**
* <p>The initial states of the lexical NFA. When empty, there is a need to compute the current initial states. It is computed only on demand created by {@link #initial()}.</p>
*/
private final Set<Integer> I;
//F
/**
* <p>The final states of the lexical NFA. A final state is mapped to the terminal it accepts in this <code>Lexicon</code>. When empty, there is a need to compute current final states. It is computed only on demand created by {@link #initial()}.</p>
*/
private final Map<Integer, Object> F;
//Lexicon.transition
/**
* <p>Computes a transition using the lexical NFA.</p>
*
* @param S the states from which the transition is made.
* @param a the character on which the transition is made.
* @param R the states to which the transition is made.
* @return the states to which the transition is made.
*/
private static Set<Integer> transition(Set<Integer> S, char a, Set<Integer> R) {
R.clear();
for (Integer s : S) {
Stack<Object> pairs = delta.get(s);
if (pairs != null)
for (int k = 0; k < pairs.size(); k += 2) {
Alphabet A = (Alphabet)pairs.get(k);
if (A != null) {
Integer r = (Integer)pairs.get(k+1);
if (A.contains(a)) R.add(r);
}
}
}
return R;
}
//Lexicon.closure
/**
* <p>Computes a reflexive transitive closure under empty transition using the lexical NFA. The closure is computed in place by a breadth-first search expanding <code>S</code>.</p>
*
* @param S the states whose reflexive transitive closure is computed under empty transition.
* @return the reflexive transitive closure of <code>S</code> under empty transition.
*/
private static Set<Integer> closure(Set<Integer> S) {
for (int i = 0; i < S.size(); i++) {
Integer s = S.get(i);
Stack<Object> pairs = delta.get(s);
if (pairs != null)
for (int k = 0; k < pairs.size(); k += 2) {
Alphabet A = (Alphabet)pairs.get(k);
if (A == null) {
Integer r = (Integer)pairs.get(k+1);
S.add(r);
}
}
}
return S;
}
//Expression
/**
* <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing a regular language.</p>
*
* @version 1.3
* @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>>
*/
abstract public static class Expression implements Cloneable {
/**
* <p>The initial state of the NFA constructed from this <code>Expression</code>.</p>
*/
Integer i;
/**
* <p>The final state of the NFA constructed from this <code>Expression</code>.</p>
*/
Integer f;
/**
* <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
*
* @return a clone of this <code>Expression</code>.
*/
abstract public Object clone();
}
//Alphabet
/**
* <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} of character symbols.</p>
*
* @version 1.3
* @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>>
*/
abstract public static class Alphabet extends Expression {
/**
* <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p>
*
* @param a the character whose status is requested.
* @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise.
*/
abstract boolean contains(char a);
}
//Match
/**
* <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing some characters.</p>
*
* @version 1.3
* @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>>
*/
public static class Match extends Alphabet {
/**
* <p>The {@link Character} or {@link String} representing this <code>Alphabet</code>.</p>
*/
final Object A;
/**
* <p>Constructs an <code>Alphabet</code> containing some characters, and builds the NFA constructed from this <code>Expression</code>.</p>
*
* @param i the initial state of the NFA constructed.
* @param A the {@link Character} or {@link String} of characters in this <code>Alphabet</code>.
* @param f the final state of the NFA constructed.
*/
private Match(Integer i, Object A, Integer f) {
this.A = A;
put(this.i = i, this, this.f = f);
}
/**
* <p>Constructs an <code>Alphabet</code> containing one character, and builds the NFA constructed from this <code>Expression</code>.</p>
*
* @param i the initial state of the NFA constructed.
* @param a the character in this <code>Alphabet</code>.
* @param f the final state of the NFA constructed.
*/
private Match(Integer i, char a, Integer f) {
this(i, new Character(a), f);
}
/**
* <p>Constructs an <code>Alphabet</code> containing one character, and builds the NFA constructed from this <code>Expression</code>.</p>
*
* @param a the character in this <code>Alphabet</code>.
*/
public Match(char a) {
this(s(), a, s());
}
/**
* <p>Constructs an <code>Alphabet</code> containing some characters, and builds the NFA constructed from this <code>Expression</code>.</p>
*
* @param A the {@link Character} or {@link String} of characters in this <code>Alphabet</code>.
*/
public Match(Object A) {
this(s(), A, s());
}
/**
* <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p>
*
* @param a the character whose status is requested.
* @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise.
*/
boolean contains(char a) {
if (A instanceof Character)
return (Character)A == a;
if (A instanceof String)
return ((String)A).indexOf(a) != -1;
if (A instanceof Stack<?>)
for (Alphabet alphabet : (Stack<Alphabet>)A)
if (alphabet.contains(a)) return true;
return false;
}
/**
* <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
*
* @return a clone of this <code>Alphabet</code>.
*/
public Object clone() {
return new Match(A);
}
}
//NonMatch
/**
* <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing all except some characters.</p>
*
* @version 1.3
* @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>>
*/
public static class NonMatch extends Match {
/**
* <p>Constructs an <code>Alphabet</code> containing all characters except one, and builds the NFA constructed from this <code>Expression</code>.</p>
*
* @param a the character not in this <code>Alphabet</code>.
*/
public NonMatch(char a) {
super(a);
}
/**
* <p>Constructs an <code>Alphabet</code> containing all characters except some, and builds the NFA constructed from this <code>Expression</code>.</p>
*
* @param A the {@link Character} or {@link String} of characters not in this <code>Alphabet</code>.
*/
public NonMatch(Object A) {
super(A);
}
/**
* <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p>
*
* @param a the character whose status is requested.
* @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise.
*/
boolean contains(char a) {
return a != (char)-1 && !super.contains(a);
}
/**
* <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
*
* @return a clone of this <code>Alphabet</code>.
*/
public Object clone() {
return new NonMatch(A);
}
}
//Range
/**
* <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing the characters in a range.</p>
*
* @version 1.3
* @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>>
*/
public static class Range extends Alphabet {
/**
* <p>The first character in the range.</p>
*/
private final char a1;
/**
* <p>The last character in the range.</p>
*/
private final char a2;
/**
* <p>Constructs an <code>Alphabet</code> containing the characters in a range, and builds the NFA constructed from this <code>Expression</code>.</p>
*
* @param a1 the first character in the range.
* @param a2 the last character in the range.
*/
public Range(char a1, char a2) {
this.a1 = a1;
this.a2 = a2;
put(i = s(), this, f = s());
}
/**
* <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p>
*
* @param a the character whose status is requested.
* @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise.
*/
boolean contains(char a) {
return a1 <= a && a <= a2;
}
/**
* <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
*
* @return a clone of this <code>Alphabet</code>.
*/
public Object clone() {
return new Range(a1, a2);
}
}
/**
* <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing the characters in a POSIX character class.</p>
*
* @version 1.3
* @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>>
*/
public static class PosixClass extends Alphabet {
/**
* <p>The bit mask representing this <code>PosixClass</code>.</p>
*/
private final int posixClass;
/**
* <p>Constructs an <code>Alphabet</code> containing the characters in a POSIX character class, and builds the NFA constructed from this <code>Expression</code>.</p>
*
* @param posixClass the bit mask representing this <code>PosixClass</code>.
*/
private PosixClass(int posixClass) {
this.posixClass = posixClass;
put(i = s(), this, f = s());
}
/**
* <p>Creates an <code>Alphabet</code> containing the uppercase alphabetic characters.</p>
*
* @return an <code>Alphabet</code> containing the uppercase alphabetic characters.
*/
public static PosixClass upper() {
return new PosixClass(0x0001);
}
/**
* <p>Creates an <code>Alphabet</code> containing the lowercase alphabetic characters.</p>
*
* @return an <code>Alphabet</code> containing the lowercase alphabetic characters.
*/
public static PosixClass lower() {
return new PosixClass(0x0002);
}
/**
* <p>Creates an <code>Alphabet</code> containing the alphabetic characters.</p>
*
* @return an <code>Alphabet</code> containing the alphabetic characters.
*/
public static PosixClass alpha() {
return new PosixClass(0x0004);
}
/**
* <p>Creates an <code>Alphabet</code> containing the decimal digit characters.</p>
*
* @return an <code>Alphabet</code> containing the decimal digit characters.
*/
public static PosixClass digit() {
return new PosixClass(0x0008);
}
/**
* <p>Creates an <code>Alphabet</code> containing the hexadecimal digit characters.</p>
*
* @return an <code>Alphabet</code> containing the hexadecimal digit characters.
*/
public static PosixClass xdigit() {
return new PosixClass(0x0010);
}
/**
* <p>Creates an <code>Alphabet</code> containing the alphanumeric characters.</p>
*
* @return an <code>Alphabet</code> containing the alphanumeric characters.
*/
public static PosixClass alnum() {
return new PosixClass(0x0020);
}
/**
* <p>Creates an <code>Alphabet</code> containing the punctuation characters.</p>
*
* @return an <code>Alphabet</code> containing the punctuation characters.
*/
public static PosixClass punct() {
return new PosixClass(0x0040);
}
/**
* <p>Creates an <code>Alphabet</code> containing the graphical characters.</p>
*
* @return an <code>Alphabet</code> containing the graphical characters.
*/
public static PosixClass graph() {
return new PosixClass(0x0080);
}
/**
* <p>Creates an <code>Alphabet</code> containing the printable characters.</p>
*
* @return an <code>Alphabet</code> containing the printable characters.
*/
public static PosixClass print() {
return new PosixClass(0x0100);
}
/**
* <p>Creates an <code>Alphabet</code> containing the blank characters.</p>
*
* @return an <code>Alphabet</code> containing the blank characters.
*/
public static PosixClass blank() {
return new PosixClass(0x0200);
}
/**
* <p>Creates an <code>Alphabet</code> containing the space characters.</p>
*
* @return an <code>Alphabet</code> containing the space characters.
*/
public static PosixClass space() {
return new PosixClass(0x0400);
}
/**
* <p>Creates an <code>Alphabet</code> containing the control characters.</p>
*
* @return an <code>Alphabet</code> containing the control characters.
*/
public static PosixClass cntrl() {
return new PosixClass(0x0800);
}
/**
* <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p>
*
* @param a the character whose status is requested.
* @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise.
*/
boolean contains(char a) {
int UPPER = 0x0001; int LOWER = 0x0002;
int ALPHA = 0x0004; int DIGIT = 0x0008;
int XDIGIT = 0x0010; int ALNUM = 0x0020;
int PUNCT = 0x0040; int GRAPH = 0x0080;
int PRINT = 0x0100; int BLANK = 0x0200;
int SPACE = 0x0400; int CNTRL = 0x0800;
int classes = 0;
switch (Character.getType(a)) {
default: break;
case Character.UPPERCASE_LETTER:
classes |= UPPER | ALPHA | (('A' <= a && a <= 'F') ? XDIGIT : 0) | ALNUM | GRAPH | PRINT; break;
case Character.LOWERCASE_LETTER:
classes |= LOWER | ALPHA | (('a' <= a && a <= 'f') ? XDIGIT : 0) | ALNUM | GRAPH | PRINT; break;
case Character.TITLECASE_LETTER:
case Character.MODIFIER_LETTER:
case Character.OTHER_LETTER:
classes |= ALPHA | ALNUM | GRAPH | PRINT; break;
case Character.NON_SPACING_MARK:
case Character.COMBINING_SPACING_MARK:
case Character.ENCLOSING_MARK:
classes |= PUNCT | GRAPH | PRINT; break;
case Character.DECIMAL_DIGIT_NUMBER:
classes |= DIGIT | XDIGIT | ALNUM | GRAPH | PRINT; break;
case Character.LETTER_NUMBER:
case Character.OTHER_NUMBER:
classes |= ALNUM | GRAPH | PRINT; break;
case Character.CONNECTOR_PUNCTUATION:
case Character.DASH_PUNCTUATION:
case Character.START_PUNCTUATION:
case Character.END_PUNCTUATION:
case Character.INITIAL_QUOTE_PUNCTUATION:
case Character.FINAL_QUOTE_PUNCTUATION:
case Character.OTHER_PUNCTUATION:
case Character.MATH_SYMBOL:
case Character.CURRENCY_SYMBOL:
case Character.MODIFIER_SYMBOL:
case Character.OTHER_SYMBOL:
classes |= PUNCT | GRAPH | PRINT; break;
case Character.SPACE_SEPARATOR:
classes |= PRINT | BLANK | SPACE; break;
case Character.LINE_SEPARATOR:
case Character.PARAGRAPH_SEPARATOR:
break;
case Character.CONTROL:
classes |= ((a == '\t') ? BLANK : 0) | ((a == '\t' || a == '\n' || a == '\013' || a == '\f' || a == '\r') ? SPACE : 0) | CNTRL; break;
case Character.FORMAT:
case Character.SURROGATE:
case Character.PRIVATE_USE:
case Character.UNASSIGNED:
break;
}
return (classes & posixClass) != 0;
}
/**
* <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
*
* @return a clone of this <code>Alphabet</code>.
*/
public Object clone() {
return new PosixClass(posixClass);
}
}
//UnicodeCategory
/**
* <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing the characters in a Unicode general category.</p>
*
* @version 1.3
* @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>>
*/
public static class UnicodeCategory extends Alphabet {
/**
* <p>The byte representing the Unicode general category.</p>
*/
private final byte category;
/**
* <p>Constructs an <code>Alphabet</code> containing the characters in a Unicode general category, and builds the NFA constructed from this <code>Expression</code>. The class {@link Character} defines byte constants representing each of the Unicode general categories.</p>
*
* @param category The byte representing the Unicode general category.
* @see Character
*/
public UnicodeCategory(byte category) {
this.category = category;
put(i = s(), this, f = s());
}
/**
* <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p>
*
* @param a the character whose status is requested.
* @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise.
*/
boolean contains(char a) {
return Character.getType(a) == category;
}
/**
* <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
*
* @return a clone of this <code>Alphabet</code>.
*/
public Object clone() {
return new UnicodeCategory(category);
}
}
//Repetition
/**
* <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing the repetition of a regular language.</p>
*
* @version 1.3
* @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>>
*/
public static class Repetition extends Expression {
/**
* <p>The operand <code>Expression</code>.</p>
*/
private final Expression e1;
/**
* <p>The minimum number of times <code>e1</code> is repeated.</p>
*/
private final int min;
/**
* <p>The maximum number of times <code>e1</code> is repeated.</p>
*/
private final int max;
/**
* <p>Constructs an <code>Expression</code> expressing the repetition of a regular language, and builds the NFA constructed from this <code>Expression</code>. Large finite values for the minimum or maximum cause the NFA constructed from the operand <code>Expression</code> to be copied many times, resulting in a space-inefficient NFA.</p>
*
* @param e1 the operand <code>Expression</code>.
* @param min the minimum number of times <code>e1</code> is repeated. If negative, it is assumed to be zero.
* @param max the maximum number of times <code>e1</code> is repeated. If negative, it is assumed to be infinity.
*/
public Repetition(Expression e1, int min, int max) {
this.e1 = e1 = (Expression)e1.clone();
this.min = min = Math.max(min, 0);
this.max = max;
i = (min > 0) ? e1.i : s();
f = (min > 0) ? e1.f : i;
if (min == 0 && max < 0) {
put(i, null, e1.i);
put(e1.f, null, i);
}
else {
for (int k = 2; k <= min; k++) {
e1 = (Expression)e1.clone();
put(f, null, e1.i);
f = e1.f;
}
if (max > min) {
Integer tail = f;
put(tail, null, f = s());
for (int k = min+1; k <= max; k++) {
if (k > 1) e1 = (Expression)e1.clone();
put(tail, null, e1.i);
put(tail = e1.f, null, f);
}
}
else if (max < 0) put(f, null, e1.i);
}
}
/**
* <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
*
* @return a clone of this <code>Expression</code>.
*/
public Object clone() {
return new Repetition(e1, min, max);
}
}
//Concatenation
/**
* <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing the concatenation of two regular languages.</p>
*
* @version 1.3
* @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>>
*/
public static class Concatenation extends Expression {
/**
* <p>The left operand <code>Expression</code>.</p>
*/
private final Expression e1;
/**
* <p>The right operand <code>Expression</code>.</p>
*/
private final Expression e2;
/**
* <p>Constructs an <code>Expression</code> expressing the concatenation of two regular languages, and builds the NFA constructed from this <code>Expression</code>.</p>
*
* @param e1 the left operand <code>Expression</code>.
* @param e2 the right operand <code>Expression</code>.
*/
public Concatenation(Expression e1, Expression e2) {
this.e1 = e1 = (Expression)e1.clone();
this.e2 = e2 = (Expression)e2.clone();
i = e1.i;
f = e2.f;
put(e1.f, null, e2.i);
}
/**
* <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
*
* @return a clone of this <code>Expression</code>.
*/
public Object clone() {
return new Concatenation(e1, e2);
}
}
//Singleton
/**
* <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing a singleton language.</p>
*
* @version 1.3
* @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>>
*/
public static class Singleton extends Expression {
/**
* <p>The string whose singleton language is expressed.</p>
*/
private final String x;
/**
* <p>Constructs an <code>Expression</code> expressing a singleton language, and builds the NFA constructed from this <code>Expression</code>.</p>
*
* @param x the string whose singleton language is expressed.
*/
public Singleton(String x) {
this.x = x;
f = i = s();
for (char c : x.toCharArray())
new Match(f, c, f = s());
}
/**
* <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
*
* @return a clone of this <code>Expression</code>.
*/
public Object clone() {
return new Singleton(x);
}
}
//Union
/**
* <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing the union of two regular languages.</p>
*
* @version 1.3
* @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>>
*/
public static class Union extends Expression {
/**
* <p>The left operand <code>Expression</code>.</p>
*/
private final Expression e1;
/**
* <p>The right operand <code>Expression</code>.</p>
*/
private final Expression e2;
/**
* <p>Constructs an <code>Expression</code> expressing the union of two regular languages, and builds the NFA constructed from this <code>Expression</code>.</p>
*
* @param e1 the left operand <code>Expression</code>.
* @param e2 the right operand <code>Expression</code>.
*/
public Union(Expression e1, Expression e2) {
this.e1 = e1 = (Expression)e1.clone();
this.e2 = e2 = (Expression)e2.clone();
i = s();
f = s();
put(i, null, e1.i); put(e1.f, null, f);
put(i, null, e2.i); put(e2.f, null, f);
}
/**
* <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
*
* @return a clone of this <code>Expression</code>.
*/
public Object clone() {
return new Union(e1, e2);
}
}
//expression(ere)
/**
* <p>Creates an <code>Expression</code> by interpreting a POSIX extended regular expression (ERE), as used in egrep. The syntax and semantics for EREs is formally specified by the <a href="../../../src/gi/ERE.java">ERE <code>Grammar</code></a>. Provides a convenient method for constructing an <code>Expression</code>, at the cost of an LR(1) parse. Implementations seeking maximum speed should avoid this method and use explicit <code>Expression</code> subclass constructors; for example,</p>
* <blockquote><code>new Union(new NonMatch("0"), new Singleton("foo"))</code></blockquote>
* instead of
* <blockquote><code>expression("[^0]|foo")</code></blockquote>
*
* @param ere the POSIX extended regular expression (ERE) to interpret.
* @return the <code>Expression</code> constructed by interpreting <code>ere</code>.
* @throws Lexicon.Exception if an ERE syntax error occurs.
*/
public static Expression expression(String ere) throws Exception {
return ERE.expression(ere);
}
//E
/**
* <p>The mapping representing this <code>Lexicon</code>. A terminal is mapped to the initial state of the NFA constructed from the associated <code>Expression</code>.</p>
*/
private final Map<Object, Expression> E;
/**
* <p>Puts a terminal and associated <code>Expression</code> into this <code>Lexicon</code>. The <code>Expression</code> supersedes any previously associated with the terminal.</p>
*
* @param a the terminal to add to this <code>Lexicon</code>.
* @param e the <code>Expression</code> associated with terminal <code>a</code>. When grabbing, the language expressed by <code>e</code> matches <code>a</code>.
*/
public void put(Object a, Expression e) {
E.put(a, e);
I.clear();
F.clear();
}
/**
* <p>Indicates whether a symbol is a terminal in this <code>Lexicon</code>.</p>
*
* @param a the symbol whose status is requested.
* @return <code>true</code> if <code>a</code> is a terminal in this <code>Lexicon</code>; <code>false</code> otherwise.
*/
boolean terminal(Object a) {
return E.containsKey(a);
}
//Lexicon()
/**
* <p>The terminal matched by the character at the end of a source stream.</p>
* @since 1.1, renames <code>END_OF_SOURCE</code> in version 1.0.
*/
protected static final Object $ = new String("$");
/**
* <p>The <code>Alphabet</code> containing the character at the end of a source stream.</p>
*/
private static final Expression $_EXPRESSION = new Match((char)-1);
/**
* <p>Constructs an empty <code>Lexicon</code>.</p>
*/
protected Lexicon() {
E = new HashMap<Object, Expression>(500);
I = new Set<Integer>(-200);
F = new HashMap<Integer, Object>(500);
put($, $_EXPRESSION);
}
/**
* <p>Constructs a <code>Lexicon</code> that is a shallow copy of <code>lexicon</code>. The fields of the new <code>Lexicon</code> refer to the same elements as those in <code>lexicon</code>.</p>
*
*
* @param lexicon the <code>Lexicon</code> to copy.
*/
Lexicon(Lexicon lexicon) {/*debug*/
debug = lexicon.debug;/*off*/
E = lexicon.E;
I = lexicon.I;
F = lexicon.F;
}
//Lexicon.initial
/**
* <p>Returns the initial states of the lexical NFA.</p>
*
* @return {@link #I}, computing it and {@link #F} if there is a need to compute the current initial states and final states.
*/
private Set<Integer> initial() {
if (I.isEmpty()) {
for (Object a : E.keySet()) {
Expression e = E.get(a);
I.add(e.i);
F.put(e.f, a);
}
closure(I);
}
return I;
}
//accept
/**
* <p>Computes the current final state, if any, in the lexical NFA.</p>
*
* @param S the current states.
* @return the maximum final state in <code>S</code>. Returns <code>null</code> if <code>S</code> contains no final states.
*/
private Integer accept(Set<Integer> S) {
Integer
f = null;
for (Integer s : S)
if (F.containsKey(s))
if (f == null || f < s) f = s;
return f;
}
/**
* <p>This class implements an {@link Lexicon.Exception <code>Exception</code>}.</p>
*
* @version 1.3
* @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>>
*/
public class Exception extends java.lang.Exception {
/**
* <p>The extended error message.</p>
*/
private StringBuffer message;
/**
* <p>Constructs an <code>Exception</code> with a message.</p>
*
* @param message the error message.
*/
public Exception(String message) {
super(message);
}
/**
* <p>Returns the error message.</p>
*
* @return the error message.
*/
public String getMessage() {
return (message == null) ? super.getMessage() : message.toString();
}
/**
* <p>Extends the error message in this <code>Exception</code>. The extended message includes the line number, message and source characters following the error.</p>
*
* @param source the source character stream.
* @return this <code>Exception</code> with an extended message.
*/
Exception extend(LineNumberReader source) {
if (message == null) message = new StringBuffer(132);
else message.setLength(0);
message.append("line ");
message.append(source.getLineNumber()+1);
message.append(": ");
message.append(super.getMessage());
message.append(System.getProperty("line.separator"));
message.append("...");
message.append(word());
try {
String rest = source.readLine();
if (rest != null) message.append(rest);
}
catch (IOException exception) {}
message.append(System.getProperty("line.separator"));
message.append(" ^");
return this;
}
}
//Lexicon.grab
/**
* <p>The states through which the lexical NFA transitions.</p>
*/
private final Set<Integer>[] R = (Set<Integer>[])new Set<?>[]{new Set<Integer>(-200), new Set<Integer>(-200)};
/**
* <p>The <code>StringBuffer</code> containing the word most recently grabbed.</p>
*/
private final StringBuffer w = new StringBuffer(4000);
/**
* <p>Grabs a terminal from a source character stream using this <code>Lexicon</code>. The variable returned by {@link #word()} is set to the longest nonempty prefix of the remaining source characters matching an <code>Expression</code> in this <code>Lexicon</code>. If no nonempty prefix matches an <code>Expression</code>, a <code>Lexicon.Exception</code> is thrown. If the longest matching prefix matches more than one <code>Expression</code>, the terminal associated with the <code>Expression</code> most recently constructed is returned. Blocks until a character is available, an I/O error occurs, or the end of the source stream is reached.</p>
*
* @param source the source character stream.
* @return the terminal grabbed from <code>source</code>.
* @throws Lexicon.Exception if an I/O or lexical error occurs.
*/
protected Object grab(LineNumberReader source) throws Exception {
Set<Integer> S = initial();
w.setLength(0);
int wLength = 0;
Object b = null;
try {
source.mark(w.capacity());
do {
int a = source.read();
S = closure(transition(S, (char)a, R[w.length() % 2]));
if (S.isEmpty()) break;
if (a != -1) w.append((char)a); else w.append($);
Integer f = accept(S);
if (f != null) {
wLength = w.length();
b = F.get(f);
source.mark(w.capacity());
}
} while (b != $);
w.setLength(wLength);
source.reset();
}
catch (IOException exception) {
throw new Exception(exception.getMessage());
}
if (wLength == 0) throw new Exception("lexical error").extend(source);
return b;
}
/**
* <p>Returns the word most recently grabbed using this <code>Lexicon</code>.</p>
*
* @return the word most recently grabbed by {@link #grab(java.io.LineNumberReader) <code>grab(source)</code>}.
*/
protected String word() {
return w.substring(0);
}
//Lexicon.interpret
/**
* <p>Repeatedly invokes {@link #grab(java.io.LineNumberReader) <code>grab(source)</code>} until the end of the source stream reached. Blocks until a character is available, or an I/O error occurs. This method is overridden by <code>Grammar</code> and its parser subclasses, so it is only invoked when this <code>Lexicon</code> has not been extended into a <code>Grammar</code> or parser.</p>
*
* @param source the source character stream.
* @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. A <code>Lexicon</code> always returns null.
* @throws Lexicon.Exception if an I/O or lexical error occurs.
*/
Object interpret(LineNumberReader source) throws Exception {/*debug*/
if ((debug & TERMINALS) > 0) System.out.println(
"----terminals\n\t" + E.keySet().toString().replaceFirst("\\[", "{").replaceAll(", ", " ").replaceFirst("\\]$", "}\n----------"));/*off*/
for (Object a; (a = grab(source)) != $;)/*debug*/
if ((debug & LEXICAL) > 0) System.out.println(
a + (!a.equals(word()) ? " " + word() : ""))/*off*/
;
return null;
}
/**
* <p>Interprets a source character stream using this <code>Lexicon</code>.</p>
*
* @param source the source character stream.
* @return the <code>ParseTree</code> constructed by interpreting <code>source</code>.
* @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs.
*/
public Object interpret(Reader source) throws Exception {
return interpret(new LineNumberReader(source));
}
/**
* <p>Interprets a source string using this <code>Lexicon</code>.</p>
*
* @param source the source string.
* @return the <code>ParseTree</code> constructed by interpreting <code>source</code>.
* @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs.
*/
public Object interpret(String source) throws Exception {
return interpret(new StringReader(source));
}
/**
* <p>Interprets a source byte stream using this <code>Lexicon</code>.</p>
*
* @param source the source byte stream.
* @return the <code>ParseTree</code> constructed by interpreting <code>source</code>.
* @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs.
*/
public Object interpret(InputStream source) throws Exception {
return interpret(new InputStreamReader(source));
}
/**
* <p>Interprets the standard input stream using this <code>Lexicon</code>.</p>
*
* @return the <code>ParseTree</code> constructed by interpreting the standard input stream.
* @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs.
*/
public Object interpret() throws Exception {
return interpret(System.in);
}
/**
* <p>Interprets a source file using this <code>Lexicon</code>.</p>
*
* @param source the source file.
* @return the <code>ParseTree</code> constructed by interpreting <code>source</code>.
* @throws FileNotFoundException if the source file cannot be found.
* @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs.
*/
public Object interpret(File source) throws FileNotFoundException, Exception {
return interpret(new FileReader(source));
}
/**
* <p>Interprets a source pipe using this <code>Lexicon</code>.</p>
*
* @param source the source pipe.
* @return the <code>ParseTree</code> constructed by interpreting <code>source</code>.
* @throws IOException if the source pipe cannot be connected.
* @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs.
*/
public Object interpret(PipedWriter source) throws IOException, Exception {
return interpret(new PipedReader(source));
}
//Lexicon.interpret(arguments)
/**
* <p>The debug switches, initially zero. The following bits enable debugging to standard output:</p>
* <blockquote><dl>
* <dt><code>0x01</code> = <code>TERMINALS</code></dt>
* <dd>Print the set of terminals before lexical analysis</dd>
* <dt><code>0x02</code> = <code>LEXICAL</code> </dt>
* <dd>Print terminals and associated words grabbed during lexical analysis</dd>
* <dt><code>0x04</code> = <code>FIRST_FOLLOW</code></dt>
* <dd>Print first and follow sets precomputed during syntax analysis</dd>
* <dt><code>0x08</code> = <code>SYNTAX</code></dt>
* <dd>Print parsing decisions made during syntax analysis</dd>
* <dt><code>0x10</code> = <code>CONFLICT</code></dt>
* <dd>Print parsing conflicts encountered during syntax analysis</dd>
* <dt><code>0x20</code> = <code>PARSE_TREE</code></dt>
* <dd>Print each <code>ParseTree</code> produced by syntax analysis</dd>
* </dl></blockquote>
* @since 1.1
*/
protected int debug = 0;
/**
* <p>{@link #debug <code>debug</code>} switch constant enabling printing the set of terminals before lexical analysis.</p>
* @since 1.1
*/
protected static final int TERMINALS = 0x01;
/**
* <p>{@link #debug <code>debug</code>} switch constant enabling printing terminals and associated words grabbed during lexical analysis.</p>
* @since 1.1
*/
protected static final int LEXICAL = 0x02;
/**
* <p>{@link #debug <code>debug</code>} switch constant enabling all debugging.</p>
* @since 1.1
*/
protected static final int VERBOSE = 0xFF;
}
Began life as a copy of #2000391
Snippet is not live.
Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #2000392 |
| Snippet name: | class Lexicon |
| Eternal ID of this version: | #2000392/1 |
| Text MD5: | a6cf445ca2847d210aa478b3dc888c7f |
| Author: | stefan |
| Category: | javax |
| Type: | New Tinybrain snippet |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2015-06-26 21:06:10 |
| Source code size: | 41054 bytes / 1205 lines |
| Pitched / IR pitched: | No / Yes |
| Views / Downloads: | 858 / 189 |
| Referenced in: | #2000393 - class Lexicon #3000188 - Answer for stefanreich(>> t search) #3000189 - Answer for stefanreich(>> t bla) #3000190 - Answer for stefanreich(>> t 20 questions) #3000382 - Answer for ferdie (>> t = 1, f = 0) #3000383 - Answer for funkoverflow (>> t=1, f=0 okay) |