Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

1209
LINES

< > BotCompany Repo | #2000391 // class Lexicon

New Tinybrain snippet

/**
* <p>This class implements a {@link Lexicon}.</p>
*
* @version 1.3
* @author &copy; 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> &lt;<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>&gt;
*/
class Lexicon {
//Q
/**
* <p>The number of lexical NFA states constructed.</p>
*/
private static int QSize = 0;

/**
* <p>Creates a new state in the lexical NFA.</p>
*
* @return a new state in the lexical NFA.
*/
private static Integer s() {
	return ++QSize;
}
//delta
/**
* <p>The transition relation of the lexical NFA.</p>
*/
private static final Stack<Stack<Object>> delta = new Stack<Stack<Object>>();

/**
* <p>Puts a transition into the lexical NFA.</p>
*
* @param s the state from which the transition is made.
* @param A the <code>Alphabet</code> on which the transition is made.
* @param r the state to which the transition is made.
*/
private static void put(Integer s, Alphabet A, Integer r) {

	if (Math.max(s,r) >= delta.size()) delta.setSize(Math.max(s,r)+1);

	Stack<Object> pairs = delta.get(s);
	if (pairs == null) delta.set(s, pairs = new Stack<Object>());

	pairs.push(A);
	pairs.push(r);
}
//Set
/**
* <p>This class implements a {@link Lexicon.Set <code>Set</code>}.</p>
*
* @version 1.3
* @author &copy; 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> &lt;<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>&gt;
* @param <E> the element type.
*/
static class Set<E> extends Stack<E> {

	/**
	* <p>The null exclusion indicator. If <code>true</code>, <code>add</code> methods will not add <code>null</code> to this <code>Set</code>.</p>
	*/
	private final boolean excludeNull;

	/**
	* <p>Constructs a <code>Set</code> with an initial capacity.</p>
	*
	* @param capacity the initial capacity. The magnitude of <code>capacity</code> is the initial capacity. The null exclusion indicator is initialized to <code>true</code> if <code>capacity</code> is negative.
	*/
	Set(int capacity) {
		super();
		ensureCapacity(Math.abs(capacity));
		excludeNull = (capacity < 0);
	}

	/**
	* <p>Adds an element to this <code>Set</code>. The element is not added if it occurs in this <code>Set</code> or it is <code>null</code> and the null exclusion indicator is <code>true</code>. The capacity is expanded if necessary.</p>
	*
	* @param element the element to add to this <code>Set</code>.
	* @return <code>true</code> if this <code>Set</code> is changed; <code>false</code> otherwise.
	*/
	public boolean add(E element) {
		if (excludeNull && element == null || contains(element)) return false;
		push(element);
		return true;
	}

	/**
	* <p>Adds a <code>Set</code> of elements to this <code>Set</code>. An element is not added if it occurs in this <code>Set</code> or it is <code>null</code> and the null exclusion indicator is <code>true</code>. The capacity is expanded if necessary.</p>
	*
	* @param index the index in <code>S</code> beyond which elements are added.
	* @param S the <code>Set</code> to add to this <code>Set</code>.
	* @return <code>true</code> if this <code>Set</code> is changed; <code>false</code> otherwise.
	*/
	boolean add(int index, Set<E> S) {
		if (S == null) return false;
		boolean push = isEmpty();
		boolean add = false;

		for (int i = index; i < S.size(); i++) {
			E element = S.get(i);

			if (!(excludeNull && element == null))
				if (push) {
					push(element);
					add = true;
				}
				else if (add(element))
					add = true;
		}
		return add;
	}

	/**
	* <p>Adds a <code>Set</code> of elements to this <code>Set</code>. An element is not added if it occurs in this <code>Set</code> or it is <code>null</code> and the null exclusion indicator is <code>true</code>. The capacity is expanded if necessary.</p>
	*
	* @param S the <code>Set</code> to add to this <code>Set</code>.
	* @return <code>true</code> if this <code>Set</code> is changed; <code>false</code> otherwise.
	*/
	boolean add(Set<E> S) {
		return add(0, S);
	}

	public String toString() {
		StringBuffer result = new StringBuffer(80);
		result.append('{');

		for (int i = 0; i < size(); i++) {
			if (i > 0) result.append(' ');
			result.append(get(i));
		}
		result.append('}');
		return result.toString();
	}
//Set
}
//I
/**
* <p>The initial states of the lexical NFA. When empty, there is a need to compute the current initial states. It is computed only on demand created by {@link #initial()}.</p>
*/
private final Set<Integer> I;
//F
/**
* <p>The final states of the lexical NFA. A final state is mapped to the terminal it accepts in this <code>Lexicon</code>. When empty, there is a need to compute current final states. It is computed only on demand created by {@link #initial()}.</p>
*/
private final Map<Integer, Object> F;
//Lexicon.transition
/**
* <p>Computes a transition using the lexical NFA.</p>
*
* @param S the states from which the transition is made.
* @param a the character on which the transition is made.
* @param R the states to which the transition is made.
* @return the states to which the transition is made.
*/
private static Set<Integer> transition(Set<Integer> S, char a, Set<Integer> R) {
	R.clear();

	for (Integer s : S) {
		Stack<Object> pairs = delta.get(s);

		if (pairs != null)
			for (int k = 0; k < pairs.size(); k += 2) {
				Alphabet A = (Alphabet)pairs.get(k);

				if (A != null) {
					Integer r = (Integer)pairs.get(k+1);
					if (A.contains(a)) R.add(r);
				}
			}
	}
	return R;
}
//Lexicon.closure
/**
* <p>Computes a reflexive transitive closure under empty transition using the lexical NFA. The closure is computed in place by a breadth-first search expanding <code>S</code>.</p>
*
* @param S the states whose reflexive transitive closure is computed under empty transition.
* @return the reflexive transitive closure of <code>S</code> under empty transition.
*/
private static Set<Integer> closure(Set<Integer> S) {

	for (int i = 0; i < S.size(); i++) {
		Integer s = S.get(i);
		Stack<Object> pairs = delta.get(s);

		if (pairs != null)
			for (int k = 0; k < pairs.size(); k += 2) {
				Alphabet A = (Alphabet)pairs.get(k);

				if (A == null) {
					Integer r = (Integer)pairs.get(k+1);
					S.add(r);
				}
			}
	}
	return S;
}
//Expression
/**
* <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing a regular language.</p>
*
* @version 1.3
* @author &copy; 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> &lt;<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>&gt;
*/
abstract public static class Expression implements Cloneable {

	/**
	* <p>The initial state of the NFA constructed from this <code>Expression</code>.</p>
	*/
	Integer i;
	/**
	* <p>The final state of the NFA constructed from this <code>Expression</code>.</p>
	*/
	Integer f;

	/**
	* <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
	*
	* @return a clone of this <code>Expression</code>.
	*/
	abstract public Object clone();
}
//Alphabet
/**
* <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} of character symbols.</p>
*
* @version 1.3
* @author &copy; 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> &lt;<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>&gt;
*/
abstract public static class Alphabet extends Expression {

	/**
	* <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p>
	*
	* @param a the character whose status is requested.
	* @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise.
	*/
	abstract boolean contains(char a);
}
//Match
/**
* <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing some characters.</p>
*
* @version 1.3
* @author &copy; 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> &lt;<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>&gt;
*/
public static class Match extends Alphabet {

	/**
	* <p>The {@link Character} or {@link String} representing this <code>Alphabet</code>.</p>
	*/
	final Object A;

	/**
	* <p>Constructs an <code>Alphabet</code> containing some characters, and builds the NFA constructed from this <code>Expression</code>.</p>
	*
	* @param i the initial state of the NFA constructed.
	* @param A the {@link Character} or {@link String} of characters in this <code>Alphabet</code>.
	* @param f the final state of the NFA constructed.
	*/
	private Match(Integer i, Object A, Integer f) {
		this.A = A;
		put(this.i = i, this, this.f = f);
	}

	/**
	* <p>Constructs an <code>Alphabet</code> containing one character, and builds the NFA constructed from this <code>Expression</code>.</p>
	*
	* @param i the initial state of the NFA constructed.
	* @param a the character in this <code>Alphabet</code>.
	* @param f the final state of the NFA constructed.
	*/
	private Match(Integer i, char a, Integer f) {
		this(i, new Character(a), f);
	}

	/**
	* <p>Constructs an <code>Alphabet</code> containing one character, and builds the NFA constructed from this <code>Expression</code>.</p>
	*
	* @param a the character in this <code>Alphabet</code>.
	*/
	public Match(char a) {
		this(s(), a, s());
	}

	/**
	* <p>Constructs an <code>Alphabet</code> containing some characters, and builds the NFA constructed from this <code>Expression</code>.</p>
	*
	* @param A the {@link Character} or {@link String} of characters in this <code>Alphabet</code>.
	*/
	public Match(Object A) {
		this(s(), A, s());
	}

	/**
	* <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p>
	*
	* @param a the character whose status is requested.
	* @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise.
	*/
	boolean contains(char a) {
		if (A instanceof Character)
			return (Character)A == a;

		if (A instanceof String)
			return ((String)A).indexOf(a) != -1;

		if (A instanceof Stack<?>)
			for (Alphabet alphabet : (Stack<Alphabet>)A)
				if (alphabet.contains(a)) return true;
		return false;
	}

	/**
	* <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
	*
	* @return a clone of this <code>Alphabet</code>.
	*/
	public Object clone() {
		return new Match(A);
	}
}
//NonMatch
/**
* <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing all except some characters.</p>
*
* @version 1.3
* @author &copy; 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> &lt;<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>&gt;
*/
public static class NonMatch extends Match {

	/**
	* <p>Constructs an <code>Alphabet</code> containing all characters except one, and builds the NFA constructed from this <code>Expression</code>.</p>
	*
	* @param a the character not in this <code>Alphabet</code>.
	*/
	public NonMatch(char a) {
		super(a);
	}

	/**
	* <p>Constructs an <code>Alphabet</code> containing all characters except some, and builds the NFA constructed from this <code>Expression</code>.</p>
	*
	* @param A the {@link Character} or {@link String} of characters not in this <code>Alphabet</code>.
	*/
	public NonMatch(Object A) {
		super(A);
	}

	/**
	* <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p>
	*
	* @param a the character whose status is requested.
	* @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise.
	*/
	boolean contains(char a) {
		return a != (char)-1 && !super.contains(a);
	}

	/**
	* <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
	*
	* @return a clone of this <code>Alphabet</code>.
	*/
	public Object clone() {
		return new NonMatch(A);
	}
}
//Range
/**
* <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing the characters in a range.</p>
*
* @version 1.3
* @author &copy; 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> &lt;<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>&gt;
*/
public static class Range extends Alphabet {

	/**
	* <p>The first character in the range.</p>
	*/
	private final char a1;
	/**
	* <p>The last character in the range.</p>
	*/
	private final char a2;

	/**
	* <p>Constructs an <code>Alphabet</code> containing the characters in a range, and builds the NFA constructed from this <code>Expression</code>.</p>
	*
	* @param a1 the first character in the range.
	* @param a2 the last character in the range.
	*/
	public Range(char a1, char a2) {
		this.a1 = a1;
		this.a2 = a2;
		put(i = s(), this, f = s());
	}

	/**
	* <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p>
	*
	* @param a the character whose status is requested.
	* @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise.
	*/
	boolean contains(char a) {
		return a1 <= a && a <= a2;
	}

	/**
	* <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
	*
	* @return a clone of this <code>Alphabet</code>.
	*/
	public Object clone() {
		return new Range(a1, a2);
	}
}

/**
* <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing the characters in a POSIX character class.</p>
*
* @version 1.3
* @author &copy; 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> &lt;<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>&gt;
*/
public static class PosixClass extends Alphabet {

	/**
	* <p>The bit mask representing this <code>PosixClass</code>.</p>
	*/
	private final int posixClass;

	/**
	* <p>Constructs an <code>Alphabet</code> containing the characters in a POSIX character class, and builds the NFA constructed from this <code>Expression</code>.</p>
	*
	* @param posixClass the bit mask representing this <code>PosixClass</code>.
	*/
	private PosixClass(int posixClass) {
		this.posixClass = posixClass;
		put(i = s(), this, f = s());
	}

	/**
	* <p>Creates an <code>Alphabet</code> containing the uppercase alphabetic characters.</p>
	*
	* @return an <code>Alphabet</code> containing the uppercase alphabetic characters.
	*/
	public static PosixClass upper() {
		return new PosixClass(0x0001);
	}

	/**
	* <p>Creates an <code>Alphabet</code> containing the lowercase alphabetic characters.</p>
	*
	* @return an <code>Alphabet</code> containing the lowercase alphabetic characters.
	*/
	public static PosixClass lower() {
		return new PosixClass(0x0002);
	}

	/**
	* <p>Creates an <code>Alphabet</code> containing the alphabetic characters.</p>
	*
	* @return an <code>Alphabet</code> containing the alphabetic characters.
	*/
	public static PosixClass alpha() {
		return new PosixClass(0x0004);
	}

	/**
	* <p>Creates an <code>Alphabet</code> containing the decimal digit characters.</p>
	*
	* @return an <code>Alphabet</code> containing the decimal digit characters.
	*/
	public static PosixClass digit() {
		return new PosixClass(0x0008);
	}

	/**
	* <p>Creates an <code>Alphabet</code> containing the hexadecimal digit characters.</p>
	*
	* @return an <code>Alphabet</code> containing the hexadecimal digit characters.
	*/
	public static PosixClass xdigit() {
		return new PosixClass(0x0010);
	}

	/**
	* <p>Creates an <code>Alphabet</code> containing the alphanumeric characters.</p>
	*
	* @return an <code>Alphabet</code> containing the alphanumeric characters.
	*/
	public static PosixClass alnum() {
		return new PosixClass(0x0020);
	}

	/**
	* <p>Creates an <code>Alphabet</code> containing the punctuation characters.</p>
	*
	* @return an <code>Alphabet</code> containing the punctuation characters.
	*/
	public static PosixClass punct() {
		return new PosixClass(0x0040);
	}

	/**
	* <p>Creates an <code>Alphabet</code> containing the graphical characters.</p>
	*
	* @return an <code>Alphabet</code> containing the graphical characters.
	*/
	public static PosixClass graph() {
		return new PosixClass(0x0080);
	}

	/**
	* <p>Creates an <code>Alphabet</code> containing the printable characters.</p>
	*
	* @return an <code>Alphabet</code> containing the printable characters.
	*/
	public static PosixClass print() {
		return new PosixClass(0x0100);
	}

	/**
	* <p>Creates an <code>Alphabet</code> containing the blank characters.</p>
	*
	* @return an <code>Alphabet</code> containing the blank characters.
	*/
	public static PosixClass blank() {
		return new PosixClass(0x0200);
	}

	/**
	* <p>Creates an <code>Alphabet</code> containing the space characters.</p>
	*
	* @return an <code>Alphabet</code> containing the space characters.
	*/
	public static PosixClass space() {
		return new PosixClass(0x0400);
	}

	/**
	* <p>Creates an <code>Alphabet</code> containing the control characters.</p>
	*
	* @return an <code>Alphabet</code> containing the control characters.
	*/
	public static PosixClass cntrl() {
		return new PosixClass(0x0800);
	}

	/**
	* <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p>
	*
	* @param a the character whose status is requested.
	* @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise.
	*/
	boolean contains(char a) {
		int UPPER = 0x0001; int LOWER = 0x0002;
		int ALPHA = 0x0004; int DIGIT = 0x0008;
		int XDIGIT = 0x0010; int ALNUM = 0x0020;
		int PUNCT = 0x0040; int GRAPH = 0x0080;
		int PRINT = 0x0100; int BLANK = 0x0200;
		int SPACE = 0x0400; int CNTRL = 0x0800;
		int classes = 0;

		switch (Character.getType(a)) {
			default: break;
			case Character.UPPERCASE_LETTER:
				classes |= UPPER | ALPHA | (('A' <= a && a <= 'F') ? XDIGIT : 0) | ALNUM | GRAPH | PRINT; break;
			case Character.LOWERCASE_LETTER:
				classes |= LOWER | ALPHA | (('a' <= a && a <= 'f') ? XDIGIT : 0) | ALNUM | GRAPH | PRINT; break;
			case Character.TITLECASE_LETTER:
			case Character.MODIFIER_LETTER:
			case Character.OTHER_LETTER:
				classes |= ALPHA | ALNUM | GRAPH | PRINT; break;
			case Character.NON_SPACING_MARK:
			case Character.COMBINING_SPACING_MARK:
			case Character.ENCLOSING_MARK:
				classes |= PUNCT | GRAPH | PRINT; break;
			case Character.DECIMAL_DIGIT_NUMBER:
				classes |= DIGIT | XDIGIT | ALNUM | GRAPH | PRINT; break;
			case Character.LETTER_NUMBER:
			case Character.OTHER_NUMBER:
				classes |= ALNUM | GRAPH | PRINT; break;
			case Character.CONNECTOR_PUNCTUATION:
			case Character.DASH_PUNCTUATION:
			case Character.START_PUNCTUATION:
			case Character.END_PUNCTUATION:
			case Character.INITIAL_QUOTE_PUNCTUATION:
			case Character.FINAL_QUOTE_PUNCTUATION:
			case Character.OTHER_PUNCTUATION:
			case Character.MATH_SYMBOL:
			case Character.CURRENCY_SYMBOL:
			case Character.MODIFIER_SYMBOL:
			case Character.OTHER_SYMBOL:
				classes |= PUNCT | GRAPH | PRINT; break;
			case Character.SPACE_SEPARATOR:
				classes |= PRINT | BLANK | SPACE; break;
			case Character.LINE_SEPARATOR:
			case Character.PARAGRAPH_SEPARATOR:
				break;
			case Character.CONTROL:
				classes |= ((a == '\t') ? BLANK : 0) | ((a == '\t' || a == '\n' || a == '\013' || a == '\f' || a == '\r') ? SPACE : 0) | CNTRL; break;
			case Character.FORMAT:
			case Character.SURROGATE:
			case Character.PRIVATE_USE:
			case Character.UNASSIGNED:
				break;
		}
		return (classes & posixClass) != 0;
	}

	/**
	* <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
	*
	* @return a clone of this <code>Alphabet</code>.
	*/
	public Object clone() {
		return new PosixClass(posixClass);
	}
}
//UnicodeCategory
/**
* <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing the characters in a Unicode general category.</p>
*
* @version 1.3
* @author &copy; 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> &lt;<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>&gt;
*/
public static class UnicodeCategory extends Alphabet {

	/**
	* <p>The byte representing the Unicode general category.</p>
	*/
	private final byte category;

	/**
	* <p>Constructs an <code>Alphabet</code> containing the characters in a Unicode general category, and builds the NFA constructed from this <code>Expression</code>. The class {@link Character} defines byte constants representing each of the Unicode general categories.</p>
	*
	* @param category The byte representing the Unicode general category.
	* @see Character
	*/
	public UnicodeCategory(byte category) {
		this.category = category;
		put(i = s(), this, f = s());
	}

	/**
	* <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p>
	*
	* @param a the character whose status is requested.
	* @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise.
	*/
	boolean contains(char a) {
		return Character.getType(a) == category;
	}

	/**
	* <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
	*
	* @return a clone of this <code>Alphabet</code>.
	*/
	public Object clone() {
		return new UnicodeCategory(category);
	}
}
//Repetition
/**
* <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing the repetition of a regular language.</p>
*
* @version 1.3
* @author &copy; 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> &lt;<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>&gt;
*/
public static class Repetition extends Expression {

	/**
	* <p>The operand <code>Expression</code>.</p>
	*/
	private final Expression e1;
	/**
	* <p>The minimum number of times <code>e1</code> is repeated.</p>
	*/
	private final int min;
	/**
	* <p>The maximum number of times <code>e1</code> is repeated.</p>
	*/
	private final int max;

	/**
	* <p>Constructs an <code>Expression</code> expressing the repetition of a regular language, and builds the NFA constructed from this <code>Expression</code>. Large finite values for the minimum or maximum cause the NFA constructed from the operand <code>Expression</code> to be copied many times, resulting in a space-inefficient NFA.</p>
	*
	* @param e1 the operand <code>Expression</code>.
	* @param min the minimum number of times <code>e1</code> is repeated. If negative, it is assumed to be zero.
	* @param max the maximum number of times <code>e1</code> is repeated. If negative, it is assumed to be infinity.
	*/
	public Repetition(Expression e1, int min, int max) {
		this.e1 = e1 = (Expression)e1.clone();
		this.min = min = Math.max(min, 0);
		this.max = max;

		i = (min > 0) ? e1.i : s();
		f = (min > 0) ? e1.f : i;

		if (min == 0 && max < 0) {
			put(i, null, e1.i);
			put(e1.f, null, i);
		}
		else {
			for (int k = 2; k <= min; k++) {
				e1 = (Expression)e1.clone();
				put(f, null, e1.i);
				f = e1.f;
			}
			if (max > min) {
				Integer tail = f;
				put(tail, null, f = s());

				for (int k = min+1; k <= max; k++) {
					if (k > 1) e1 = (Expression)e1.clone();
					put(tail, null, e1.i);
					put(tail = e1.f, null, f);
				}
			}
			else if (max < 0) put(f, null, e1.i);
		}
	}

	/**
	* <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
	*
	* @return a clone of this <code>Expression</code>.
	*/
	public Object clone() {
		return new Repetition(e1, min, max);
	}
}
//Concatenation
/**
* <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing the concatenation of two regular languages.</p>
*
* @version 1.3
* @author &copy; 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> &lt;<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>&gt;
*/
public static class Concatenation extends Expression {

	/**
	* <p>The left operand <code>Expression</code>.</p>
	*/
	private final Expression e1;
	/**
	* <p>The right operand <code>Expression</code>.</p>
	*/
	private final Expression e2;

	/**
	* <p>Constructs an <code>Expression</code> expressing the concatenation of two regular languages, and builds the NFA constructed from this <code>Expression</code>.</p>
	*
	* @param e1 the left operand <code>Expression</code>.
	* @param e2 the right operand <code>Expression</code>.
	*/
	public Concatenation(Expression e1, Expression e2) {
		this.e1 = e1 = (Expression)e1.clone();
		this.e2 = e2 = (Expression)e2.clone();

		i = e1.i;
		f = e2.f;

		put(e1.f, null, e2.i);
	}

	/**
	* <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
	*
	* @return a clone of this <code>Expression</code>.
	*/
	public Object clone() {
		return new Concatenation(e1, e2);
	}
}
//Singleton
/**
* <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing a singleton language.</p>
*
* @version 1.3
* @author &copy; 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> &lt;<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>&gt;
*/
public static class Singleton extends Expression {

	/**
	* <p>The string whose singleton language is expressed.</p>
	*/
	private final String x;

	/**
	* <p>Constructs an <code>Expression</code> expressing a singleton language, and builds the NFA constructed from this <code>Expression</code>.</p>
	*
	* @param x the string whose singleton language is expressed.
	*/
	public Singleton(String x) {
		this.x = x;

		f = i = s();

		for (char c : x.toCharArray())
			new Match(f, c, f = s());
	}

	/**
	* <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
	*
	* @return a clone of this <code>Expression</code>.
	*/
	public Object clone() {
		return new Singleton(x);
	}
}
//Union
/**
* <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing the union of two regular languages.</p>
*
* @version 1.3
* @author &copy; 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> &lt;<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>&gt;
*/
public static class Union extends Expression {

	/**
	* <p>The left operand <code>Expression</code>.</p>
	*/
	private final Expression e1;
	/**
	* <p>The right operand <code>Expression</code>.</p>
	*/
	private final Expression e2;

	/**
	* <p>Constructs an <code>Expression</code> expressing the union of two regular languages, and builds the NFA constructed from this <code>Expression</code>.</p>
	*
	* @param e1 the left operand <code>Expression</code>.
	* @param e2 the right operand <code>Expression</code>.
	*/
	public Union(Expression e1, Expression e2) {
		this.e1 = e1 = (Expression)e1.clone();
		this.e2 = e2 = (Expression)e2.clone();

		i = s();
		f = s();

		put(i, null, e1.i); put(e1.f, null, f);
		put(i, null, e2.i); put(e2.f, null, f);
	}

	/**
	* <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p>
	*
	* @return a clone of this <code>Expression</code>.
	*/
	public Object clone() {
		return new Union(e1, e2);
	}
}
//expression(ere)
/**
* <p>Creates an <code>Expression</code> by interpreting a POSIX extended regular expression (ERE), as used in egrep. The syntax and semantics for EREs is formally specified by the <a href="../../../src/gi/ERE.java">ERE <code>Grammar</code></a>. Provides a convenient method for constructing an <code>Expression</code>, at the cost of an LR(1) parse. Implementations seeking maximum speed should avoid this method and use explicit <code>Expression</code> subclass constructors; for example,</p>
* <blockquote><code>new Union(new NonMatch("0"), new Singleton("foo"))</code></blockquote>
* instead of
* <blockquote><code>expression("[^0]|foo")</code></blockquote>
*
* @param ere the POSIX extended regular expression (ERE) to interpret.
* @return the <code>Expression</code> constructed by interpreting <code>ere</code>.
* @throws Lexicon.Exception if an ERE syntax error occurs.
*/
public static Expression expression(String ere) throws Exception {
	return ERE.expression(ere);
}
//E
/**
* <p>The mapping representing this <code>Lexicon</code>. A terminal is mapped to the initial state of the NFA constructed from the associated <code>Expression</code>.</p>
*/
private final Map<Object, Expression> E;

/**
* <p>Puts a terminal and associated <code>Expression</code> into this <code>Lexicon</code>. The <code>Expression</code> supersedes any previously associated with the terminal.</p>
*
* @param a the terminal to add to this <code>Lexicon</code>.
* @param e the <code>Expression</code> associated with terminal <code>a</code>. When grabbing, the language expressed by <code>e</code> matches <code>a</code>.
*/
public void put(Object a, Expression e) {
	E.put(a, e);
	I.clear();
	F.clear();
}

/**
* <p>Indicates whether a symbol is a terminal in this <code>Lexicon</code>.</p>
*
* @param a the symbol whose status is requested.
* @return <code>true</code> if <code>a</code> is a terminal in this <code>Lexicon</code>; <code>false</code> otherwise.
*/
boolean terminal(Object a) {
	return E.containsKey(a);
}
//Lexicon()
/**
* <p>The terminal matched by the character at the end of a source stream.</p>
* @since 1.1, renames <code>END_OF_SOURCE</code> in version 1.0.
*/
protected static final Object $ = new String("$");

/**
* <p>The <code>Alphabet</code> containing the character at the end of a source stream.</p>
*/
private static final Expression $_EXPRESSION = new Match((char)-1);

/**
* <p>Constructs an empty <code>Lexicon</code>.</p>
*/
protected Lexicon() {
	E = new HashMap<Object, Expression>(500);
	I = new Set<Integer>(-200);
	F = new HashMap<Integer, Object>(500);
	put($, $_EXPRESSION);
}

/**
* <p>Constructs a <code>Lexicon</code> that is a shallow copy of <code>lexicon</code>. The fields of the new <code>Lexicon</code> refer to the same elements as those in <code>lexicon</code>.</p>
*
 *
* @param lexicon the <code>Lexicon</code> to copy.
*/
Lexicon(Lexicon lexicon) {/*debug*/
	debug = lexicon.debug;/*off*/
	E = lexicon.E;
	I = lexicon.I;
	F = lexicon.F;
}
//Lexicon.initial
/**
* <p>Returns the initial states of the lexical NFA.</p>
*
* @return {@link #I}, computing it and {@link #F} if there is a need to compute the current initial states and final states.
*/
private Set<Integer> initial() {

	if (I.isEmpty()) {

		for (Object a : E.keySet()) {
			Expression e = E.get(a);

			I.add(e.i);
			F.put(e.f, a);
		}
		closure(I);
	}
	return I;
}
//accept
/**
* <p>Computes the current final state, if any, in the lexical NFA.</p>
*
* @param S the current states.
* @return the maximum final state in <code>S</code>. Returns <code>null</code> if <code>S</code> contains no final states.
*/
private Integer accept(Set<Integer> S) {

	Integer
		f = null;

	for (Integer s : S)
		if (F.containsKey(s))
			if (f == null || f < s) f = s;

	return f;
}

/**
* <p>This class implements an {@link Lexicon.Exception <code>Exception</code>}.</p>
*
* @version 1.3
* @author &copy; 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> &lt;<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>&gt;
*/
public class Exception extends java.lang.Exception {

	/**
	* <p>The extended error message.</p>
	*/
	private StringBuffer message;

	/**
	* <p>Constructs an <code>Exception</code> with a message.</p>
	*
	* @param message the error message.
	*/
	public Exception(String message) {
		super(message);
	}

	/**
	* <p>Returns the error message.</p>
	*
	* @return the error message.
	*/
	public String getMessage() {
		return (message == null) ? super.getMessage() : message.toString();
	}

	/**
	* <p>Extends the error message in this <code>Exception</code>. The extended message includes the line number, message and source characters following the error.</p>
	*
	* @param source the source character stream.
	* @return this <code>Exception</code> with an extended message.
	*/
	Exception extend(LineNumberReader source) {
		if (message == null) message = new StringBuffer(132);
		else message.setLength(0);

		message.append("line ");
		message.append(source.getLineNumber()+1);
		message.append(": ");
		message.append(super.getMessage());
		message.append(System.getProperty("line.separator"));
		message.append("...");
		message.append(word());
		try {
			String rest = source.readLine();
			if (rest != null) message.append(rest);
		}
		catch (IOException exception) {}
		message.append(System.getProperty("line.separator"));
		message.append("   ^");
		return this;
	}
}
//Lexicon.grab
/**
* <p>The states through which the lexical NFA transitions.</p>
*/
private final Set<Integer>[] R = (Set<Integer>[])new Set<?>[]{new Set<Integer>(-200), new Set<Integer>(-200)};
/**
* <p>The <code>StringBuffer</code> containing the word most recently grabbed.</p>
*/
private final StringBuffer w = new StringBuffer(4000);

/**
* <p>Grabs a terminal from a source character stream using this <code>Lexicon</code>. The variable returned by {@link #word()} is set to the longest nonempty prefix of the remaining source characters matching an <code>Expression</code> in this <code>Lexicon</code>. If no nonempty prefix matches an <code>Expression</code>, a <code>Lexicon.Exception</code> is thrown. If the longest matching prefix matches more than one <code>Expression</code>, the terminal associated with the <code>Expression</code> most recently constructed is returned. Blocks until a character is available, an I/O error occurs, or the end of the source stream is reached.</p>
*
* @param source the source character stream.
* @return the terminal grabbed from <code>source</code>.
* @throws Lexicon.Exception if an I/O or lexical error occurs.
*/
protected Object grab(LineNumberReader source) throws Exception {
	Set<Integer> S = initial();
	w.setLength(0);
	int wLength = 0;
	Object b = null;
	try {
		source.mark(w.capacity());
		do {
			int a = source.read();
			S = closure(transition(S, (char)a, R[w.length() % 2]));
			if (S.isEmpty()) break;

			if (a != -1) w.append((char)a); else w.append($);

			Integer f = accept(S);
			if (f != null) {
				wLength = w.length();
				b = F.get(f);
				source.mark(w.capacity());
			}
		} while (b != $);
		w.setLength(wLength);
		source.reset();
	}
	catch (IOException exception) {
		throw new Exception(exception.getMessage());
	}
	if (wLength == 0) throw new Exception("lexical error").extend(source);
	return b;
}

/**
* <p>Returns the word most recently grabbed using this <code>Lexicon</code>.</p>
*
* @return the word most recently grabbed by {@link #grab(java.io.LineNumberReader) <code>grab(source)</code>}.
*/
protected String word() {
	return w.substring(0);
}
//Lexicon.interpret
/**
* <p>Repeatedly invokes {@link #grab(java.io.LineNumberReader) <code>grab(source)</code>} until the end of the source stream reached. Blocks until a character is available, or an I/O error occurs. This method is overridden by <code>Grammar</code> and its parser subclasses, so it is only invoked when this <code>Lexicon</code> has not been extended into a <code>Grammar</code> or parser.</p>
*
* @param source the source character stream.
* @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. A <code>Lexicon</code> always returns null.
* @throws Lexicon.Exception if an I/O or lexical error occurs.
*/
Object interpret(LineNumberReader source) throws Exception {/*debug*/
	if ((debug & TERMINALS) > 0) System.out.println(
		"----terminals\n\t" + E.keySet().toString().replaceFirst("\\[", "{").replaceAll(", ", " ").replaceFirst("\\]$", "}\n----------"));/*off*/

	for (Object a; (a = grab(source)) != $;)/*debug*/
		if ((debug & LEXICAL) > 0) System.out.println(
			a + (!a.equals(word()) ? " " + word() : ""))/*off*/
		;
	return null;
}

/**
* <p>Interprets a source character stream using this <code>Lexicon</code>.</p>
*
* @param source the source character stream.
* @return the <code>ParseTree</code> constructed by interpreting <code>source</code>.
* @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs.
*/
public Object interpret(Reader source) throws Exception {
	return interpret(new LineNumberReader(source));
}

/**
* <p>Interprets a source string using this <code>Lexicon</code>.</p>
*
* @param source the source string.
* @return the <code>ParseTree</code> constructed by interpreting <code>source</code>.
* @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs.
*/
public Object interpret(String source) throws Exception {
	return interpret(new StringReader(source));
}

/**
* <p>Interprets a source byte stream using this <code>Lexicon</code>.</p>
*
* @param source the source byte stream.
* @return the <code>ParseTree</code> constructed by interpreting <code>source</code>.
* @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs.
*/
public Object interpret(InputStream source) throws Exception {
	return interpret(new InputStreamReader(source));
}

/**
* <p>Interprets the standard input stream using this <code>Lexicon</code>.</p>
*
* @return the <code>ParseTree</code> constructed by interpreting the standard input stream.
* @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs.
*/
public Object interpret() throws Exception {
	return interpret(System.in);
}

/**
* <p>Interprets a source file using this <code>Lexicon</code>.</p>
*
* @param source the source file.
* @return the <code>ParseTree</code> constructed by interpreting <code>source</code>.
* @throws FileNotFoundException if the source file cannot be found.
* @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs.
*/
public Object interpret(File source) throws FileNotFoundException, Exception {
	return interpret(new FileReader(source));
}

/**
* <p>Interprets a source pipe using this <code>Lexicon</code>.</p>
*
* @param source the source pipe.
* @return the <code>ParseTree</code> constructed by interpreting <code>source</code>.
* @throws IOException if the source pipe cannot be connected.
* @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs.
*/
public Object interpret(PipedWriter source) throws IOException, Exception {
	return interpret(new PipedReader(source));
}
//Lexicon.interpret(arguments)
/**
* <p>The debug switches, initially zero. The following bits enable debugging to standard output:</p>
* <blockquote><dl>
*	<dt><code>0x01</code> = <code>TERMINALS</code></dt>
*	<dd>Print the set of terminals before lexical analysis</dd>
*	<dt><code>0x02</code> = <code>LEXICAL</code> </dt>
*	<dd>Print terminals and associated words grabbed during lexical analysis</dd>
*	<dt><code>0x04</code> = <code>FIRST_FOLLOW</code></dt>
*	<dd>Print first and follow sets precomputed during syntax analysis</dd>
*	<dt><code>0x08</code> = <code>SYNTAX</code></dt>
*	<dd>Print parsing decisions made during syntax analysis</dd>
*	<dt><code>0x10</code> = <code>CONFLICT</code></dt>
*	<dd>Print parsing conflicts encountered during syntax analysis</dd>
*	<dt><code>0x20</code> = <code>PARSE_TREE</code></dt>
*	<dd>Print each <code>ParseTree</code> produced by syntax analysis</dd>
* </dl></blockquote>
* @since 1.1
*/
protected int debug = 0;

/**
* <p>{@link #debug <code>debug</code>} switch constant enabling printing the set of terminals before lexical analysis.</p>
* @since 1.1
*/
protected static final int TERMINALS = 0x01;
/**
* <p>{@link #debug <code>debug</code>} switch constant enabling printing terminals and associated words grabbed during lexical analysis.</p>
* @since 1.1
*/
protected static final int LEXICAL = 0x02;
/**
* <p>{@link #debug <code>debug</code>} switch constant enabling all debugging.</p>
* @since 1.1
*/
protected static final int VERBOSE = 0xFF;

/**
* <p>Lexical analysis by command-line arguments using this <code>Lexicon</code>. The first I/O or lexical error that occurs during lexical analysis is printed to the standard error stream.</p>
*

}

Author comment

Began life as a copy of #2000387

download  show line numbers   

Snippet is not live.

Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #2000391
Snippet name: class Lexicon
Eternal ID of this version: #2000391/1
Text MD5: 7006facc4697d72152e73a064810dbe0
Author: stefan
Category: javax
Type: New Tinybrain snippet
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-06-26 21:04:53
Source code size: 41258 bytes / 1209 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 471 / 106
Referenced in: [show references]