1 | /** |
2 | * <p>This class implements a {@link Lexicon}.</p> |
3 | * |
4 | * @version 1.3 |
5 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
6 | */ |
7 | class Lexicon { |
8 | //Q |
9 | /** |
10 | * <p>The number of lexical NFA states constructed.</p> |
11 | */ |
12 | private static int QSize = 0; |
13 | |
14 | /** |
15 | * <p>Creates a new state in the lexical NFA.</p> |
16 | * |
17 | * @return a new state in the lexical NFA. |
18 | */ |
19 | private static Integer s() { |
20 | return ++QSize; |
21 | } |
22 | //delta |
23 | /** |
24 | * <p>The transition relation of the lexical NFA.</p> |
25 | */ |
26 | private static final Stack<Stack<Object>> delta = new Stack<Stack<Object>>(); |
27 | |
28 | /** |
29 | * <p>Puts a transition into the lexical NFA.</p> |
30 | * |
31 | * @param s the state from which the transition is made. |
32 | * @param A the <code>Alphabet</code> on which the transition is made. |
33 | * @param r the state to which the transition is made. |
34 | */ |
35 | private static void put(Integer s, Alphabet A, Integer r) { |
36 | |
37 | if (Math.max(s,r) >= delta.size()) delta.setSize(Math.max(s,r)+1); |
38 | |
39 | Stack<Object> pairs = delta.get(s); |
40 | if (pairs == null) delta.set(s, pairs = new Stack<Object>()); |
41 | |
42 | pairs.push(A); |
43 | pairs.push(r); |
44 | } |
45 | //Set |
46 | /** |
47 | * <p>This class implements a {@link Lexicon.Set <code>Set</code>}.</p> |
48 | * |
49 | * @version 1.3 |
50 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
51 | * @param <E> the element type. |
52 | */ |
53 | static class Set<E> extends Stack<E> { |
54 | |
55 | /** |
56 | * <p>The null exclusion indicator. If <code>true</code>, <code>add</code> methods will not add <code>null</code> to this <code>Set</code>.</p> |
57 | */ |
58 | private final boolean excludeNull; |
59 | |
60 | /** |
61 | * <p>Constructs a <code>Set</code> with an initial capacity.</p> |
62 | * |
63 | * @param capacity the initial capacity. The magnitude of <code>capacity</code> is the initial capacity. The null exclusion indicator is initialized to <code>true</code> if <code>capacity</code> is negative. |
64 | */ |
65 | Set(int capacity) { |
66 | super(); |
67 | ensureCapacity(Math.abs(capacity)); |
68 | excludeNull = (capacity < 0); |
69 | } |
70 | |
71 | /** |
72 | * <p>Adds an element to this <code>Set</code>. The element is not added if it occurs in this <code>Set</code> or it is <code>null</code> and the null exclusion indicator is <code>true</code>. The capacity is expanded if necessary.</p> |
73 | * |
74 | * @param element the element to add to this <code>Set</code>. |
75 | * @return <code>true</code> if this <code>Set</code> is changed; <code>false</code> otherwise. |
76 | */ |
77 | public boolean add(E element) { |
78 | if (excludeNull && element == null || contains(element)) return false; |
79 | push(element); |
80 | return true; |
81 | } |
82 | |
83 | /** |
84 | * <p>Adds a <code>Set</code> of elements to this <code>Set</code>. An element is not added if it occurs in this <code>Set</code> or it is <code>null</code> and the null exclusion indicator is <code>true</code>. The capacity is expanded if necessary.</p> |
85 | * |
86 | * @param index the index in <code>S</code> beyond which elements are added. |
87 | * @param S the <code>Set</code> to add to this <code>Set</code>. |
88 | * @return <code>true</code> if this <code>Set</code> is changed; <code>false</code> otherwise. |
89 | */ |
90 | boolean add(int index, Set<E> S) { |
91 | if (S == null) return false; |
92 | boolean push = isEmpty(); |
93 | boolean add = false; |
94 | |
95 | for (int i = index; i < S.size(); i++) { |
96 | E element = S.get(i); |
97 | |
98 | if (!(excludeNull && element == null)) |
99 | if (push) { |
100 | push(element); |
101 | add = true; |
102 | } |
103 | else if (add(element)) |
104 | add = true; |
105 | } |
106 | return add; |
107 | } |
108 | |
109 | /** |
110 | * <p>Adds a <code>Set</code> of elements to this <code>Set</code>. An element is not added if it occurs in this <code>Set</code> or it is <code>null</code> and the null exclusion indicator is <code>true</code>. The capacity is expanded if necessary.</p> |
111 | * |
112 | * @param S the <code>Set</code> to add to this <code>Set</code>. |
113 | * @return <code>true</code> if this <code>Set</code> is changed; <code>false</code> otherwise. |
114 | */ |
115 | boolean add(Set<E> S) { |
116 | return add(0, S); |
117 | } |
118 | |
119 | public String toString() { |
120 | StringBuffer result = new StringBuffer(80); |
121 | result.append('{'); |
122 | |
123 | for (int i = 0; i < size(); i++) { |
124 | if (i > 0) result.append(' '); |
125 | result.append(get(i)); |
126 | } |
127 | result.append('}'); |
128 | return result.toString(); |
129 | } |
130 | //Set |
131 | } |
132 | //I |
133 | /** |
134 | * <p>The initial states of the lexical NFA. When empty, there is a need to compute the current initial states. It is computed only on demand created by {@link #initial()}.</p> |
135 | */ |
136 | private final Set<Integer> I; |
137 | //F |
138 | /** |
139 | * <p>The final states of the lexical NFA. A final state is mapped to the terminal it accepts in this <code>Lexicon</code>. When empty, there is a need to compute current final states. It is computed only on demand created by {@link #initial()}.</p> |
140 | */ |
141 | private final Map<Integer, Object> F; |
142 | //Lexicon.transition |
143 | /** |
144 | * <p>Computes a transition using the lexical NFA.</p> |
145 | * |
146 | * @param S the states from which the transition is made. |
147 | * @param a the character on which the transition is made. |
148 | * @param R the states to which the transition is made. |
149 | * @return the states to which the transition is made. |
150 | */ |
151 | private static Set<Integer> transition(Set<Integer> S, char a, Set<Integer> R) { |
152 | R.clear(); |
153 | |
154 | for (Integer s : S) { |
155 | Stack<Object> pairs = delta.get(s); |
156 | |
157 | if (pairs != null) |
158 | for (int k = 0; k < pairs.size(); k += 2) { |
159 | Alphabet A = (Alphabet)pairs.get(k); |
160 | |
161 | if (A != null) { |
162 | Integer r = (Integer)pairs.get(k+1); |
163 | if (A.contains(a)) R.add(r); |
164 | } |
165 | } |
166 | } |
167 | return R; |
168 | } |
169 | //Lexicon.closure |
170 | /** |
171 | * <p>Computes a reflexive transitive closure under empty transition using the lexical NFA. The closure is computed in place by a breadth-first search expanding <code>S</code>.</p> |
172 | * |
173 | * @param S the states whose reflexive transitive closure is computed under empty transition. |
174 | * @return the reflexive transitive closure of <code>S</code> under empty transition. |
175 | */ |
176 | private static Set<Integer> closure(Set<Integer> S) { |
177 | |
178 | for (int i = 0; i < S.size(); i++) { |
179 | Integer s = S.get(i); |
180 | Stack<Object> pairs = delta.get(s); |
181 | |
182 | if (pairs != null) |
183 | for (int k = 0; k < pairs.size(); k += 2) { |
184 | Alphabet A = (Alphabet)pairs.get(k); |
185 | |
186 | if (A == null) { |
187 | Integer r = (Integer)pairs.get(k+1); |
188 | S.add(r); |
189 | } |
190 | } |
191 | } |
192 | return S; |
193 | } |
194 | //Expression |
195 | /** |
196 | * <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing a regular language.</p> |
197 | * |
198 | * @version 1.3 |
199 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
200 | */ |
201 | abstract public static class Expression implements Cloneable { |
202 | |
203 | /** |
204 | * <p>The initial state of the NFA constructed from this <code>Expression</code>.</p> |
205 | */ |
206 | Integer i; |
207 | /** |
208 | * <p>The final state of the NFA constructed from this <code>Expression</code>.</p> |
209 | */ |
210 | Integer f; |
211 | |
212 | /** |
213 | * <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
214 | * |
215 | * @return a clone of this <code>Expression</code>. |
216 | */ |
217 | abstract public Object clone(); |
218 | } |
219 | //Alphabet |
220 | /** |
221 | * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} of character symbols.</p> |
222 | * |
223 | * @version 1.3 |
224 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
225 | */ |
226 | abstract public static class Alphabet extends Expression { |
227 | |
228 | /** |
229 | * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> |
230 | * |
231 | * @param a the character whose status is requested. |
232 | * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. |
233 | */ |
234 | abstract boolean contains(char a); |
235 | } |
236 | //Match |
237 | /** |
238 | * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing some characters.</p> |
239 | * |
240 | * @version 1.3 |
241 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
242 | */ |
243 | public static class Match extends Alphabet { |
244 | |
245 | /** |
246 | * <p>The {@link Character} or {@link String} representing this <code>Alphabet</code>.</p> |
247 | */ |
248 | final Object A; |
249 | |
250 | /** |
251 | * <p>Constructs an <code>Alphabet</code> containing some characters, and builds the NFA constructed from this <code>Expression</code>.</p> |
252 | * |
253 | * @param i the initial state of the NFA constructed. |
254 | * @param A the {@link Character} or {@link String} of characters in this <code>Alphabet</code>. |
255 | * @param f the final state of the NFA constructed. |
256 | */ |
257 | private Match(Integer i, Object A, Integer f) { |
258 | this.A = A; |
259 | put(this.i = i, this, this.f = f); |
260 | } |
261 | |
262 | /** |
263 | * <p>Constructs an <code>Alphabet</code> containing one character, and builds the NFA constructed from this <code>Expression</code>.</p> |
264 | * |
265 | * @param i the initial state of the NFA constructed. |
266 | * @param a the character in this <code>Alphabet</code>. |
267 | * @param f the final state of the NFA constructed. |
268 | */ |
269 | private Match(Integer i, char a, Integer f) { |
270 | this(i, new Character(a), f); |
271 | } |
272 | |
273 | /** |
274 | * <p>Constructs an <code>Alphabet</code> containing one character, and builds the NFA constructed from this <code>Expression</code>.</p> |
275 | * |
276 | * @param a the character in this <code>Alphabet</code>. |
277 | */ |
278 | public Match(char a) { |
279 | this(s(), a, s()); |
280 | } |
281 | |
282 | /** |
283 | * <p>Constructs an <code>Alphabet</code> containing some characters, and builds the NFA constructed from this <code>Expression</code>.</p> |
284 | * |
285 | * @param A the {@link Character} or {@link String} of characters in this <code>Alphabet</code>. |
286 | */ |
287 | public Match(Object A) { |
288 | this(s(), A, s()); |
289 | } |
290 | |
291 | /** |
292 | * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> |
293 | * |
294 | * @param a the character whose status is requested. |
295 | * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. |
296 | */ |
297 | boolean contains(char a) { |
298 | if (A instanceof Character) |
299 | return (Character)A == a; |
300 | |
301 | if (A instanceof String) |
302 | return ((String)A).indexOf(a) != -1; |
303 | |
304 | if (A instanceof Stack<?>) |
305 | for (Alphabet alphabet : (Stack<Alphabet>)A) |
306 | if (alphabet.contains(a)) return true; |
307 | return false; |
308 | } |
309 | |
310 | /** |
311 | * <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
312 | * |
313 | * @return a clone of this <code>Alphabet</code>. |
314 | */ |
315 | public Object clone() { |
316 | return new Match(A); |
317 | } |
318 | } |
319 | //NonMatch |
320 | /** |
321 | * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing all except some characters.</p> |
322 | * |
323 | * @version 1.3 |
324 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
325 | */ |
326 | public static class NonMatch extends Match { |
327 | |
328 | /** |
329 | * <p>Constructs an <code>Alphabet</code> containing all characters except one, and builds the NFA constructed from this <code>Expression</code>.</p> |
330 | * |
331 | * @param a the character not in this <code>Alphabet</code>. |
332 | */ |
333 | public NonMatch(char a) { |
334 | super(a); |
335 | } |
336 | |
337 | /** |
338 | * <p>Constructs an <code>Alphabet</code> containing all characters except some, and builds the NFA constructed from this <code>Expression</code>.</p> |
339 | * |
340 | * @param A the {@link Character} or {@link String} of characters not in this <code>Alphabet</code>. |
341 | */ |
342 | public NonMatch(Object A) { |
343 | super(A); |
344 | } |
345 | |
346 | /** |
347 | * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> |
348 | * |
349 | * @param a the character whose status is requested. |
350 | * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. |
351 | */ |
352 | boolean contains(char a) { |
353 | return a != (char)-1 && !super.contains(a); |
354 | } |
355 | |
356 | /** |
357 | * <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
358 | * |
359 | * @return a clone of this <code>Alphabet</code>. |
360 | */ |
361 | public Object clone() { |
362 | return new NonMatch(A); |
363 | } |
364 | } |
365 | //Range |
366 | /** |
367 | * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing the characters in a range.</p> |
368 | * |
369 | * @version 1.3 |
370 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
371 | */ |
372 | public static class Range extends Alphabet { |
373 | |
374 | /** |
375 | * <p>The first character in the range.</p> |
376 | */ |
377 | private final char a1; |
378 | /** |
379 | * <p>The last character in the range.</p> |
380 | */ |
381 | private final char a2; |
382 | |
383 | /** |
384 | * <p>Constructs an <code>Alphabet</code> containing the characters in a range, and builds the NFA constructed from this <code>Expression</code>.</p> |
385 | * |
386 | * @param a1 the first character in the range. |
387 | * @param a2 the last character in the range. |
388 | */ |
389 | public Range(char a1, char a2) { |
390 | this.a1 = a1; |
391 | this.a2 = a2; |
392 | put(i = s(), this, f = s()); |
393 | } |
394 | |
395 | /** |
396 | * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> |
397 | * |
398 | * @param a the character whose status is requested. |
399 | * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. |
400 | */ |
401 | boolean contains(char a) { |
402 | return a1 <= a && a <= a2; |
403 | } |
404 | |
405 | /** |
406 | * <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
407 | * |
408 | * @return a clone of this <code>Alphabet</code>. |
409 | */ |
410 | public Object clone() { |
411 | return new Range(a1, a2); |
412 | } |
413 | } |
414 | |
415 | /** |
416 | * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing the characters in a POSIX character class.</p> |
417 | * |
418 | * @version 1.3 |
419 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
420 | */ |
421 | public static class PosixClass extends Alphabet { |
422 | |
423 | /** |
424 | * <p>The bit mask representing this <code>PosixClass</code>.</p> |
425 | */ |
426 | private final int posixClass; |
427 | |
428 | /** |
429 | * <p>Constructs an <code>Alphabet</code> containing the characters in a POSIX character class, and builds the NFA constructed from this <code>Expression</code>.</p> |
430 | * |
431 | * @param posixClass the bit mask representing this <code>PosixClass</code>. |
432 | */ |
433 | private PosixClass(int posixClass) { |
434 | this.posixClass = posixClass; |
435 | put(i = s(), this, f = s()); |
436 | } |
437 | |
438 | /** |
439 | * <p>Creates an <code>Alphabet</code> containing the uppercase alphabetic characters.</p> |
440 | * |
441 | * @return an <code>Alphabet</code> containing the uppercase alphabetic characters. |
442 | */ |
443 | public static PosixClass upper() { |
444 | return new PosixClass(0x0001); |
445 | } |
446 | |
447 | /** |
448 | * <p>Creates an <code>Alphabet</code> containing the lowercase alphabetic characters.</p> |
449 | * |
450 | * @return an <code>Alphabet</code> containing the lowercase alphabetic characters. |
451 | */ |
452 | public static PosixClass lower() { |
453 | return new PosixClass(0x0002); |
454 | } |
455 | |
456 | /** |
457 | * <p>Creates an <code>Alphabet</code> containing the alphabetic characters.</p> |
458 | * |
459 | * @return an <code>Alphabet</code> containing the alphabetic characters. |
460 | */ |
461 | public static PosixClass alpha() { |
462 | return new PosixClass(0x0004); |
463 | } |
464 | |
465 | /** |
466 | * <p>Creates an <code>Alphabet</code> containing the decimal digit characters.</p> |
467 | * |
468 | * @return an <code>Alphabet</code> containing the decimal digit characters. |
469 | */ |
470 | public static PosixClass digit() { |
471 | return new PosixClass(0x0008); |
472 | } |
473 | |
474 | /** |
475 | * <p>Creates an <code>Alphabet</code> containing the hexadecimal digit characters.</p> |
476 | * |
477 | * @return an <code>Alphabet</code> containing the hexadecimal digit characters. |
478 | */ |
479 | public static PosixClass xdigit() { |
480 | return new PosixClass(0x0010); |
481 | } |
482 | |
483 | /** |
484 | * <p>Creates an <code>Alphabet</code> containing the alphanumeric characters.</p> |
485 | * |
486 | * @return an <code>Alphabet</code> containing the alphanumeric characters. |
487 | */ |
488 | public static PosixClass alnum() { |
489 | return new PosixClass(0x0020); |
490 | } |
491 | |
492 | /** |
493 | * <p>Creates an <code>Alphabet</code> containing the punctuation characters.</p> |
494 | * |
495 | * @return an <code>Alphabet</code> containing the punctuation characters. |
496 | */ |
497 | public static PosixClass punct() { |
498 | return new PosixClass(0x0040); |
499 | } |
500 | |
501 | /** |
502 | * <p>Creates an <code>Alphabet</code> containing the graphical characters.</p> |
503 | * |
504 | * @return an <code>Alphabet</code> containing the graphical characters. |
505 | */ |
506 | public static PosixClass graph() { |
507 | return new PosixClass(0x0080); |
508 | } |
509 | |
510 | /** |
511 | * <p>Creates an <code>Alphabet</code> containing the printable characters.</p> |
512 | * |
513 | * @return an <code>Alphabet</code> containing the printable characters. |
514 | */ |
515 | public static PosixClass print() { |
516 | return new PosixClass(0x0100); |
517 | } |
518 | |
519 | /** |
520 | * <p>Creates an <code>Alphabet</code> containing the blank characters.</p> |
521 | * |
522 | * @return an <code>Alphabet</code> containing the blank characters. |
523 | */ |
524 | public static PosixClass blank() { |
525 | return new PosixClass(0x0200); |
526 | } |
527 | |
528 | /** |
529 | * <p>Creates an <code>Alphabet</code> containing the space characters.</p> |
530 | * |
531 | * @return an <code>Alphabet</code> containing the space characters. |
532 | */ |
533 | public static PosixClass space() { |
534 | return new PosixClass(0x0400); |
535 | } |
536 | |
537 | /** |
538 | * <p>Creates an <code>Alphabet</code> containing the control characters.</p> |
539 | * |
540 | * @return an <code>Alphabet</code> containing the control characters. |
541 | */ |
542 | public static PosixClass cntrl() { |
543 | return new PosixClass(0x0800); |
544 | } |
545 | |
546 | /** |
547 | * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> |
548 | * |
549 | * @param a the character whose status is requested. |
550 | * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. |
551 | */ |
552 | boolean contains(char a) { |
553 | int UPPER = 0x0001; int LOWER = 0x0002; |
554 | int ALPHA = 0x0004; int DIGIT = 0x0008; |
555 | int XDIGIT = 0x0010; int ALNUM = 0x0020; |
556 | int PUNCT = 0x0040; int GRAPH = 0x0080; |
557 | int PRINT = 0x0100; int BLANK = 0x0200; |
558 | int SPACE = 0x0400; int CNTRL = 0x0800; |
559 | int classes = 0; |
560 | |
561 | switch (Character.getType(a)) { |
562 | default: break; |
563 | case Character.UPPERCASE_LETTER: |
564 | classes |= UPPER | ALPHA | (('A' <= a && a <= 'F') ? XDIGIT : 0) | ALNUM | GRAPH | PRINT; break; |
565 | case Character.LOWERCASE_LETTER: |
566 | classes |= LOWER | ALPHA | (('a' <= a && a <= 'f') ? XDIGIT : 0) | ALNUM | GRAPH | PRINT; break; |
567 | case Character.TITLECASE_LETTER: |
568 | case Character.MODIFIER_LETTER: |
569 | case Character.OTHER_LETTER: |
570 | classes |= ALPHA | ALNUM | GRAPH | PRINT; break; |
571 | case Character.NON_SPACING_MARK: |
572 | case Character.COMBINING_SPACING_MARK: |
573 | case Character.ENCLOSING_MARK: |
574 | classes |= PUNCT | GRAPH | PRINT; break; |
575 | case Character.DECIMAL_DIGIT_NUMBER: |
576 | classes |= DIGIT | XDIGIT | ALNUM | GRAPH | PRINT; break; |
577 | case Character.LETTER_NUMBER: |
578 | case Character.OTHER_NUMBER: |
579 | classes |= ALNUM | GRAPH | PRINT; break; |
580 | case Character.CONNECTOR_PUNCTUATION: |
581 | case Character.DASH_PUNCTUATION: |
582 | case Character.START_PUNCTUATION: |
583 | case Character.END_PUNCTUATION: |
584 | case Character.INITIAL_QUOTE_PUNCTUATION: |
585 | case Character.FINAL_QUOTE_PUNCTUATION: |
586 | case Character.OTHER_PUNCTUATION: |
587 | case Character.MATH_SYMBOL: |
588 | case Character.CURRENCY_SYMBOL: |
589 | case Character.MODIFIER_SYMBOL: |
590 | case Character.OTHER_SYMBOL: |
591 | classes |= PUNCT | GRAPH | PRINT; break; |
592 | case Character.SPACE_SEPARATOR: |
593 | classes |= PRINT | BLANK | SPACE; break; |
594 | case Character.LINE_SEPARATOR: |
595 | case Character.PARAGRAPH_SEPARATOR: |
596 | break; |
597 | case Character.CONTROL: |
598 | classes |= ((a == '\t') ? BLANK : 0) | ((a == '\t' || a == '\n' || a == '\013' || a == '\f' || a == '\r') ? SPACE : 0) | CNTRL; break; |
599 | case Character.FORMAT: |
600 | case Character.SURROGATE: |
601 | case Character.PRIVATE_USE: |
602 | case Character.UNASSIGNED: |
603 | break; |
604 | } |
605 | return (classes & posixClass) != 0; |
606 | } |
607 | |
608 | /** |
609 | * <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
610 | * |
611 | * @return a clone of this <code>Alphabet</code>. |
612 | */ |
613 | public Object clone() { |
614 | return new PosixClass(posixClass); |
615 | } |
616 | } |
617 | //UnicodeCategory |
618 | /** |
619 | * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing the characters in a Unicode general category.</p> |
620 | * |
621 | * @version 1.3 |
622 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
623 | */ |
624 | public static class UnicodeCategory extends Alphabet { |
625 | |
626 | /** |
627 | * <p>The byte representing the Unicode general category.</p> |
628 | */ |
629 | private final byte category; |
630 | |
631 | /** |
632 | * <p>Constructs an <code>Alphabet</code> containing the characters in a Unicode general category, and builds the NFA constructed from this <code>Expression</code>. The class {@link Character} defines byte constants representing each of the Unicode general categories.</p> |
633 | * |
634 | * @param category The byte representing the Unicode general category. |
635 | * @see Character |
636 | */ |
637 | public UnicodeCategory(byte category) { |
638 | this.category = category; |
639 | put(i = s(), this, f = s()); |
640 | } |
641 | |
642 | /** |
643 | * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> |
644 | * |
645 | * @param a the character whose status is requested. |
646 | * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. |
647 | */ |
648 | boolean contains(char a) { |
649 | return Character.getType(a) == category; |
650 | } |
651 | |
652 | /** |
653 | * <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
654 | * |
655 | * @return a clone of this <code>Alphabet</code>. |
656 | */ |
657 | public Object clone() { |
658 | return new UnicodeCategory(category); |
659 | } |
660 | } |
661 | //Repetition |
662 | /** |
663 | * <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing the repetition of a regular language.</p> |
664 | * |
665 | * @version 1.3 |
666 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
667 | */ |
668 | public static class Repetition extends Expression { |
669 | |
670 | /** |
671 | * <p>The operand <code>Expression</code>.</p> |
672 | */ |
673 | private final Expression e1; |
674 | /** |
675 | * <p>The minimum number of times <code>e1</code> is repeated.</p> |
676 | */ |
677 | private final int min; |
678 | /** |
679 | * <p>The maximum number of times <code>e1</code> is repeated.</p> |
680 | */ |
681 | private final int max; |
682 | |
683 | /** |
684 | * <p>Constructs an <code>Expression</code> expressing the repetition of a regular language, and builds the NFA constructed from this <code>Expression</code>. Large finite values for the minimum or maximum cause the NFA constructed from the operand <code>Expression</code> to be copied many times, resulting in a space-inefficient NFA.</p> |
685 | * |
686 | * @param e1 the operand <code>Expression</code>. |
687 | * @param min the minimum number of times <code>e1</code> is repeated. If negative, it is assumed to be zero. |
688 | * @param max the maximum number of times <code>e1</code> is repeated. If negative, it is assumed to be infinity. |
689 | */ |
690 | public Repetition(Expression e1, int min, int max) { |
691 | this.e1 = e1 = (Expression)e1.clone(); |
692 | this.min = min = Math.max(min, 0); |
693 | this.max = max; |
694 | |
695 | i = (min > 0) ? e1.i : s(); |
696 | f = (min > 0) ? e1.f : i; |
697 | |
698 | if (min == 0 && max < 0) { |
699 | put(i, null, e1.i); |
700 | put(e1.f, null, i); |
701 | } |
702 | else { |
703 | for (int k = 2; k <= min; k++) { |
704 | e1 = (Expression)e1.clone(); |
705 | put(f, null, e1.i); |
706 | f = e1.f; |
707 | } |
708 | if (max > min) { |
709 | Integer tail = f; |
710 | put(tail, null, f = s()); |
711 | |
712 | for (int k = min+1; k <= max; k++) { |
713 | if (k > 1) e1 = (Expression)e1.clone(); |
714 | put(tail, null, e1.i); |
715 | put(tail = e1.f, null, f); |
716 | } |
717 | } |
718 | else if (max < 0) put(f, null, e1.i); |
719 | } |
720 | } |
721 | |
722 | /** |
723 | * <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
724 | * |
725 | * @return a clone of this <code>Expression</code>. |
726 | */ |
727 | public Object clone() { |
728 | return new Repetition(e1, min, max); |
729 | } |
730 | } |
731 | //Concatenation |
732 | /** |
733 | * <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing the concatenation of two regular languages.</p> |
734 | * |
735 | * @version 1.3 |
736 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
737 | */ |
738 | public static class Concatenation extends Expression { |
739 | |
740 | /** |
741 | * <p>The left operand <code>Expression</code>.</p> |
742 | */ |
743 | private final Expression e1; |
744 | /** |
745 | * <p>The right operand <code>Expression</code>.</p> |
746 | */ |
747 | private final Expression e2; |
748 | |
749 | /** |
750 | * <p>Constructs an <code>Expression</code> expressing the concatenation of two regular languages, and builds the NFA constructed from this <code>Expression</code>.</p> |
751 | * |
752 | * @param e1 the left operand <code>Expression</code>. |
753 | * @param e2 the right operand <code>Expression</code>. |
754 | */ |
755 | public Concatenation(Expression e1, Expression e2) { |
756 | this.e1 = e1 = (Expression)e1.clone(); |
757 | this.e2 = e2 = (Expression)e2.clone(); |
758 | |
759 | i = e1.i; |
760 | f = e2.f; |
761 | |
762 | put(e1.f, null, e2.i); |
763 | } |
764 | |
765 | /** |
766 | * <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
767 | * |
768 | * @return a clone of this <code>Expression</code>. |
769 | */ |
770 | public Object clone() { |
771 | return new Concatenation(e1, e2); |
772 | } |
773 | } |
774 | //Singleton |
775 | /** |
776 | * <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing a singleton language.</p> |
777 | * |
778 | * @version 1.3 |
779 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
780 | */ |
781 | public static class Singleton extends Expression { |
782 | |
783 | /** |
784 | * <p>The string whose singleton language is expressed.</p> |
785 | */ |
786 | private final String x; |
787 | |
788 | /** |
789 | * <p>Constructs an <code>Expression</code> expressing a singleton language, and builds the NFA constructed from this <code>Expression</code>.</p> |
790 | * |
791 | * @param x the string whose singleton language is expressed. |
792 | */ |
793 | public Singleton(String x) { |
794 | this.x = x; |
795 | |
796 | f = i = s(); |
797 | |
798 | for (char c : x.toCharArray()) |
799 | new Match(f, c, f = s()); |
800 | } |
801 | |
802 | /** |
803 | * <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
804 | * |
805 | * @return a clone of this <code>Expression</code>. |
806 | */ |
807 | public Object clone() { |
808 | return new Singleton(x); |
809 | } |
810 | } |
811 | //Union |
812 | /** |
813 | * <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing the union of two regular languages.</p> |
814 | * |
815 | * @version 1.3 |
816 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
817 | */ |
818 | public static class Union extends Expression { |
819 | |
820 | /** |
821 | * <p>The left operand <code>Expression</code>.</p> |
822 | */ |
823 | private final Expression e1; |
824 | /** |
825 | * <p>The right operand <code>Expression</code>.</p> |
826 | */ |
827 | private final Expression e2; |
828 | |
829 | /** |
830 | * <p>Constructs an <code>Expression</code> expressing the union of two regular languages, and builds the NFA constructed from this <code>Expression</code>.</p> |
831 | * |
832 | * @param e1 the left operand <code>Expression</code>. |
833 | * @param e2 the right operand <code>Expression</code>. |
834 | */ |
835 | public Union(Expression e1, Expression e2) { |
836 | this.e1 = e1 = (Expression)e1.clone(); |
837 | this.e2 = e2 = (Expression)e2.clone(); |
838 | |
839 | i = s(); |
840 | f = s(); |
841 | |
842 | put(i, null, e1.i); put(e1.f, null, f); |
843 | put(i, null, e2.i); put(e2.f, null, f); |
844 | } |
845 | |
846 | /** |
847 | * <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
848 | * |
849 | * @return a clone of this <code>Expression</code>. |
850 | */ |
851 | public Object clone() { |
852 | return new Union(e1, e2); |
853 | } |
854 | } |
855 | //expression(ere) |
856 | /** |
857 | * <p>Creates an <code>Expression</code> by interpreting a POSIX extended regular expression (ERE), as used in egrep. The syntax and semantics for EREs is formally specified by the <a href="../../../src/gi/ERE.java">ERE <code>Grammar</code></a>. Provides a convenient method for constructing an <code>Expression</code>, at the cost of an LR(1) parse. Implementations seeking maximum speed should avoid this method and use explicit <code>Expression</code> subclass constructors; for example,</p> |
858 | * <blockquote><code>new Union(new NonMatch("0"), new Singleton("foo"))</code></blockquote> |
859 | * instead of |
860 | * <blockquote><code>expression("[^0]|foo")</code></blockquote> |
861 | * |
862 | * @param ere the POSIX extended regular expression (ERE) to interpret. |
863 | * @return the <code>Expression</code> constructed by interpreting <code>ere</code>. |
864 | * @throws Lexicon.Exception if an ERE syntax error occurs. |
865 | */ |
866 | public static Expression expression(String ere) throws Exception { |
867 | return ERE.expression(ere); |
868 | } |
869 | //E |
/**
 * <p>The mapping representing this <code>Lexicon</code>. Each terminal is mapped to its associated <code>Expression</code>; the initial and final NFA states of that <code>Expression</code> are read from it when the initial state set is computed.</p>
 */
private final Map<Object, Expression> E;
874 | |
875 | /** |
876 | * <p>Puts a terminal and associated <code>Expression</code> into this <code>Lexicon</code>. The <code>Expression</code> supersedes any previously associated with the terminal.</p> |
877 | * |
878 | * @param a the terminal to add to this <code>Lexicon</code>. |
879 | * @param e the <code>Expression</code> associated with terminal <code>a</code>. When grabbing, the language expressed by <code>e</code> matches <code>a</code>. |
880 | */ |
881 | public void put(Object a, Expression e) { |
882 | E.put(a, e); |
883 | I.clear(); |
884 | F.clear(); |
885 | } |
886 | |
887 | /** |
888 | * <p>Indicates whether a symbol is a terminal in this <code>Lexicon</code>.</p> |
889 | * |
890 | * @param a the symbol whose status is requested. |
891 | * @return <code>true</code> if <code>a</code> is a terminal in this <code>Lexicon</code>; <code>false</code> otherwise. |
892 | */ |
893 | boolean terminal(Object a) { |
894 | return E.containsKey(a); |
895 | } |
896 | //Lexicon() |
/**
 * <p>The terminal matched by the character at the end of a source stream.</p>
 * <p>It is created with <code>new String("$")</code> so that it is a distinct object: {@link #grab(java.io.LineNumberReader) <code>grab(source)</code>} and <code>interpret(source)</code> compare terminals against it by reference (<code>!=</code>), not with <code>equals</code>.</p>
 * @since 1.1, renames <code>END_OF_SOURCE</code> in version 1.0.
 */
protected static final Object $ = new String("$");

/**
 * <p>The <code>Expression</code> associated with {@link #$} when a <code>Lexicon</code> is constructed. It is built from <code>(char)-1</code>, which is the character {@link #grab(java.io.LineNumberReader) <code>grab(source)</code>} obtains by casting the end-of-stream result of <code>read()</code> to <code>char</code>.</p>
 */
private static final Expression $_EXPRESSION = new Match((char)-1);
907 | |
/**
 * <p>Constructs a <code>Lexicon</code> containing only the end-of-source terminal {@link #$}.</p>
 * <p>NOTE(review): the negative argument passed to <code>Set</code> presumably selects null exclusion together with an initial capacity; confirm against the <code>Set</code> constructor.</p>
 */
protected Lexicon() {
    this.E = new HashMap<Object, Expression>(500);
    this.I = new Set<Integer>(-200);
    this.F = new HashMap<Integer, Object>(500);

    // Every Lexicon recognizes the end of the source stream.
    this.put($, $_EXPRESSION);
}
917 | |
/**
 * <p>Constructs a <code>Lexicon</code> that is a shallow copy of <code>lexicon</code>. The terminal mapping, initial states and final-state mapping of the new <code>Lexicon</code> are the very same objects as those in <code>lexicon</code>, so a change made through either <code>Lexicon</code> is visible through both.</p>
 *
 * @param lexicon the <code>Lexicon</code> to copy.
 */
Lexicon(Lexicon lexicon) {/*debug*/
    this.debug = lexicon.debug;/*off*/
    this.E = lexicon.E;
    this.I = lexicon.I;
    this.F = lexicon.F;
}
930 | //Lexicon.initial |
931 | /** |
932 | * <p>Returns the initial states of the lexical NFA.</p> |
933 | * |
934 | * @return {@link #I}, computing it and {@link #F} if there is a need to compute the current initial states and final states. |
935 | */ |
936 | private Set<Integer> initial() { |
937 | |
938 | if (I.isEmpty()) { |
939 | |
940 | for (Object a : E.keySet()) { |
941 | Expression e = E.get(a); |
942 | |
943 | I.add(e.i); |
944 | F.put(e.f, a); |
945 | } |
946 | closure(I); |
947 | } |
948 | return I; |
949 | } |
950 | //accept |
951 | /** |
952 | * <p>Computes the current final state, if any, in the lexical NFA.</p> |
953 | * |
954 | * @param S the current states. |
955 | * @return the maximum final state in <code>S</code>. Returns <code>null</code> if <code>S</code> contains no final states. |
956 | */ |
957 | private Integer accept(Set<Integer> S) { |
958 | |
959 | Integer |
960 | f = null; |
961 | |
962 | for (Integer s : S) |
963 | if (F.containsKey(s)) |
964 | if (f == null || f < s) f = s; |
965 | |
966 | return f; |
967 | } |
968 | |
969 | /** |
970 | * <p>This class implements an {@link Lexicon.Exception <code>Exception</code>}.</p> |
971 | * |
972 | * @version 1.3 |
973 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
974 | */ |
975 | public class Exception extends java.lang.Exception { |
976 | |
977 | /** |
978 | * <p>The extended error message.</p> |
979 | */ |
980 | private StringBuffer message; |
981 | |
982 | /** |
983 | * <p>Constructs an <code>Exception</code> with a message.</p> |
984 | * |
985 | * @param message the error message. |
986 | */ |
987 | public Exception(String message) { |
988 | super(message); |
989 | } |
990 | |
991 | /** |
992 | * <p>Returns the error message.</p> |
993 | * |
994 | * @return the error message. |
995 | */ |
996 | public String getMessage() { |
997 | return (message == null) ? super.getMessage() : message.toString(); |
998 | } |
999 | |
1000 | /** |
1001 | * <p>Extends the error message in this <code>Exception</code>. The extended message includes the line number, message and source characters following the error.</p> |
1002 | * |
1003 | * @param source the source character stream. |
1004 | * @return this <code>Exception</code> with an extended message. |
1005 | */ |
1006 | Exception extend(LineNumberReader source) { |
1007 | if (message == null) message = new StringBuffer(132); |
1008 | else message.setLength(0); |
1009 | |
1010 | message.append("line "); |
1011 | message.append(source.getLineNumber()+1); |
1012 | message.append(": "); |
1013 | message.append(super.getMessage()); |
1014 | message.append(System.getProperty("line.separator")); |
1015 | message.append("..."); |
1016 | message.append(word()); |
1017 | try { |
1018 | String rest = source.readLine(); |
1019 | if (rest != null) message.append(rest); |
1020 | } |
1021 | catch (IOException exception) {} |
1022 | message.append(System.getProperty("line.separator")); |
1023 | message.append(" ^"); |
1024 | return this; |
1025 | } |
1026 | } |
1027 | //Lexicon.grab |
/**
 * <p>The pair of state sets through which the lexical NFA transitions. {@link #grab(java.io.LineNumberReader) <code>grab(source)</code>} hands them alternately to <code>transition</code>, selecting <code>R[w.length() % 2]</code> for each character read.</p>
 * <p>The array is created as <code>Set&lt;?&gt;[]</code> and cast, because Java does not permit creating an array of a parameterized type directly.</p>
 */
private final Set<Integer>[] R = (Set<Integer>[])new Set<?>[]{new Set<Integer>(-200), new Set<Integer>(-200)};
/**
 * <p>The <code>StringBuffer</code> containing the word most recently grabbed. Its current capacity is also used by {@link #grab(java.io.LineNumberReader) <code>grab(source)</code>} as the read-ahead limit when marking the source stream.</p>
 */
private final StringBuffer w = new StringBuffer(4000);
1036 | |
1037 | /** |
1038 | * <p>Grabs a terminal from a source character stream using this <code>Lexicon</code>. The variable returned by {@link #word()} is set to the longest nonempty prefix of the remaining source characters matching an <code>Expression</code> in this <code>Lexicon</code>. If no nonempty prefix matches an <code>Expression</code>, a <code>Lexicon.Exception</code> is thrown. If the longest matching prefix matches more than one <code>Expression</code>, the terminal associated with the <code>Expression</code> most recently constructed is returned. Blocks until a character is available, an I/O error occurs, or the end of the source stream is reached.</p> |
1039 | * |
1040 | * @param source the source character stream. |
1041 | * @return the terminal grabbed from <code>source</code>. |
1042 | * @throws Lexicon.Exception if an I/O or lexical error occurs. |
1043 | */ |
1044 | protected Object grab(LineNumberReader source) throws Exception { |
1045 | Set<Integer> S = initial(); |
1046 | w.setLength(0); |
1047 | int wLength = 0; |
1048 | Object b = null; |
1049 | try { |
1050 | source.mark(w.capacity()); |
1051 | do { |
1052 | int a = source.read(); |
1053 | S = closure(transition(S, (char)a, R[w.length() % 2])); |
1054 | if (S.isEmpty()) break; |
1055 | |
1056 | if (a != -1) w.append((char)a); else w.append($); |
1057 | |
1058 | Integer f = accept(S); |
1059 | if (f != null) { |
1060 | wLength = w.length(); |
1061 | b = F.get(f); |
1062 | source.mark(w.capacity()); |
1063 | } |
1064 | } while (b != $); |
1065 | w.setLength(wLength); |
1066 | source.reset(); |
1067 | } |
1068 | catch (IOException exception) { |
1069 | throw new Exception(exception.getMessage()); |
1070 | } |
1071 | if (wLength == 0) throw new Exception("lexical error").extend(source); |
1072 | return b; |
1073 | } |
1074 | |
1075 | /** |
1076 | * <p>Returns the word most recently grabbed using this <code>Lexicon</code>.</p> |
1077 | * |
1078 | * @return the word most recently grabbed by {@link #grab(java.io.LineNumberReader) <code>grab(source)</code>}. |
1079 | */ |
1080 | protected String word() { |
1081 | return w.substring(0); |
1082 | } |
1083 | //Lexicon.interpret |
1084 | /** |
1085 | * <p>Repeatedly invokes {@link #grab(java.io.LineNumberReader) <code>grab(source)</code>} until the end of the source stream reached. Blocks until a character is available, or an I/O error occurs. This method is overridden by <code>Grammar</code> and its parser subclasses, so it is only invoked when this <code>Lexicon</code> has not been extended into a <code>Grammar</code> or parser.</p> |
1086 | * |
1087 | * @param source the source character stream. |
1088 | * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. A <code>Lexicon</code> always returns null. |
1089 | * @throws Lexicon.Exception if an I/O or lexical error occurs. |
1090 | */ |
1091 | Object interpret(LineNumberReader source) throws Exception {/*debug*/ |
1092 | if ((debug & TERMINALS) > 0) System.out.println( |
1093 | "----terminals\n\t" + E.keySet().toString().replaceFirst("\\[", "{").replaceAll(", ", " ").replaceFirst("\\]$", "}\n----------"));/*off*/ |
1094 | |
1095 | for (Object a; (a = grab(source)) != $;)/*debug*/ |
1096 | if ((debug & LEXICAL) > 0) System.out.println( |
1097 | a + (!a.equals(word()) ? " " + word() : ""))/*off*/ |
1098 | ; |
1099 | return null; |
1100 | } |
1101 | |
1102 | /** |
1103 | * <p>Interprets a source character stream using this <code>Lexicon</code>.</p> |
1104 | * |
1105 | * @param source the source character stream. |
1106 | * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. |
1107 | * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. |
1108 | */ |
1109 | public Object interpret(Reader source) throws Exception { |
1110 | return interpret(new LineNumberReader(source)); |
1111 | } |
1112 | |
1113 | /** |
1114 | * <p>Interprets a source string using this <code>Lexicon</code>.</p> |
1115 | * |
1116 | * @param source the source string. |
1117 | * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. |
1118 | * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. |
1119 | */ |
1120 | public Object interpret(String source) throws Exception { |
1121 | return interpret(new StringReader(source)); |
1122 | } |
1123 | |
1124 | /** |
1125 | * <p>Interprets a source byte stream using this <code>Lexicon</code>.</p> |
1126 | * |
1127 | * @param source the source byte stream. |
1128 | * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. |
1129 | * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. |
1130 | */ |
1131 | public Object interpret(InputStream source) throws Exception { |
1132 | return interpret(new InputStreamReader(source)); |
1133 | } |
1134 | |
1135 | /** |
1136 | * <p>Interprets the standard input stream using this <code>Lexicon</code>.</p> |
1137 | * |
1138 | * @return the <code>ParseTree</code> constructed by interpreting the standard input stream. |
1139 | * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. |
1140 | */ |
1141 | public Object interpret() throws Exception { |
1142 | return interpret(System.in); |
1143 | } |
1144 | |
1145 | /** |
1146 | * <p>Interprets a source file using this <code>Lexicon</code>.</p> |
1147 | * |
1148 | * @param source the source file. |
1149 | * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. |
1150 | * @throws FileNotFoundException if the source file cannot be found. |
1151 | * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. |
1152 | */ |
1153 | public Object interpret(File source) throws FileNotFoundException, Exception { |
1154 | return interpret(new FileReader(source)); |
1155 | } |
1156 | |
1157 | /** |
1158 | * <p>Interprets a source pipe using this <code>Lexicon</code>.</p> |
1159 | * |
1160 | * @param source the source pipe. |
1161 | * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. |
1162 | * @throws IOException if the source pipe cannot be connected. |
1163 | * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. |
1164 | */ |
1165 | public Object interpret(PipedWriter source) throws IOException, Exception { |
1166 | return interpret(new PipedReader(source)); |
1167 | } |
1168 | //Lexicon.interpret(arguments) |
/**
 * <p>The debug switches, initially zero (no debugging output). The switches form a bit mask; the following bits enable debugging to standard output:</p>
 * <blockquote><dl>
 * <dt><code>0x01</code> = <code>TERMINALS</code></dt>
 * <dd>Print the set of terminals before lexical analysis</dd>
 * <dt><code>0x02</code> = <code>LEXICAL</code> </dt>
 * <dd>Print terminals and associated words grabbed during lexical analysis</dd>
 * <dt><code>0x04</code> = <code>FIRST_FOLLOW</code></dt>
 * <dd>Print first and follow sets precomputed during syntax analysis</dd>
 * <dt><code>0x08</code> = <code>SYNTAX</code></dt>
 * <dd>Print parsing decisions made during syntax analysis</dd>
 * <dt><code>0x10</code> = <code>CONFLICT</code></dt>
 * <dd>Print parsing conflicts encountered during syntax analysis</dd>
 * <dt><code>0x20</code> = <code>PARSE_TREE</code></dt>
 * <dd>Print each <code>ParseTree</code> produced by syntax analysis</dd>
 * </dl></blockquote>
 * <p>A switch is tested by masking, for example <code>(debug &amp; LEXICAL) &gt; 0</code>.</p>
 * @since 1.1
 */
protected int debug = 0;

/**
 * <p>{@link #debug <code>debug</code>} switch constant (bit <code>0x01</code>) enabling printing the set of terminals before lexical analysis.</p>
 * @since 1.1
 */
protected static final int TERMINALS = 0x01;
/**
 * <p>{@link #debug <code>debug</code>} switch constant (bit <code>0x02</code>) enabling printing terminals and associated words grabbed during lexical analysis.</p>
 * @since 1.1
 */
protected static final int LEXICAL = 0x02;
/**
 * <p>{@link #debug <code>debug</code>} switch constant enabling all debugging; it sets all of the low eight bits, and so includes every switch listed for {@link #debug <code>debug</code>}.</p>
 * @since 1.1
 */
protected static final int VERBOSE = 0xFF;
1204 | |
1205 | /** |
1206 | * <p>Lexical analysis by command-line arguments using this <code>Lexicon</code>. The first I/O or lexical error that occurs during lexical analysis is printed to the standard error stream.</p> |
1207 | * |
1208 | |
1209 | } |
Began life as a copy of #2000387
Snippet is not live.
Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #2000391 |
Snippet name: | class Lexicon |
Eternal ID of this version: | #2000391/1 |
Text MD5: | 7006facc4697d72152e73a064810dbe0 |
Author: | stefan |
Category: | javax |
Type: | New Tinybrain snippet |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2015-06-26 21:04:53 |
Source code size: | 41258 bytes / 1209 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 473 / 106 |
Referenced in: | [show references] |