Libraryless. Click here for Pure Java version (1728L/11K/26K).
1 | // Syntax: s == "bla" |
2 | |
3 | import javax.imageio.*; |
4 | import java.awt.image.*; |
5 | import java.awt.*; |
6 | import java.security.NoSuchAlgorithmException; |
7 | import java.security.MessageDigest; |
8 | import java.lang.reflect.*; |
9 | import java.net.*; |
10 | import java.io.*; |
11 | import javax.swing.*; |
12 | import java.util.regex.*; |
13 | import java.util.List; |
14 | import java.util.*; |
15 | |
16 | //1000300 // Lexicon |
17 | //1000515 // Lexicon, fixing |
18 | |
19 | |
20 | class JavaTok { |
21 | static String join(List<String> cnc) { |
22 | StringBuilder buf = new StringBuilder(); |
23 | for (String s : cnc) buf.append(s); |
24 | return buf.toString(); |
25 | } |
26 | |
27 | static List<String> split(String src) { |
28 | Java20 lex = new Java20(); |
29 | src = src.replace("\r\n", "\n"); |
30 | LineNumberReader source = new LineNumberReader(new StringReader(src)); |
31 | int lineNr = source.getLineNumber()+1; |
32 | List<T> list = new ArrayList<T>(); |
33 | try { |
34 | for (Object a; (a = lex.grab(source)) != lex.$;) { |
35 | String word = lex.word(); |
36 | String q = quote(word); |
37 | //System.out.println("grabbed at line " + lineNr + ": " + a + " " + q); |
38 | lineNr = source.getLineNumber()+1; |
39 | |
40 | T t = new T(a, word); |
41 | boolean isSpace = t.isSpace(); |
42 | if (isSpace && list.size() > 0 && list.get(list.size()-1).isSpace()) |
43 | list.get(list.size()-1).word += word; // merge spaces |
44 | else |
45 | list.add(t); |
46 | } |
47 | } catch (Lexicon.Exception e) { |
48 | throw new RuntimeException(e); |
49 | } |
50 | |
51 | List<String> cnc = new ArrayList<String>(); |
52 | for (int i = 0; i < list.size(); ) { |
53 | T t = list.get(i); |
54 | boolean shouldBeSpace = (cnc.size() % 2) == 0; |
55 | boolean isSpace = t.isSpace(); |
56 | if (shouldBeSpace == isSpace) { |
57 | cnc.add(t.word); |
58 | ++i; |
59 | } else if (shouldBeSpace) |
60 | cnc.add(""); |
61 | else { |
62 | System.out.println(cncToLines(cnc)); |
63 | throw new RuntimeException("TILT at " + cnc.size() + ": " + quote(t.word)); |
64 | } |
65 | } |
66 | if ((cnc.size() % 2) == 0) |
67 | cnc.add(""); |
68 | |
69 | return cnc; |
70 | } |
71 | |
72 | static class T { |
73 | Object a; String word; |
74 | |
75 | T(Object a, String word) { this.a = a; this.word = word; } |
76 | |
77 | boolean isSpace() { |
78 | return a.equals("WHITE_SPACE") || a.equals("COMMENT"); |
79 | } |
80 | } |
81 | |
82 | static String cncToLines(List<String> cnc) { |
83 | StringBuilder out = new StringBuilder(); |
84 | for (String token : cnc) |
85 | out.append(quote(token) + "\n"); |
86 | return out.toString(); |
87 | } |
88 | |
89 | public static String quote(String s) { |
90 | if (s == null) return "null"; |
91 | return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\""; |
92 | } |
93 | |
94 | static class Java20 extends Lexicon { |
95 | |
96 | Java20() { |
97 | /** |
98 | * Grammar for Java 2.0. |
99 | * |
100 | * Nonterminal - first letter uppercase |
101 | * TERMINAL - all letters uppercase |
102 | * keyword - all letters lowercase |
103 | */ |
104 | int INFINITY = -1; |
105 | |
106 | /** |
107 | * 19.3 Terminals from section 3.6: White Space: [[:space:]] |
108 | */ |
109 | put("WHITE_SPACE", new Repetition(space(), 1, INFINITY)); |
110 | |
111 | /** |
112 | * 19.3 Terminals from section 3.7: Comment |
113 | */ |
114 | put("COMMENT", new Union( |
115 | |
116 | // |
117 | // Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/ |
118 | // |
119 | new Concatenation( |
120 | new Singleton("/*"), new Concatenation( |
121 | new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation( |
122 | new Repetition( |
123 | new Concatenation( |
124 | new Singleton("*"), |
125 | new Repetition(new Concatenation( |
126 | new NonMatch("*/"), |
127 | new Repetition(new NonMatch("*"), 0, INFINITY) |
128 | ), 0, 1) |
129 | ), 0, INFINITY |
130 | ), |
131 | new Singleton("*/") |
132 | ))), new Union( |
133 | |
134 | /** |
135 | * End Of Line Comment: //[^\n]*\n |
136 | */ |
137 | new Concatenation( |
138 | new Singleton("//"), new Concatenation( |
139 | new Repetition(new NonMatch("\n"), 0, INFINITY), |
140 | new Singleton("\n") |
141 | )), |
142 | |
143 | // |
144 | // Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/ |
145 | // |
146 | new Concatenation( |
147 | new Singleton("/**"), new Concatenation( |
148 | new Repetition( |
149 | new Concatenation( |
150 | new Repetition(new Concatenation( |
151 | new NonMatch("*/"), |
152 | new Repetition(new NonMatch("*"), 0, INFINITY) |
153 | ), 0, 1), |
154 | new Singleton("*") |
155 | ), 0, INFINITY |
156 | ), |
157 | new Singleton("/") |
158 | )) |
159 | ))); |
160 | |
161 | put("IDENTIFIER", new Concatenation( |
162 | new Union( |
163 | alpha(), |
164 | new Match("_$") |
165 | ), |
166 | new Repetition( |
167 | new Union( |
168 | alnum(), |
169 | new Match("_$") |
170 | ), 0, INFINITY |
171 | ) |
172 | )); |
173 | |
174 | /** |
175 | * 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar) |
176 | */ |
177 | put("KEYWORD", new Union( |
178 | new Singleton("const"), |
179 | new Singleton("goto") |
180 | )); |
181 | |
182 | /** |
183 | * 19.3 Terminals from section 3.10.1: Integer Literal |
184 | */ |
185 | put("INTEGER_LITERAL", new Concatenation( |
186 | new Union( |
187 | /** |
188 | * Decimal Integer Literal: 0|[1-9][[:digit:]]* |
189 | */ |
190 | new Singleton("0"), new Union( |
191 | |
192 | new Concatenation( |
193 | new Range('1', '9'), |
194 | new Repetition(digit(), 0, INFINITY) |
195 | ), new Union( |
196 | |
197 | /** |
198 | * Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+ |
199 | */ |
200 | new Concatenation( |
201 | new Singleton("0"), new Concatenation( |
202 | new Match("xX"), |
203 | new Repetition(xdigit(), 1, INFINITY) |
204 | )), |
205 | |
206 | /** |
207 | * Octal Integer Literal: 0[0-7]+ |
208 | */ |
209 | new Concatenation( |
210 | new Singleton("0"), |
211 | new Repetition(new Range('0', '7'), 1, INFINITY) |
212 | ) |
213 | ))), |
214 | new Repetition(new Match("lL"), 0, 1) |
215 | )); |
216 | |
217 | /** |
218 | * 19.3 Terminals from section 3.10.2: Floating-Point Literal |
219 | */ |
220 | put("FLOATING_POINT_LITERAL", new Union( |
221 | |
222 | /** |
223 | * [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]? |
224 | */ |
225 | new Concatenation( |
226 | new Repetition(digit(), 1, INFINITY), new Concatenation( |
227 | new Singleton("."), new Concatenation( |
228 | new Repetition(digit(), 0, INFINITY), new Concatenation( |
229 | new Repetition(new Concatenation( |
230 | new Match("eE"), new Concatenation( |
231 | new Repetition(new Match("-+"), 0, 1), |
232 | new Repetition(digit(), 1, INFINITY) |
233 | )), 0, 1), |
234 | new Repetition(new Match("fFdD"), 0, 1) |
235 | )))), new Union( |
236 | |
237 | /** |
238 | * \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]? |
239 | */ |
240 | new Concatenation( |
241 | new Singleton("."), new Concatenation( |
242 | new Repetition(digit(), 1, INFINITY), new Concatenation( |
243 | new Repetition(new Concatenation( |
244 | new Match("eE"), new Concatenation( |
245 | new Repetition(new Match("-+"), 0, 1), |
246 | new Repetition(digit(), 1, INFINITY) |
247 | )), 0, 1), |
248 | new Repetition(new Match("fFdD"), 0, 1) |
249 | ))), new Union( |
250 | |
251 | /** |
252 | * [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]? |
253 | */ |
254 | new Concatenation( |
255 | new Repetition(digit(), 1, INFINITY), new Concatenation( |
256 | new Match("eE"), new Concatenation( |
257 | new Repetition(new Match("-+"), 0, 1), new Concatenation( |
258 | new Repetition(digit(), 1, INFINITY), |
259 | new Repetition(new Match("fFdD"), 0, 1) |
260 | )))), |
261 | |
262 | /** |
263 | * [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD] |
264 | */ |
265 | new Concatenation( |
266 | new Repetition(digit(), 1, INFINITY), new Concatenation( |
267 | new Repetition(new Concatenation( |
268 | new Match("eE"), new Concatenation( |
269 | new Repetition(new Match("-+"), 0, 1), |
270 | new Repetition(digit(), 1, INFINITY) |
271 | )), 0, 1), |
272 | new Match("fFdD") |
273 | )) |
274 | )))); |
275 | |
276 | /** |
277 | * 19.3 Terminals from section 3.10.3: Boolean Literal |
278 | */ |
279 | put("BOOLEAN_LITERAL", new Union( |
280 | new Singleton("true"), |
281 | new Singleton("false") |
282 | )); |
283 | |
284 | /** |
285 | * 19.3 Terminals from section 3.10.4: Character Literal |
286 | */ |
287 | put("CHARACTER_LITERAL", new Concatenation( |
288 | new Singleton("'"), new Concatenation( |
289 | new Union( |
290 | |
291 | /** |
292 | * Single Character: [^\r\n'\\] |
293 | */ |
294 | new NonMatch("\r\n'\\"), |
295 | |
296 | /** |
297 | * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2}) |
298 | */ |
299 | new Concatenation( |
300 | new Singleton("\\"), |
301 | new Union( |
302 | new Match("btnfr\"'\\"), |
303 | new Concatenation( |
304 | new Repetition(new Range('0', '3'), 0, 1), |
305 | new Repetition(new Range('0', '7'), 1, 2) |
306 | ) |
307 | ) |
308 | ) |
309 | ), |
310 | new Singleton("'") |
311 | ))); |
312 | |
313 | put("MULTILINE_LITERAL", new Concatenation( |
314 | new Singleton("[["), new Concatenation( |
315 | new Repetition( |
316 | new Union( |
317 | new NonMatch("]"), |
318 | new Concatenation( |
319 | new Singleton("]"), new NonMatch("]")) |
320 | ), 0, INFINITY |
321 | ), |
322 | new Singleton("]]") |
323 | ))); |
324 | |
325 | put("MULTILINE_LITERAL2", new Concatenation( |
326 | new Singleton("[=["), new Concatenation( |
327 | new Repetition( |
328 | new Union( |
329 | new NonMatch("]"), |
330 | new Concatenation(new Singleton("]"), new Union( |
331 | new NonMatch("="), |
332 | new Concatenation(new Singleton("="), new NonMatch("]")))) |
333 | ), 0, INFINITY |
334 | ), |
335 | new Singleton("]=]") |
336 | ))); |
337 | |
338 | /** |
339 | * 19.3 Terminals from section 3.10.5: String Literal |
340 | */ |
341 | put("STRING_LITERAL", new Concatenation( |
342 | new Singleton("\""), new Concatenation( |
343 | new Repetition( |
344 | new Union( |
345 | |
346 | /** |
347 | * Single Character: [^\r\n"\\] |
348 | */ |
349 | new NonMatch("\r\n\"\\"), |
350 | |
351 | /** |
352 | * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2}) |
353 | */ |
354 | new Concatenation( |
355 | new Singleton("\\"), |
356 | new Union( |
357 | new Match("btnfr\"'\\"), |
358 | new Union( |
359 | new Concatenation( |
360 | new Repetition(new Range('0', '3'), 0, 1), |
361 | new Repetition(new Range('0', '7'), 1, 2) |
362 | ), |
363 | new Concatenation( |
364 | new Singleton("u"), |
365 | new Repetition(new Match("0123456789abcdefABCDEF"), 4, 4) |
366 | ) |
367 | ) |
368 | ) |
369 | ) |
370 | ), 0, INFINITY |
371 | ), |
372 | new Singleton("\"") |
373 | ))); |
374 | |
375 | /** |
376 | * 19.3 Terminals section 3.10.7: Null Literal |
377 | */ |
378 | put("NULL_LITERAL", new Singleton("null")); |
379 | |
380 | // OK, it seems we have to add some more stuff... |
381 | |
382 | //put("OTHER1", new Match(";{}=,<>[]().+-:|&!")); |
383 | //put("OTHER1", new NonMatch("")); // catch anything, one character at a time |
384 | put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time |
385 | |
386 | } |
387 | } // class Java20 |
388 | } |
389 | |
390 | /** |
391 | * <p>This class implements a {@link Lexicon}.</p> |
392 | * |
393 | * @version 1.3 |
394 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
395 | */ |
396 | class Lexicon { |
397 | //Q |
398 | /** |
399 | * <p>The number of lexical NFA states constructed.</p> |
400 | */ |
401 | private /*static*/ int QSize = 0; |
402 | |
403 | /** |
404 | * <p>Creates a new state in the lexical NFA.</p> |
405 | * |
406 | * @return a new state in the lexical NFA. |
407 | */ |
408 | private /*static*/ Integer s() { |
409 | return ++QSize; |
410 | } |
411 | //delta |
412 | /** |
413 | * <p>The transition relation of the lexical NFA.</p> |
414 | */ |
415 | private /*static*/ final Stack<Stack<Object>> delta = new Stack<Stack<Object>>(); |
416 | |
417 | /** |
418 | * <p>Puts a transition into the lexical NFA.</p> |
419 | * |
420 | * @param s the state from which the transition is made. |
421 | * @param A the <code>Alphabet</code> on which the transition is made. |
422 | * @param r the state to which the transition is made. |
423 | */ |
424 | private /*static*/ void put(Integer s, Alphabet A, Integer r) { |
425 | |
426 | if (Math.max(s,r) >= delta.size()) delta.setSize(Math.max(s,r)+1); |
427 | |
428 | Stack<Object> pairs = delta.get(s); |
429 | if (pairs == null) delta.set(s, pairs = new Stack<Object>()); |
430 | |
431 | pairs.push(A); |
432 | pairs.push(r); |
433 | } |
434 | //Set |
435 | /** |
436 | * <p>This class implements a {@link Lexicon.Set <code>Set</code>}.</p> |
437 | * |
438 | * @version 1.3 |
439 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
440 | * @param <E> the element type. |
441 | */ |
442 | static class Set<E> extends Stack<E> { |
443 | |
444 | /** |
445 | * <p>The null exclusion indicator. If <code>true</code>, <code>add</code> methods will not add <code>null</code> to this <code>Set</code>.</p> |
446 | */ |
447 | private final boolean excludeNull; |
448 | |
449 | /** |
450 | * <p>Constructs a <code>Set</code> with an initial capacity.</p> |
451 | * |
452 | * @param capacity the initial capacity. The magnitude of <code>capacity</code> is the initial capacity. The null exclusion indicator is initialized to <code>true</code> if <code>capacity</code> is negative. |
453 | */ |
454 | Set(int capacity) { |
455 | super(); |
456 | ensureCapacity(Math.abs(capacity)); |
457 | excludeNull = (capacity < 0); |
458 | } |
459 | |
460 | /** |
461 | * <p>Adds an element to this <code>Set</code>. The element is not added if it occurs in this <code>Set</code> or it is <code>null</code> and the null exclusion indicator is <code>true</code>. The capacity is expanded if necessary.</p> |
462 | * |
463 | * @param element the element to add to this <code>Set</code>. |
464 | * @return <code>true</code> if this <code>Set</code> is changed; <code>false</code> otherwise. |
465 | */ |
466 | public boolean add(E element) { |
467 | if (excludeNull && element == null || contains(element)) return false; |
468 | push(element); |
469 | return true; |
470 | } |
471 | |
472 | /** |
473 | * <p>Adds a <code>Set</code> of elements to this <code>Set</code>. An element is not added if it occurs in this <code>Set</code> or it is <code>null</code> and the null exclusion indicator is <code>true</code>. The capacity is expanded if necessary.</p> |
474 | * |
475 | * @param index the index in <code>S</code> beyond which elements are added. |
476 | * @param S the <code>Set</code> to add to this <code>Set</code>. |
477 | * @return <code>true</code> if this <code>Set</code> is changed; <code>false</code> otherwise. |
478 | */ |
479 | boolean add(int index, Set<E> S) { |
480 | if (S == null) return false; |
481 | boolean push = isEmpty(); |
482 | boolean add = false; |
483 | |
484 | for (int i = index; i < S.size(); i++) { |
485 | E element = S.get(i); |
486 | |
487 | if (!(excludeNull && element == null)) |
488 | if (push) { |
489 | push(element); |
490 | add = true; |
491 | } |
492 | else if (add(element)) |
493 | add = true; |
494 | } |
495 | return add; |
496 | } |
497 | |
498 | /** |
499 | * <p>Adds a <code>Set</code> of elements to this <code>Set</code>. An element is not added if it occurs in this <code>Set</code> or it is <code>null</code> and the null exclusion indicator is <code>true</code>. The capacity is expanded if necessary.</p> |
500 | * |
501 | * @param S the <code>Set</code> to add to this <code>Set</code>. |
502 | * @return <code>true</code> if this <code>Set</code> is changed; <code>false</code> otherwise. |
503 | */ |
504 | boolean add(Set<E> S) { |
505 | return add(0, S); |
506 | } |
507 | |
508 | public String toString() { |
509 | StringBuffer result = new StringBuffer(80); |
510 | result.append('{'); |
511 | |
512 | for (int i = 0; i < size(); i++) { |
513 | if (i > 0) result.append(' '); |
514 | result.append(get(i)); |
515 | } |
516 | result.append('}'); |
517 | return result.toString(); |
518 | } |
519 | //Set |
520 | } |
521 | //I |
522 | /** |
523 | * <p>The initial states of the lexical NFA. When empty, there is a need to compute the current initial states. It is computed only on demand created by {@link #initial()}.</p> |
524 | */ |
525 | private final Set<Integer> I; |
526 | //F |
527 | /** |
528 | * <p>The final states of the lexical NFA. A final state is mapped to the terminal it accepts in this <code>Lexicon</code>. When empty, there is a need to compute current final states. It is computed only on demand created by {@link #initial()}.</p> |
529 | */ |
530 | private final Map<Integer, Object> F; |
531 | //Lexicon.transition |
532 | /** |
533 | * <p>Computes a transition using the lexical NFA.</p> |
534 | * |
535 | * @param S the states from which the transition is made. |
536 | * @param a the character on which the transition is made. |
537 | * @param R the states to which the transition is made. |
538 | * @return the states to which the transition is made. |
539 | */ |
540 | private /*static*/ Set<Integer> transition(Set<Integer> S, char a, Set<Integer> R) { |
541 | R.clear(); |
542 | |
543 | for (Integer s : S) { |
544 | Stack<Object> pairs = delta.get(s); |
545 | |
546 | if (pairs != null) |
547 | for (int k = 0; k < pairs.size(); k += 2) { |
548 | Alphabet A = (Alphabet)pairs.get(k); |
549 | |
550 | if (A != null) { |
551 | Integer r = (Integer)pairs.get(k+1); |
552 | if (A.contains(a)) R.add(r); |
553 | } |
554 | } |
555 | } |
556 | return R; |
557 | } |
558 | //Lexicon.closure |
559 | /** |
560 | * <p>Computes a reflexive transitive closure under empty transition using the lexical NFA. The closure is computed in place by a breadth-first search expanding <code>S</code>.</p> |
561 | * |
562 | * @param S the states whose reflexive transitive closure is computed under empty transition. |
563 | * @return the reflexive transitive closure of <code>S</code> under empty transition. |
564 | */ |
565 | private /*static*/ Set<Integer> closure(Set<Integer> S) { |
566 | |
567 | for (int i = 0; i < S.size(); i++) { |
568 | Integer s = S.get(i); |
569 | Stack<Object> pairs = delta.get(s); |
570 | |
571 | if (pairs != null) |
572 | for (int k = 0; k < pairs.size(); k += 2) { |
573 | Alphabet A = (Alphabet)pairs.get(k); |
574 | |
575 | if (A == null) { |
576 | Integer r = (Integer)pairs.get(k+1); |
577 | S.add(r); |
578 | } |
579 | } |
580 | } |
581 | return S; |
582 | } |
583 | //Expression |
584 | /** |
585 | * <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing a regular language.</p> |
586 | * |
587 | * @version 1.3 |
588 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
589 | */ |
590 | abstract public /*static*/ class Expression implements Cloneable { |
591 | |
592 | /** |
593 | * <p>The initial state of the NFA constructed from this <code>Expression</code>.</p> |
594 | */ |
595 | Integer i; |
596 | /** |
597 | * <p>The final state of the NFA constructed from this <code>Expression</code>.</p> |
598 | */ |
599 | Integer f; |
600 | |
601 | /** |
602 | * <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
603 | * |
604 | * @return a clone of this <code>Expression</code>. |
605 | */ |
606 | abstract public Object clone(); |
607 | } |
608 | //Alphabet |
609 | /** |
610 | * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} of character symbols.</p> |
611 | * |
612 | * @version 1.3 |
613 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
614 | */ |
615 | abstract public /*static*/ class Alphabet extends Expression { |
616 | |
617 | /** |
618 | * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> |
619 | * |
620 | * @param a the character whose status is requested. |
621 | * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. |
622 | */ |
623 | abstract boolean contains(char a); |
624 | } |
625 | //Match |
626 | /** |
627 | * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing some characters.</p> |
628 | * |
629 | * @version 1.3 |
630 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
631 | */ |
632 | public /*static*/ class Match extends Alphabet { |
633 | |
634 | /** |
635 | * <p>The {@link Character} or {@link String} representing this <code>Alphabet</code>.</p> |
636 | */ |
637 | final Object A; |
638 | |
639 | /** |
640 | * <p>Constructs an <code>Alphabet</code> containing some characters, and builds the NFA constructed from this <code>Expression</code>.</p> |
641 | * |
642 | * @param i the initial state of the NFA constructed. |
643 | * @param A the {@link Character} or {@link String} of characters in this <code>Alphabet</code>. |
644 | * @param f the final state of the NFA constructed. |
645 | */ |
646 | private Match(Integer i, Object A, Integer f) { |
647 | this.A = A; |
648 | put(this.i = i, this, this.f = f); |
649 | } |
650 | |
651 | /** |
652 | * <p>Constructs an <code>Alphabet</code> containing one character, and builds the NFA constructed from this <code>Expression</code>.</p> |
653 | * |
654 | * @param i the initial state of the NFA constructed. |
655 | * @param a the character in this <code>Alphabet</code>. |
656 | * @param f the final state of the NFA constructed. |
657 | */ |
658 | private Match(Integer i, char a, Integer f) { |
659 | this(i, new Character(a), f); |
660 | } |
661 | |
662 | /** |
663 | * <p>Constructs an <code>Alphabet</code> containing one character, and builds the NFA constructed from this <code>Expression</code>.</p> |
664 | * |
665 | * @param a the character in this <code>Alphabet</code>. |
666 | */ |
667 | public Match(char a) { |
668 | this(s(), a, s()); |
669 | } |
670 | |
671 | /** |
672 | * <p>Constructs an <code>Alphabet</code> containing some characters, and builds the NFA constructed from this <code>Expression</code>.</p> |
673 | * |
674 | * @param A the {@link Character} or {@link String} of characters in this <code>Alphabet</code>. |
675 | */ |
676 | public Match(Object A) { |
677 | this(s(), A, s()); |
678 | } |
679 | |
680 | /** |
681 | * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> |
682 | * |
683 | * @param a the character whose status is requested. |
684 | * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. |
685 | */ |
686 | boolean contains(char a) { |
687 | if (A instanceof Character) |
688 | return (Character)A == a; |
689 | |
690 | if (A instanceof String) |
691 | return ((String)A).indexOf(a) != -1; |
692 | |
693 | if (A instanceof Stack<?>) |
694 | for (Alphabet alphabet : (Stack<Alphabet>)A) |
695 | if (alphabet.contains(a)) return true; |
696 | return false; |
697 | } |
698 | |
699 | /** |
700 | * <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
701 | * |
702 | * @return a clone of this <code>Alphabet</code>. |
703 | */ |
704 | public Object clone() { |
705 | return new Match(A); |
706 | } |
707 | } |
708 | //NonMatch |
709 | /** |
710 | * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing all except some characters.</p> |
711 | * |
712 | * @version 1.3 |
713 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
714 | */ |
715 | public /*static*/ class NonMatch extends Match { |
716 | |
717 | /** |
718 | * <p>Constructs an <code>Alphabet</code> containing all characters except one, and builds the NFA constructed from this <code>Expression</code>.</p> |
719 | * |
720 | * @param a the character not in this <code>Alphabet</code>. |
721 | */ |
722 | public NonMatch(char a) { |
723 | super(a); |
724 | } |
725 | |
726 | /** |
727 | * <p>Constructs an <code>Alphabet</code> containing all characters except some, and builds the NFA constructed from this <code>Expression</code>.</p> |
728 | * |
729 | * @param A the {@link Character} or {@link String} of characters not in this <code>Alphabet</code>. |
730 | */ |
731 | public NonMatch(Object A) { |
732 | super(A); |
733 | } |
734 | |
735 | /** |
736 | * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> |
737 | * |
738 | * @param a the character whose status is requested. |
739 | * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. |
740 | */ |
741 | boolean contains(char a) { |
742 | return a != (char)-1 && !super.contains(a); |
743 | } |
744 | |
745 | /** |
746 | * <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
747 | * |
748 | * @return a clone of this <code>Alphabet</code>. |
749 | */ |
750 | public Object clone() { |
751 | return new NonMatch(A); |
752 | } |
753 | } |
754 | //Range |
755 | /** |
756 | * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing the characters in a range.</p> |
757 | * |
758 | * @version 1.3 |
759 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
760 | */ |
761 | public /*static*/ class Range extends Alphabet { |
762 | |
763 | /** |
764 | * <p>The first character in the range.</p> |
765 | */ |
766 | private final char a1; |
767 | /** |
768 | * <p>The last character in the range.</p> |
769 | */ |
770 | private final char a2; |
771 | |
772 | /** |
773 | * <p>Constructs an <code>Alphabet</code> containing the characters in a range, and builds the NFA constructed from this <code>Expression</code>.</p> |
774 | * |
775 | * @param a1 the first character in the range. |
776 | * @param a2 the last character in the range. |
777 | */ |
778 | public Range(char a1, char a2) { |
779 | this.a1 = a1; |
780 | this.a2 = a2; |
781 | put(i = s(), this, f = s()); |
782 | } |
783 | |
784 | /** |
785 | * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> |
786 | * |
787 | * @param a the character whose status is requested. |
788 | * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. |
789 | */ |
790 | boolean contains(char a) { |
791 | return a1 <= a && a <= a2; |
792 | } |
793 | |
794 | /** |
795 | * <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
796 | * |
797 | * @return a clone of this <code>Alphabet</code>. |
798 | */ |
799 | public Object clone() { |
800 | return new Range(a1, a2); |
801 | } |
802 | } |
803 | |
804 | /** |
805 | * <p>Creates an <code>Alphabet</code> containing the uppercase alphabetic characters.</p> |
806 | * |
807 | * @return an <code>Alphabet</code> containing the uppercase alphabetic characters. |
808 | */ |
809 | public /*static*/ PosixClass upper() { |
810 | return new PosixClass(0x0001); |
811 | } |
812 | |
813 | /** |
814 | * <p>Creates an <code>Alphabet</code> containing the lowercase alphabetic characters.</p> |
815 | * |
816 | * @return an <code>Alphabet</code> containing the lowercase alphabetic characters. |
817 | */ |
818 | public /*static*/ PosixClass lower() { |
819 | return new PosixClass(0x0002); |
820 | } |
821 | |
822 | /** |
823 | * <p>Creates an <code>Alphabet</code> containing the alphabetic characters.</p> |
824 | * |
825 | * @return an <code>Alphabet</code> containing the alphabetic characters. |
826 | */ |
827 | public /*static*/ PosixClass alpha() { |
828 | return new PosixClass(0x0004); |
829 | } |
830 | |
831 | /** |
832 | * <p>Creates an <code>Alphabet</code> containing the decimal digit characters.</p> |
833 | * |
834 | * @return an <code>Alphabet</code> containing the decimal digit characters. |
835 | */ |
836 | public /*static*/ PosixClass digit() { |
837 | return new PosixClass(0x0008); |
838 | } |
839 | |
840 | /** |
841 | * <p>Creates an <code>Alphabet</code> containing the hexadecimal digit characters.</p> |
842 | * |
843 | * @return an <code>Alphabet</code> containing the hexadecimal digit characters. |
844 | */ |
845 | public /*static*/ PosixClass xdigit() { |
846 | return new PosixClass(0x0010); |
847 | } |
848 | |
849 | /** |
850 | * <p>Creates an <code>Alphabet</code> containing the alphanumeric characters.</p> |
851 | * |
852 | * @return an <code>Alphabet</code> containing the alphanumeric characters. |
853 | */ |
854 | public /*static*/ PosixClass alnum() { |
855 | return new PosixClass(0x0020); |
856 | } |
857 | |
858 | /** |
859 | * <p>Creates an <code>Alphabet</code> containing the punctuation characters.</p> |
860 | * |
861 | * @return an <code>Alphabet</code> containing the punctuation characters. |
862 | */ |
863 | public /*static*/ PosixClass punct() { |
864 | return new PosixClass(0x0040); |
865 | } |
866 | |
867 | /** |
868 | * <p>Creates an <code>Alphabet</code> containing the graphical characters.</p> |
869 | * |
870 | * @return an <code>Alphabet</code> containing the graphical characters. |
871 | */ |
872 | public /*static*/ PosixClass graph() { |
873 | return new PosixClass(0x0080); |
874 | } |
875 | |
876 | /** |
877 | * <p>Creates an <code>Alphabet</code> containing the printable characters.</p> |
878 | * |
879 | * @return an <code>Alphabet</code> containing the printable characters. |
880 | */ |
881 | public /*static*/ PosixClass print() { |
882 | return new PosixClass(0x0100); |
883 | } |
884 | |
885 | /** |
886 | * <p>Creates an <code>Alphabet</code> containing the blank characters.</p> |
887 | * |
888 | * @return an <code>Alphabet</code> containing the blank characters. |
889 | */ |
890 | public /*static*/ PosixClass blank() { |
891 | return new PosixClass(0x0200); |
892 | } |
893 | |
894 | /** |
895 | * <p>Creates an <code>Alphabet</code> containing the space characters.</p> |
896 | * |
897 | * @return an <code>Alphabet</code> containing the space characters. |
898 | */ |
899 | public /*static*/ PosixClass space() { |
900 | return new PosixClass(0x0400); |
901 | } |
902 | |
903 | /** |
904 | * <p>Creates an <code>Alphabet</code> containing the control characters.</p> |
905 | * |
906 | * @return an <code>Alphabet</code> containing the control characters. |
907 | */ |
908 | public /*static*/ PosixClass cntrl() { |
909 | return new PosixClass(0x0800); |
910 | } |
911 | |
912 | /** |
913 | * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing the characters in a POSIX character class.</p> |
914 | * |
915 | * @version 1.3 |
916 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
917 | */ |
918 | public /*static*/ class PosixClass extends Alphabet { |
919 | |
920 | /** |
921 | * <p>The bit mask representing this <code>PosixClass</code>.</p> |
922 | */ |
923 | private final int posixClass; |
924 | |
925 | /** |
926 | * <p>Constructs an <code>Alphabet</code> containing the characters in a POSIX character class, and builds the NFA constructed from this <code>Expression</code>.</p> |
927 | * |
928 | * @param posixClass the bit mask representing this <code>PosixClass</code>. |
929 | */ |
930 | private PosixClass(int posixClass) { |
931 | this.posixClass = posixClass; |
932 | put(i = s(), this, f = s()); |
933 | } |
934 | |
935 | /** |
936 | * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> |
937 | * |
938 | * @param a the character whose status is requested. |
939 | * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. |
940 | */ |
941 | boolean contains(char a) { |
942 | int UPPER = 0x0001; int LOWER = 0x0002; |
943 | int ALPHA = 0x0004; int DIGIT = 0x0008; |
944 | int XDIGIT = 0x0010; int ALNUM = 0x0020; |
945 | int PUNCT = 0x0040; int GRAPH = 0x0080; |
946 | int PRINT = 0x0100; int BLANK = 0x0200; |
947 | int SPACE = 0x0400; int CNTRL = 0x0800; |
948 | int classes = 0; |
949 | |
950 | switch (Character.getType(a)) { |
951 | default: break; |
952 | case Character.UPPERCASE_LETTER: |
953 | classes |= UPPER | ALPHA | (('A' <= a && a <= 'F') ? XDIGIT : 0) | ALNUM | GRAPH | PRINT; break; |
954 | case Character.LOWERCASE_LETTER: |
955 | classes |= LOWER | ALPHA | (('a' <= a && a <= 'f') ? XDIGIT : 0) | ALNUM | GRAPH | PRINT; break; |
956 | case Character.TITLECASE_LETTER: |
957 | case Character.MODIFIER_LETTER: |
958 | case Character.OTHER_LETTER: |
959 | classes |= ALPHA | ALNUM | GRAPH | PRINT; break; |
960 | case Character.NON_SPACING_MARK: |
961 | case Character.COMBINING_SPACING_MARK: |
962 | case Character.ENCLOSING_MARK: |
963 | classes |= PUNCT | GRAPH | PRINT; break; |
964 | case Character.DECIMAL_DIGIT_NUMBER: |
965 | classes |= DIGIT | XDIGIT | ALNUM | GRAPH | PRINT; break; |
966 | case Character.LETTER_NUMBER: |
967 | case Character.OTHER_NUMBER: |
968 | classes |= ALNUM | GRAPH | PRINT; break; |
969 | case Character.CONNECTOR_PUNCTUATION: |
970 | case Character.DASH_PUNCTUATION: |
971 | case Character.START_PUNCTUATION: |
972 | case Character.END_PUNCTUATION: |
973 | case Character.INITIAL_QUOTE_PUNCTUATION: |
974 | case Character.FINAL_QUOTE_PUNCTUATION: |
975 | case Character.OTHER_PUNCTUATION: |
976 | case Character.MATH_SYMBOL: |
977 | case Character.CURRENCY_SYMBOL: |
978 | case Character.MODIFIER_SYMBOL: |
979 | case Character.OTHER_SYMBOL: |
980 | classes |= PUNCT | GRAPH | PRINT; break; |
981 | case Character.SPACE_SEPARATOR: |
982 | classes |= PRINT | BLANK | SPACE; break; |
983 | case Character.LINE_SEPARATOR: |
984 | case Character.PARAGRAPH_SEPARATOR: |
985 | break; |
986 | case Character.CONTROL: |
987 | classes |= ((a == '\t') ? BLANK : 0) | ((a == '\t' || a == '\n' || a == '\013' || a == '\f' || a == '\r') ? SPACE : 0) | CNTRL; break; |
988 | case Character.FORMAT: |
989 | case Character.SURROGATE: |
990 | case Character.PRIVATE_USE: |
991 | case Character.UNASSIGNED: |
992 | break; |
993 | } |
994 | return (classes & posixClass) != 0; |
995 | } |
996 | |
997 | /** |
998 | * <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
999 | * |
1000 | * @return a clone of this <code>Alphabet</code>. |
1001 | */ |
1002 | public Object clone() { |
1003 | return new PosixClass(posixClass); |
1004 | } |
1005 | } |
1006 | //UnicodeCategory |
1007 | /** |
1008 | * <p>This class implements an {@link Lexicon.Alphabet <code>Alphabet</code>} containing the characters in a Unicode general category.</p> |
1009 | * |
1010 | * @version 1.3 |
1011 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
1012 | */ |
1013 | public /*static*/ class UnicodeCategory extends Alphabet { |
1014 | |
1015 | /** |
1016 | * <p>The byte representing the Unicode general category.</p> |
1017 | */ |
1018 | private final byte category; |
1019 | |
1020 | /** |
1021 | * <p>Constructs an <code>Alphabet</code> containing the characters in a Unicode general category, and builds the NFA constructed from this <code>Expression</code>. The class {@link Character} defines byte constants representing each of the Unicode general categories.</p> |
1022 | * |
1023 | * @param category The byte representing the Unicode general category. |
1024 | * @see Character |
1025 | */ |
1026 | public UnicodeCategory(byte category) { |
1027 | this.category = category; |
1028 | put(i = s(), this, f = s()); |
1029 | } |
1030 | |
1031 | /** |
1032 | * <p>Indicates whether a character occurs in this <code>Alphabet</code>.</p> |
1033 | * |
1034 | * @param a the character whose status is requested. |
1035 | * @return <code>true</code> if <code>a</code> occurs in this <code>Alphabet</code>; <code>false</code> otherwise. |
1036 | */ |
1037 | boolean contains(char a) { |
1038 | return Character.getType(a) == category; |
1039 | } |
1040 | |
1041 | /** |
1042 | * <p>Creates a clone of this <code>Alphabet</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
1043 | * |
1044 | * @return a clone of this <code>Alphabet</code>. |
1045 | */ |
1046 | public Object clone() { |
1047 | return new UnicodeCategory(category); |
1048 | } |
1049 | } |
1050 | //Repetition |
1051 | /** |
1052 | * <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing the repetition of a regular language.</p> |
1053 | * |
1054 | * @version 1.3 |
1055 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
1056 | */ |
1057 | public /*static*/ class Repetition extends Expression { |
1058 | |
1059 | /** |
1060 | * <p>The operand <code>Expression</code>.</p> |
1061 | */ |
1062 | private final Expression e1; |
1063 | /** |
1064 | * <p>The minimum number of times <code>e1</code> is repeated.</p> |
1065 | */ |
1066 | private final int min; |
1067 | /** |
1068 | * <p>The maximum number of times <code>e1</code> is repeated.</p> |
1069 | */ |
1070 | private final int max; |
1071 | |
1072 | /** |
1073 | * <p>Constructs an <code>Expression</code> expressing the repetition of a regular language, and builds the NFA constructed from this <code>Expression</code>. Large finite values for the minimum or maximum cause the NFA constructed from the operand <code>Expression</code> to be copied many times, resulting in a space-inefficient NFA.</p> |
1074 | * |
1075 | * @param e1 the operand <code>Expression</code>. |
1076 | * @param min the minimum number of times <code>e1</code> is repeated. If negative, it is assumed to be zero. |
1077 | * @param max the maximum number of times <code>e1</code> is repeated. If negative, it is assumed to be infinity. |
1078 | */ |
1079 | public Repetition(Expression e1, int min, int max) { |
1080 | this.e1 = e1 = (Expression)e1.clone(); |
1081 | this.min = min = Math.max(min, 0); |
1082 | this.max = max; |
1083 | |
1084 | i = (min > 0) ? e1.i : s(); |
1085 | f = (min > 0) ? e1.f : i; |
1086 | |
1087 | if (min == 0 && max < 0) { |
1088 | put(i, null, e1.i); |
1089 | put(e1.f, null, i); |
1090 | } |
1091 | else { |
1092 | for (int k = 2; k <= min; k++) { |
1093 | e1 = (Expression)e1.clone(); |
1094 | put(f, null, e1.i); |
1095 | f = e1.f; |
1096 | } |
1097 | if (max > min) { |
1098 | Integer tail = f; |
1099 | put(tail, null, f = s()); |
1100 | |
1101 | for (int k = min+1; k <= max; k++) { |
1102 | if (k > 1) e1 = (Expression)e1.clone(); |
1103 | put(tail, null, e1.i); |
1104 | put(tail = e1.f, null, f); |
1105 | } |
1106 | } |
1107 | else if (max < 0) put(f, null, e1.i); |
1108 | } |
1109 | } |
1110 | |
1111 | /** |
1112 | * <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
1113 | * |
1114 | * @return a clone of this <code>Expression</code>. |
1115 | */ |
1116 | public Object clone() { |
1117 | return new Repetition(e1, min, max); |
1118 | } |
1119 | } |
1120 | //Concatenation |
1121 | /** |
1122 | * <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing the concatenation of two regular languages.</p> |
1123 | * |
1124 | * @version 1.3 |
1125 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
1126 | */ |
1127 | public /*static*/ class Concatenation extends Expression { |
1128 | |
1129 | /** |
1130 | * <p>The left operand <code>Expression</code>.</p> |
1131 | */ |
1132 | private final Expression e1; |
1133 | /** |
1134 | * <p>The right operand <code>Expression</code>.</p> |
1135 | */ |
1136 | private final Expression e2; |
1137 | |
1138 | /** |
1139 | * <p>Constructs an <code>Expression</code> expressing the concatenation of two regular languages, and builds the NFA constructed from this <code>Expression</code>.</p> |
1140 | * |
1141 | * @param e1 the left operand <code>Expression</code>. |
1142 | * @param e2 the right operand <code>Expression</code>. |
1143 | */ |
1144 | public Concatenation(Expression e1, Expression e2) { |
1145 | this.e1 = e1 = (Expression)e1.clone(); |
1146 | this.e2 = e2 = (Expression)e2.clone(); |
1147 | |
1148 | i = e1.i; |
1149 | f = e2.f; |
1150 | |
1151 | put(e1.f, null, e2.i); |
1152 | } |
1153 | |
1154 | /** |
1155 | * <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
1156 | * |
1157 | * @return a clone of this <code>Expression</code>. |
1158 | */ |
1159 | public Object clone() { |
1160 | return new Concatenation(e1, e2); |
1161 | } |
1162 | } |
1163 | //Singleton |
1164 | /** |
1165 | * <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing a singleton language.</p> |
1166 | * |
1167 | * @version 1.3 |
1168 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
1169 | */ |
1170 | public /*static*/ class Singleton extends Expression { |
1171 | |
1172 | /** |
1173 | * <p>The string whose singleton language is expressed.</p> |
1174 | */ |
1175 | private final String x; |
1176 | |
1177 | /** |
1178 | * <p>Constructs an <code>Expression</code> expressing a singleton language, and builds the NFA constructed from this <code>Expression</code>.</p> |
1179 | * |
1180 | * @param x the string whose singleton language is expressed. |
1181 | */ |
1182 | public Singleton(String x) { |
1183 | this.x = x; |
1184 | |
1185 | f = i = s(); |
1186 | |
1187 | for (char c : x.toCharArray()) |
1188 | new Match(f, c, f = s()); |
1189 | } |
1190 | |
1191 | /** |
1192 | * <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
1193 | * |
1194 | * @return a clone of this <code>Expression</code>. |
1195 | */ |
1196 | public Object clone() { |
1197 | return new Singleton(x); |
1198 | } |
1199 | } |
1200 | //Union |
1201 | /** |
1202 | * <p>This class implements an {@link Lexicon.Expression <code>Expression</code>} expressing the union of two regular languages.</p> |
1203 | * |
1204 | * @version 1.3 |
1205 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
1206 | */ |
1207 | public /*static*/ class Union extends Expression { |
1208 | |
1209 | /** |
1210 | * <p>The left operand <code>Expression</code>.</p> |
1211 | */ |
1212 | private final Expression e1; |
1213 | /** |
1214 | * <p>The right operand <code>Expression</code>.</p> |
1215 | */ |
1216 | private final Expression e2; |
1217 | |
1218 | /** |
1219 | * <p>Constructs an <code>Expression</code> expressing the union of two regular languages, and builds the NFA constructed from this <code>Expression</code>.</p> |
1220 | * |
1221 | * @param e1 the left operand <code>Expression</code>. |
1222 | * @param e2 the right operand <code>Expression</code>. |
1223 | */ |
1224 | public Union(Expression e1, Expression e2) { |
1225 | this.e1 = e1 = (Expression)e1.clone(); |
1226 | this.e2 = e2 = (Expression)e2.clone(); |
1227 | |
1228 | i = s(); |
1229 | f = s(); |
1230 | |
1231 | put(i, null, e1.i); put(e1.f, null, f); |
1232 | put(i, null, e2.i); put(e2.f, null, f); |
1233 | } |
1234 | |
1235 | /** |
1236 | * <p>Creates a clone of this <code>Expression</code>, and replicates the NFA constructed from this <code>Expression</code>.</p> |
1237 | * |
1238 | * @return a clone of this <code>Expression</code>. |
1239 | */ |
1240 | public Object clone() { |
1241 | return new Union(e1, e2); |
1242 | } |
1243 | } |
1244 | |
1245 | /** |
1246 | * <p>The mapping representing this <code>Lexicon</code>. A terminal is mapped to the initial state of the NFA constructed from the associated <code>Expression</code>.</p> |
1247 | */ |
1248 | private final Map<Object, Expression> E; |
1249 | |
1250 | /** |
1251 | * <p>Puts a terminal and associated <code>Expression</code> into this <code>Lexicon</code>. The <code>Expression</code> supersedes any previously associated with the terminal.</p> |
1252 | * |
1253 | * @param a the terminal to add to this <code>Lexicon</code>. |
1254 | * @param e the <code>Expression</code> associated with terminal <code>a</code>. When grabbing, the language expressed by <code>e</code> matches <code>a</code>. |
1255 | */ |
1256 | public void put(Object a, Expression e) { |
1257 | E.put(a, e); |
1258 | I.clear(); |
1259 | F.clear(); |
1260 | } |
1261 | |
1262 | /** |
1263 | * <p>Indicates whether a symbol is a terminal in this <code>Lexicon</code>.</p> |
1264 | * |
1265 | * @param a the symbol whose status is requested. |
1266 | * @return <code>true</code> if <code>a</code> is a terminal in this <code>Lexicon</code>; <code>false</code> otherwise. |
1267 | */ |
1268 | boolean terminal(Object a) { |
1269 | return E.containsKey(a); |
1270 | } |
1271 | //Lexicon() |
1272 | /** |
1273 | * <p>The terminal matched by the character at the end of a source stream.</p> |
1274 | * @since 1.1, renames <code>END_OF_SOURCE</code> in version 1.0. |
1275 | */ |
1276 | protected static final Object $ = new String("$"); |
1277 | |
1278 | /** |
1279 | * <p>The <code>Alphabet</code> containing the character at the end of a source stream.</p> |
1280 | */ |
1281 | private /*static*/ final Expression $_EXPRESSION = new Match((char)-1); |
1282 | |
1283 | /** |
1284 | * <p>Constructs an empty <code>Lexicon</code>.</p> |
1285 | */ |
1286 | protected Lexicon() { |
1287 | E = new HashMap<Object, Expression>(500); |
1288 | I = new Set<Integer>(-200); |
1289 | F = new HashMap<Integer, Object>(500); |
1290 | put($, $_EXPRESSION); |
1291 | } |
1292 | |
1293 | /** |
1294 | * <p>Constructs a <code>Lexicon</code> that is a shallow copy of <code>lexicon</code>. The fields of the new <code>Lexicon</code> refer to the same elements as those in <code>lexicon</code>.</p> |
1295 | * |
1296 | * |
1297 | * @param lexicon the <code>Lexicon</code> to copy. |
1298 | */ |
1299 | Lexicon(Lexicon lexicon) {/*debug*/ |
1300 | debug = lexicon.debug;/*off*/ |
1301 | E = lexicon.E; |
1302 | I = lexicon.I; |
1303 | F = lexicon.F; |
1304 | } |
1305 | //Lexicon.initial |
1306 | /** |
1307 | * <p>Returns the initial states of the lexical NFA.</p> |
1308 | * |
1309 | * @return {@link #I}, computing it and {@link #F} if there is a need to compute the current initial states and final states. |
1310 | */ |
1311 | private Set<Integer> initial() { |
1312 | |
1313 | if (I.isEmpty()) { |
1314 | |
1315 | for (Object a : E.keySet()) { |
1316 | Expression e = E.get(a); |
1317 | |
1318 | I.add(e.i); |
1319 | F.put(e.f, a); |
1320 | } |
1321 | closure(I); |
1322 | } |
1323 | return I; |
1324 | } |
1325 | //accept |
1326 | /** |
1327 | * <p>Computes the current final state, if any, in the lexical NFA.</p> |
1328 | * |
1329 | * @param S the current states. |
1330 | * @return the maximum final state in <code>S</code>. Returns <code>null</code> if <code>S</code> contains no final states. |
1331 | */ |
1332 | private Integer accept(Set<Integer> S) { |
1333 | |
1334 | Integer |
1335 | f = null; |
1336 | |
1337 | for (Integer s : S) |
1338 | if (F.containsKey(s)) |
1339 | if (f == null || f < s) f = s; |
1340 | |
1341 | return f; |
1342 | } |
1343 | |
1344 | /** |
1345 | * <p>This class implements an {@link Lexicon.Exception <code>Exception</code>}.</p> |
1346 | * |
1347 | * @version 1.3 |
1348 | * @author © 1999-2009 <a href="http://www.csupomona.edu/~carich/">Craig A. Rich</a> <<a href="mailto:carich@csupomona.edu">carich@csupomona.edu</a>> |
1349 | */ |
1350 | public class Exception extends java.lang.Exception { |
1351 | |
1352 | /** |
1353 | * <p>The extended error message.</p> |
1354 | */ |
1355 | private StringBuffer message; |
1356 | |
1357 | /** |
1358 | * <p>Constructs an <code>Exception</code> with a message.</p> |
1359 | * |
1360 | * @param message the error message. |
1361 | */ |
1362 | public Exception(String message) { |
1363 | super(message); |
1364 | } |
1365 | |
1366 | /** |
1367 | * <p>Returns the error message.</p> |
1368 | * |
1369 | * @return the error message. |
1370 | */ |
1371 | public String getMessage() { |
1372 | return (message == null) ? super.getMessage() : message.toString(); |
1373 | } |
1374 | |
1375 | /** |
1376 | * <p>Extends the error message in this <code>Exception</code>. The extended message includes the line number, message and source characters following the error.</p> |
1377 | * |
1378 | * @param source the source character stream. |
1379 | * @return this <code>Exception</code> with an extended message. |
1380 | */ |
1381 | Exception extend(LineNumberReader source) { |
1382 | if (message == null) message = new StringBuffer(132); |
1383 | else message.setLength(0); |
1384 | |
1385 | message.append("line "); |
1386 | message.append(source.getLineNumber()+1); |
1387 | message.append(": "); |
1388 | message.append(super.getMessage()); |
1389 | message.append(System.getProperty("line.separator")); |
1390 | message.append("..."); |
1391 | message.append(word()); |
1392 | try { |
1393 | String rest = source.readLine(); |
1394 | if (rest != null) message.append(rest); |
1395 | } |
1396 | catch (IOException exception) {} |
1397 | message.append(System.getProperty("line.separator")); |
1398 | message.append(" ^"); |
1399 | return this; |
1400 | } |
1401 | } |
1402 | //Lexicon.grab |
1403 | /** |
1404 | * <p>The states through which the lexical NFA transitions.</p> |
1405 | */ |
1406 | private final Set<Integer>[] R = (Set<Integer>[])new Set<?>[]{new Set<Integer>(-200), new Set<Integer>(-200)}; |
1407 | /** |
1408 | * <p>The <code>StringBuffer</code> containing the word most recently grabbed.</p> |
1409 | */ |
1410 | private final StringBuffer w = new StringBuffer(4000); |
1411 | |
1412 | /** |
1413 | * <p>Grabs a terminal from a source character stream using this <code>Lexicon</code>. The variable returned by {@link #word()} is set to the longest nonempty prefix of the remaining source characters matching an <code>Expression</code> in this <code>Lexicon</code>. If no nonempty prefix matches an <code>Expression</code>, a <code>Lexicon.Exception</code> is thrown. If the longest matching prefix matches more than one <code>Expression</code>, the terminal associated with the <code>Expression</code> most recently constructed is returned. Blocks until a character is available, an I/O error occurs, or the end of the source stream is reached.</p> |
1414 | * |
1415 | * @param source the source character stream. |
1416 | * @return the terminal grabbed from <code>source</code>. |
1417 | * @throws Lexicon.Exception if an I/O or lexical error occurs. |
1418 | */ |
1419 | protected Object grab(LineNumberReader source) throws Exception { |
1420 | Set<Integer> S = initial(); |
1421 | w.setLength(0); |
1422 | int wLength = 0; |
1423 | Object b = null; |
1424 | try { |
1425 | source.mark(w.capacity()); |
1426 | do { |
1427 | int a = source.read(); |
1428 | S = closure(transition(S, (char)a, R[w.length() % 2])); |
1429 | if (S.isEmpty()) break; |
1430 | |
1431 | if (a != -1) w.append((char)a); else w.append($); |
1432 | |
1433 | Integer f = accept(S); |
1434 | if (f != null) { |
1435 | wLength = w.length(); |
1436 | b = F.get(f); |
1437 | source.mark(w.capacity()); |
1438 | } |
1439 | } while (b != $); |
1440 | w.setLength(wLength); |
1441 | source.reset(); |
1442 | } |
1443 | catch (IOException exception) { |
1444 | throw new Exception(exception.getMessage()); |
1445 | } |
1446 | if (wLength == 0) throw new Exception("lexical error").extend(source); |
1447 | return b; |
1448 | } |
1449 | |
1450 | /** |
1451 | * <p>Returns the word most recently grabbed using this <code>Lexicon</code>.</p> |
1452 | * |
1453 | * @return the word most recently grabbed by {@link #grab(java.io.LineNumberReader) <code>grab(source)</code>}. |
1454 | */ |
1455 | protected String word() { |
1456 | return w.substring(0); |
1457 | } |
1458 | //Lexicon.interpret |
1459 | /** |
1460 | * <p>Repeatedly invokes {@link #grab(java.io.LineNumberReader) <code>grab(source)</code>} until the end of the source stream reached. Blocks until a character is available, or an I/O error occurs. This method is overridden by <code>Grammar</code> and its parser subclasses, so it is only invoked when this <code>Lexicon</code> has not been extended into a <code>Grammar</code> or parser.</p> |
1461 | * |
1462 | * @param source the source character stream. |
1463 | * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. A <code>Lexicon</code> always returns null. |
1464 | * @throws Lexicon.Exception if an I/O or lexical error occurs. |
1465 | */ |
1466 | Object interpret(LineNumberReader source) throws Exception {/*debug*/ |
1467 | if ((debug & TERMINALS) > 0) System.out.println( |
1468 | "----terminals\n\t" + E.keySet().toString().replaceFirst("\\[", "{").replaceAll(", ", " ").replaceFirst("\\]$", "}\n----------"));/*off*/ |
1469 | |
1470 | for (Object a; (a = grab(source)) != $;)/*debug*/ |
1471 | if ((debug & LEXICAL) > 0) System.out.println( |
1472 | a + (!a.equals(word()) ? " " + word() : ""))/*off*/ |
1473 | ; |
1474 | return null; |
1475 | } |
1476 | |
1477 | /** |
1478 | * <p>Interprets a source character stream using this <code>Lexicon</code>.</p> |
1479 | * |
1480 | * @param source the source character stream. |
1481 | * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. |
1482 | * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. |
1483 | */ |
1484 | public Object interpret(Reader source) throws Exception { |
1485 | return interpret(new LineNumberReader(source)); |
1486 | } |
1487 | |
1488 | /** |
1489 | * <p>Interprets a source string using this <code>Lexicon</code>.</p> |
1490 | * |
1491 | * @param source the source string. |
1492 | * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. |
1493 | * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. |
1494 | */ |
1495 | public Object interpret(String source) throws Exception { |
1496 | return interpret(new StringReader(source)); |
1497 | } |
1498 | |
1499 | /** |
1500 | * <p>Interprets a source byte stream using this <code>Lexicon</code>.</p> |
1501 | * |
1502 | * @param source the source byte stream. |
1503 | * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. |
1504 | * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. |
1505 | */ |
1506 | public Object interpret(InputStream source) throws Exception { |
1507 | return interpret(new InputStreamReader(source)); |
1508 | } |
1509 | |
1510 | /** |
1511 | * <p>Interprets the standard input stream using this <code>Lexicon</code>.</p> |
1512 | * |
1513 | * @return the <code>ParseTree</code> constructed by interpreting the standard input stream. |
1514 | * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. |
1515 | */ |
1516 | public Object interpret() throws Exception { |
1517 | return interpret(System.in); |
1518 | } |
1519 | |
1520 | /** |
1521 | * <p>Interprets a source file using this <code>Lexicon</code>.</p> |
1522 | * |
1523 | * @param source the source file. |
1524 | * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. |
1525 | * @throws FileNotFoundException if the source file cannot be found. |
1526 | * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. |
1527 | */ |
1528 | public Object interpret(File source) throws FileNotFoundException, Exception { |
1529 | return interpret(new FileReader(source)); |
1530 | } |
1531 | |
1532 | /** |
1533 | * <p>Interprets a source pipe using this <code>Lexicon</code>.</p> |
1534 | * |
1535 | * @param source the source pipe. |
1536 | * @return the <code>ParseTree</code> constructed by interpreting <code>source</code>. |
1537 | * @throws IOException if the source pipe cannot be connected. |
1538 | * @throws Lexicon.Exception if an I/O, lexical, syntax or semantic error occurs. |
1539 | */ |
1540 | public Object interpret(PipedWriter source) throws IOException, Exception { |
1541 | return interpret(new PipedReader(source)); |
1542 | } |
1543 | //Lexicon.interpret(arguments) |
1544 | /** |
1545 | * <p>The debug switches, initially zero. The following bits enable debugging to standard output:</p> |
1546 | * <blockquote><dl> |
1547 | * <dt><code>0x01</code> = <code>TERMINALS</code></dt> |
1548 | * <dd>Print the set of terminals before lexical analysis</dd> |
1549 | * <dt><code>0x02</code> = <code>LEXICAL</code> </dt> |
1550 | * <dd>Print terminals and associated words grabbed during lexical analysis</dd> |
1551 | * <dt><code>0x04</code> = <code>FIRST_FOLLOW</code></dt> |
1552 | * <dd>Print first and follow sets precomputed during syntax analysis</dd> |
1553 | * <dt><code>0x08</code> = <code>SYNTAX</code></dt> |
1554 | * <dd>Print parsing decisions made during syntax analysis</dd> |
1555 | * <dt><code>0x10</code> = <code>CONFLICT</code></dt> |
1556 | * <dd>Print parsing conflicts encountered during syntax analysis</dd> |
1557 | * <dt><code>0x20</code> = <code>PARSE_TREE</code></dt> |
1558 | * <dd>Print each <code>ParseTree</code> produced by syntax analysis</dd> |
1559 | * </dl></blockquote> |
1560 | * @since 1.1 |
1561 | */ |
1562 | protected int debug = 0; |
1563 | |
1564 | /** |
1565 | * <p>{@link #debug <code>debug</code>} switch constant enabling printing the set of terminals before lexical analysis.</p> |
1566 | * @since 1.1 |
1567 | */ |
1568 | protected static final int TERMINALS = 0x01; |
1569 | /** |
1570 | * <p>{@link #debug <code>debug</code>} switch constant enabling printing terminals and associated words grabbed during lexical analysis.</p> |
1571 | * @since 1.1 |
1572 | */ |
1573 | protected static final int LEXICAL = 0x02; |
1574 | /** |
1575 | * <p>{@link #debug <code>debug</code>} switch constant enabling all debugging.</p> |
1576 | * @since 1.1 |
1577 | */ |
1578 | protected static final int VERBOSE = 0xFF; |
1579 | |
1580 | } |
1581 | // Lexicon |
1582 | |
1583 | public class main { |
1584 | public static void main(String[] args) throws Exception { |
1585 | String s = loadMainJava(); |
1586 | |
1587 | List<String> tok = JavaTok.split(s); |
1588 | for (int i = 1; i+6 < tok.size(); i += 2) |
1589 | if (Character.isLetter(tok.get(i).charAt(0)) |
1590 | && tok.get(i+2).equals("=") && tok.get(i+4).equals("=") |
1591 | && tok.get(i+6).startsWith("\"")) { |
1592 | String var = tok.get(i), str = tok.get(i+6); |
1593 | |
1594 | for (int j = i; j <= i+6; j++) |
1595 | tok.set(j, ""); |
1596 | |
1597 | tok.set(i, str + ".equals(" + var + ")"); |
1598 | i += 6; |
1599 | } |
1600 | |
1601 | saveMainJava(JavaTok.join(tok)); |
1602 | } |
1603 | |
1604 | static String loadMainJava() throws IOException { |
1605 | return loadTextFile("input/main.java", ""); |
1606 | } |
1607 | |
1608 | static void saveMainJava(String s) throws IOException { |
1609 | saveTextFile("output/main.java", s); |
1610 | } |
1611 | |
1612 | public static String quote(String s) { |
1613 | if (s == null) return "null"; |
1614 | return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\""; |
1615 | } |
1616 | |
1617 | static String cncToLines(List<String> cnc) { |
1618 | StringBuilder out = new StringBuilder(); |
1619 | for (String token : cnc) |
1620 | out.append(quote(token) + "\n"); |
1621 | return out.toString(); |
1622 | } |
1623 | |
1624 | |
1625 | static void set(Class c, String field, Object value) { |
1626 | try { |
1627 | Field f = set_findStaticField(c, field); |
1628 | f.setAccessible(true); |
1629 | f.set(null, value); |
1630 | } catch (Exception e) { |
1631 | throw new RuntimeException(e); |
1632 | } |
1633 | } |
1634 | |
1635 | static Field set_findStaticField(Class<?> c, String field) { |
1636 | for (Field f : c.getDeclaredFields()) |
1637 | if (f.getName().equals(field) && (f.getModifiers() & Modifier.STATIC) != 0) |
1638 | return f; |
1639 | throw new RuntimeException("Static field '" + field + "' not found in " + c.getName()); |
1640 | } |
1641 | |
1642 | public static String join(String glue, Iterable<String> strings) { |
1643 | StringBuilder buf = new StringBuilder(); |
1644 | Iterator<String> i = strings.iterator(); |
1645 | if (i.hasNext()) { |
1646 | buf.append(i.next()); |
1647 | while (i.hasNext()) |
1648 | buf.append(glue).append(i.next()); |
1649 | } |
1650 | return buf.toString(); |
1651 | } |
1652 | |
1653 | public static String join(String glue, String[] strings) { |
1654 | return join(glue, Arrays.asList(strings)); |
1655 | } |
1656 | |
1657 | |
1658 | static Object get(Object o, String field) { |
1659 | try { |
1660 | Field f = get_findField(o.getClass(), field); |
1661 | f.setAccessible(true); |
1662 | return f.get(o); |
1663 | } catch (Exception e) { |
1664 | throw new RuntimeException(e); |
1665 | } |
1666 | } |
1667 | |
1668 | static Field get_findField(Class<?> c, String field) { |
1669 | for (Field f : c.getDeclaredFields()) |
1670 | if (f.getName().equals(field)) |
1671 | return f; |
1672 | throw new RuntimeException("Field '" + field + "' not found in " + c.getName()); |
1673 | } |
1674 | |
1675 | static Object first(Object list) { |
1676 | return ((List) list).isEmpty() ? null : ((List) list).get(0); |
1677 | } |
1678 | |
1679 | |
1680 | /** writes safely (to temp file, then rename) */ |
1681 | public static void saveTextFile(String fileName, String contents) throws IOException { |
1682 | File file = new File(fileName); |
1683 | File parentFile = file.getParentFile(); |
1684 | if (parentFile != null) |
1685 | parentFile.mkdirs(); |
1686 | String tempFileName = fileName + "_temp"; |
1687 | FileOutputStream fileOutputStream = new FileOutputStream(tempFileName); |
1688 | OutputStreamWriter outputStreamWriter = new OutputStreamWriter(fileOutputStream, "UTF-8"); |
1689 | PrintWriter printWriter = new PrintWriter(outputStreamWriter); |
1690 | printWriter.print(contents); |
1691 | printWriter.close(); |
1692 | if (file.exists() && !file.delete()) |
1693 | throw new IOException("Can't delete " + fileName); |
1694 | |
1695 | if (!new File(tempFileName).renameTo(file)) |
1696 | throw new IOException("Can't rename " + tempFileName + " to " + fileName); |
1697 | } |
1698 | |
1699 | public static String loadTextFile(String fileName, String defaultContents) throws IOException { |
1700 | if (!new File(fileName).exists()) |
1701 | return defaultContents; |
1702 | |
1703 | FileInputStream fileInputStream = new FileInputStream(fileName); |
1704 | InputStreamReader inputStreamReader = new InputStreamReader(fileInputStream, "UTF-8"); |
1705 | return loadTextFile(inputStreamReader); |
1706 | } |
1707 | |
1708 | public static String loadTextFile(Reader reader) throws IOException { |
1709 | StringBuilder builder = new StringBuilder(); |
1710 | try { |
1711 | BufferedReader bufferedReader = new BufferedReader(reader); |
1712 | String line; |
1713 | while ((line = bufferedReader.readLine()) != null) |
1714 | builder.append(line).append('\n'); |
1715 | } finally { |
1716 | reader.close(); |
1717 | } |
1718 | return builder.length() == 0 ? "" : builder.substring(0, builder.length()-1); |
1719 | } |
1720 | |
1721 | static Class<?> getClass(String name) { |
1722 | try { |
1723 | return Class.forName(name); |
1724 | } catch (ClassNotFoundException e) { |
1725 | return null; |
1726 | } |
1727 | } |
1728 | } |
Began life as a copy of #1000636
download show line numbers debug dex old transpilations
Travelled to 16 computer(s): aoiabmzegqzx, bhatertpkbcr, cahelewubzku, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1000706 |
Snippet name: | Compare strings with "==" instead of "equals" (JavaX translator, transpiled) |
Eternal ID of this version: | #1000706/1 |
Text MD5: | d2542fb1db7a1010fd796059d724561a |
Transpilation MD5: | d2542fb1db7a1010fd796059d724561a |
Author: | stefan |
Category: | |
Type: | JavaX source code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2015-08-23 16:50:31 |
Source code size: | 54965 bytes / 1728 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 834 / 1235 |
Referenced in: | [show references] |