Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

292
LINES

< > BotCompany Repo | #648 // Lexicon test 2 (tokenizing Java)

JavaX source code - run with: x30.jar

1  
!636
2  
!629 // standard functions
3  
!1000300 // class Lexicon
4  
5  
main {
6  
  psvm {
7  
    String src = takeInput(args, "class main {\n  String s;\n}");
8  
    Java20 lex = new Java20();
9  
    src = src.replace("\r\n", "\n");
10  
    LineNumberReader source = new LineNumberReader(new StringReader(src));
11  
    int lineNr = source.getLineNumber()+1;
12  
    for (Object a; (a = lex.grab(source)) != lex.$;) {
13  
      System.out.println("grabbed at line " + lineNr + ": " + a + " " + quote(lex.word()));
14  
      lineNr = source.getLineNumber()+1;
15  
    }
16  
  }
17  
  
18  
  /**
   * Picks the text to tokenize.
   *
   * With at least one command-line argument, the result is
   * loadSnippet(args[0]). With no arguments, the result is
   * loadTextFile("input/input.txt", def).
   *
   * @param args command-line arguments; only args[0] is looked at
   * @param def  value handed to loadTextFile as its second argument
   *             (presumably the fallback text when the file is absent - confirm)
   * @return the text produced by whichever loader was called
   */
  static String takeInput(String[] args, String def) tex {
    return args.length == 0
      ? loadTextFile("input/input.txt", def)
      : loadSnippet(args[0]);
  }
22  
  
23  
  public static String quote(String s) {
24  
    if (s == null) return "null";
25  
    return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\"";
26  
  }
27  
  
28  
  /**
   * Lexicon subclass whose constructor registers, through put(name, expression),
   * the terminals WHITE_SPACE, COMMENT, IDENTIFIER, KEYWORD, INTEGER_LITERAL,
   * FLOATING_POINT_LITERAL, BOOLEAN_LITERAL, CHARACTER_LITERAL, STRING_LITERAL,
   * NULL_LITERAL and a one-character catch-all named OTHER1.
   *
   * The expression classes used here (Repetition, Union, Concatenation,
   * Singleton, Match, NonMatch, Range, PosixClass) are not defined in this
   * block; presumably they are supplied by Lexicon - confirm.
   */
  static class Java20 extends Lexicon {
29  
30  
	Java20() {
31  
32  
		/**
33  
		* Grammar for Java 2.0.
34  
		*
35  
		* Nonterminal - first letter uppercase
36  
		* TERMINAL - all letters uppercase
37  
		* keyword - all letters lowercase
38  
		*/
39  
		int INFINITY = -1; // passed below as the upper bound of Repetition; presumably means "no upper limit" - confirm against Lexicon
40  
41  
		/**
42  
		* 19.3 Terminals from section 3.6: White Space: [[:space:]]
43  
		*/
44  
		put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY));
45  
46  
		/**
47  
		* 19.3 Terminals from section 3.7: Comment
48  
		*/
49  
		put("COMMENT", new Union(
50  
51  
			//
52  
			// Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/
53  
			//
54  
			new Concatenation(
55  
				new Singleton("/*"), new Concatenation(
56  
				new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
57  
				new Repetition(
58  
					new Concatenation(
59  
						new Singleton("*"),
60  
						new Repetition(new Concatenation(
61  
							new NonMatch("*/"),
62  
							new Repetition(new NonMatch("*"), 0, INFINITY)
63  
						), 0, 1)
64  
					), 0, INFINITY
65  
				),
66  
				new Singleton("*/")
67  
			))), new Union(
68  
69  
			/**
70  
			* End Of Line Comment: //[^\n]*\n
71  
			*/
72  
			new Concatenation(
73  
				new Singleton("//"), new Concatenation(
74  
				new Repetition(new NonMatch("\n"), 0, INFINITY),
75  
				new Singleton("\n")
76  
			)),
77  
78  
			//
79  
			// Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/
80  
			//
81  
			new Concatenation(
82  
				new Singleton("/**"), new Concatenation(
83  
				new Repetition(
84  
					new Concatenation(
85  
						new Repetition(new Concatenation(
86  
							new NonMatch("*/"),
87  
							new Repetition(new NonMatch("*"), 0, INFINITY)
88  
						), 0, 1),
89  
						new Singleton("*")
90  
					), 0, INFINITY
91  
				),
92  
				new Singleton("/")
93  
			))
94  
		)));
95  
96  
		put("IDENTIFIER", new Concatenation(
97  
			new Union(
98  
				PosixClass.alpha(),
99  
				new Match("_$")
100  
			),
101  
			new Repetition(
102  
				new Union(
103  
					PosixClass.alnum(),
104  
					new Match("_$")
105  
				), 0, INFINITY
106  
			)
107  
		));
108  
109  
		/**
110  
		* 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar)
111  
		*/
112  
		put("KEYWORD", new Union(
113  
			new Singleton("const"),
114  
			new Singleton("goto")
115  
		));
116  
117  
		/**
118  
		* 19.3 Terminals from section 3.10.1: Integer Literal
119  
		*/
120  
		put("INTEGER_LITERAL", new Concatenation(
121  
			new Union(
122  
				/**
123  
				* Decimal Integer Literal: 0|[1-9][[:digit:]]*
124  
				*/
125  
				new Singleton("0"), new Union(
126  
127  
				new Concatenation(
128  
					new Range('1', '9'),
129  
					new Repetition(PosixClass.digit(), 0, INFINITY)
130  
				), new Union(
131  
132  
				/**
133  
				* Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+
134  
				*/
135  
				new Concatenation(
136  
					new Singleton("0"), new Concatenation(
137  
					new Match("xX"),
138  
					new Repetition(PosixClass.xdigit(), 1, INFINITY)
139  
				)),
140  
141  
				/**
142  
				* Octal Integer Literal: 0[0-7]+
143  
				*/
144  
				new Concatenation(
145  
					new Singleton("0"),
146  
					new Repetition(new Range('0', '7'), 1, INFINITY)
147  
				)
148  
			))),
149  
			new Repetition(new Match("lL"), 0, 1)
150  
		));
151  
152  
		/**
153  
		* 19.3 Terminals from section 3.10.2: Floating-Point Literal
154  
		*/
155  
		put("FLOATING_POINT_LITERAL", new Union(
156  
157  
			/**
158  
			* [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]?
159  
			*/
160  
			new Concatenation(
161  
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
162  
				new Singleton("."), new Concatenation(
163  
				new Repetition(PosixClass.digit(), 0, INFINITY), new Concatenation(
164  
				new Repetition(new Concatenation(
165  
					new Match("eE"), new Concatenation(
166  
					new Repetition(new Match("-+"), 0, 1),
167  
					new Repetition(PosixClass.digit(), 1, INFINITY)
168  
				)), 0, 1),
169  
				new Repetition(new Match("fFdD"), 0, 1)
170  
			)))), new Union(
171  
172  
			/**
173  
			* \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]?
174  
			*/
175  
			new Concatenation(
176  
				new Singleton("."), new Concatenation(
177  
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
178  
				new Repetition(new Concatenation(
179  
					new Match("eE"), new Concatenation(
180  
					new Repetition(new Match("-+"), 0, 1),
181  
					new Repetition(PosixClass.digit(), 1, INFINITY)
182  
				)), 0, 1),
183  
				new Repetition(new Match("fFdD"), 0, 1)
184  
			))), new Union(
185  
186  
			/**
187  
			* [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]?
188  
			*/
189  
			new Concatenation(
190  
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
191  
				new Match("eE"), new Concatenation(
192  
				new Repetition(new Match("-+"), 0, 1), new Concatenation(
193  
				new Repetition(PosixClass.digit(), 1, INFINITY),
194  
				new Repetition(new Match("fFdD"), 0, 1)
195  
			)))),
196  
197  
			/**
198  
			* [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]
199  
			*/
200  
			new Concatenation(
201  
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
202  
				new Repetition(new Concatenation(
203  
					new Match("eE"), new Concatenation(
204  
					new Repetition(new Match("-+"), 0, 1),
205  
					new Repetition(PosixClass.digit(), 1, INFINITY)
206  
				)), 0, 1),
207  
				new Match("fFdD")
208  
			))
209  
		))));
210  
211  
		/**
212  
		* 19.3 Terminals from section 3.10.3: Boolean Literal
213  
		*/
214  
		put("BOOLEAN_LITERAL", new Union(
215  
			new Singleton("true"),
216  
			new Singleton("false")
217  
		));
218  
219  
		/**
220  
		* 19.3 Terminals from section 3.10.4: Character Literal
221  
		*/
222  
		put("CHARACTER_LITERAL", new Concatenation(
223  
			new Singleton("'"), new Concatenation(
224  
			new Union(
225  
226  
				/**
227  
				* Single Character: [^\r\n'\\]
228  
				*/
229  
				new NonMatch("\r\n'\\"),
230  
231  
				/**
232  
				* Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
233  
				*/
234  
				new Concatenation(
235  
					new Singleton("\\"),
236  
					new Union(
237  
						new Match("btnfr\"'\\"),
238  
						new Concatenation(
239  
							new Repetition(new Range('0', '3'), 0, 1),
240  
							new Repetition(new Range('0', '7'), 1, 2)
241  
						)
242  
					)
243  
				)
244  
			),
245  
			new Singleton("'")
246  
		)));
247  
248  
		/**
249  
		* 19.3 Terminals from section 3.10.5: String Literal
250  
		*/
251  
		put("STRING_LITERAL", new Concatenation(
252  
			new Singleton("\""), new Concatenation(
253  
			new Repetition(
254  
				new Union(
255  
256  
					/**
257  
					* Single Character: [^\r\n"\\]
258  
					*/
259  
					new NonMatch("\r\n\"\\"),
260  
261  
					/**
262  
					* Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
263  
					*/
264  
					new Concatenation(
265  
						new Singleton("\\"),
266  
						new Union(
267  
							new Match("btnfr\"'\\"),
268  
							new Concatenation(
269  
								new Repetition(new Range('0', '3'), 0, 1),
270  
								new Repetition(new Range('0', '7'), 1, 2)
271  
							)
272  
						)
273  
					)
274  
				), 0, INFINITY
275  
			),
276  
			new Singleton("\"")
277  
		)));
278  
279  
		/**
280  
		* 19.3 Terminals from section 3.10.7: Null Literal
281  
		*/
282  
		put("NULL_LITERAL", new Singleton("null"));
283  
		
284  
		// OK, it seems we have to add some more stuff...
285  
		
286  
		//put("OTHER1", new Match(";{}=,<>[]().+-:|&!"));
287  
		//put("OTHER1", new NonMatch("")); // catch anything, one character at a time
288  
		put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time
289  
290  
	}
291  
} // class Java20
292  
}

Author comment

Began life as a copy of #646

download  show line numbers  debug dex  old transpilations   

Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, qbtsjoyahagl, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #648
Snippet name: Lexicon test 2 (tokenizing Java)
Eternal ID of this version: #648/1
Text MD5: fe078705832195394c64b47b92834d91
Author: stefan
Category: javax
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-06-27 17:07:16
Source code size: 7606 bytes / 292 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 640 / 545
Referenced in: [show references]