Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

265
LINES

< > BotCompany Repo | #2000394 // class Java20 (Java tokenizer)

New Tinybrain snippet

1  
  
2  
  class Java20 extends Lexicon {
3  
4  
	Java20() {
5  
6  
		/**
7  
		* Grammar for Java 2.0.
8  
		*
9  
		* Nonterminal - first letter uppercase
10  
		* TERMINAL - all letters uppercase
11  
		* keyword - all letters lowercase
12  
		*/
13  
		int INFINITY = -1;
14  
15  
		/**
16  
		* 19.3 Terminals from section 3.6: White Space: [[:space:]]
17  
		*/
18  
		put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY));
19  
20  
		/**
21  
		* 19.3 Terminals from section 3.7: Comment
22  
		*/
23  
		put("COMMENT", new Union(
24  
25  
			//
26  
			// Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/
27  
			//
28  
			new Concatenation(
29  
				new Singleton("/*"), new Concatenation(
30  
				new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
31  
				new Repetition(
32  
					new Concatenation(
33  
						new Singleton("*"),
34  
						new Repetition(new Concatenation(
35  
							new NonMatch("*/"),
36  
							new Repetition(new NonMatch("*"), 0, INFINITY)
37  
						), 0, 1)
38  
					), 0, INFINITY
39  
				),
40  
				new Singleton("*/")
41  
			))), new Union(
42  
43  
			/**
44  
			* End Of Line Comment: //[^\n]*\n
45  
			*/
46  
			new Concatenation(
47  
				new Singleton("//"), new Concatenation(
48  
				new Repetition(new NonMatch("\n"), 0, INFINITY),
49  
				new Singleton("\n")
50  
			)),
51  
52  
			//
53  
			// Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/
54  
			//
55  
			new Concatenation(
56  
				new Singleton("/**"), new Concatenation(
57  
				new Repetition(
58  
					new Concatenation(
59  
						new Repetition(new Concatenation(
60  
							new NonMatch("*/"),
61  
							new Repetition(new NonMatch("*"), 0, INFINITY)
62  
						), 0, 1),
63  
						new Singleton("*")
64  
					), 0, INFINITY
65  
				),
66  
				new Singleton("/")
67  
			))
68  
		)));
69  
70  
		put("IDENTIFIER", new Concatenation(
71  
			new Union(
72  
				PosixClass.alpha(),
73  
				new Match("_$")
74  
			),
75  
			new Repetition(
76  
				new Union(
77  
					PosixClass.alnum(),
78  
					new Match("_$")
79  
				), 0, INFINITY
80  
			)
81  
		));
82  
83  
		/**
84  
		* 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar)
85  
		*/
86  
		put("KEYWORD", new Union(
87  
			new Singleton("const"),
88  
			new Singleton("goto")
89  
		));
90  
91  
		/**
92  
		* 19.3 Terminals from section 3.10.1: Integer Literal
93  
		*/
94  
		put("INTEGER_LITERAL", new Concatenation(
95  
			new Union(
96  
				/**
97  
				* Decimal Integer Literal: 0|[1-9][[:digit:]]*
98  
				*/
99  
				new Singleton("0"), new Union(
100  
101  
				new Concatenation(
102  
					new Range('1', '9'),
103  
					new Repetition(PosixClass.digit(), 0, INFINITY)
104  
				), new Union(
105  
106  
				/**
107  
				* Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+
108  
				*/
109  
				new Concatenation(
110  
					new Singleton("0"), new Concatenation(
111  
					new Match("xX"),
112  
					new Repetition(PosixClass.xdigit(), 1, INFINITY)
113  
				)),
114  
115  
				/**
116  
				* Octal Integer Literal: 0[0-7]+
117  
				*/
118  
				new Concatenation(
119  
					new Singleton("0"),
120  
					new Repetition(new Range('0', '7'), 1, INFINITY)
121  
				)
122  
			))),
123  
			new Repetition(new Match("lL"), 0, 1)
124  
		));
125  
126  
		/**
127  
		* 19.3 Terminals from section 3.10.2: Floating-Point Literal
128  
		*/
129  
		put("FLOATING_POINT_LITERAL", new Union(
130  
131  
			/**
132  
			* [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]?
133  
			*/
134  
			new Concatenation(
135  
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
136  
				new Singleton("."), new Concatenation(
137  
				new Repetition(PosixClass.digit(), 0, INFINITY), new Concatenation(
138  
				new Repetition(new Concatenation(
139  
					new Match("eE"), new Concatenation(
140  
					new Repetition(new Match("-+"), 0, 1),
141  
					new Repetition(PosixClass.digit(), 1, INFINITY)
142  
				)), 0, 1),
143  
				new Repetition(new Match("fFdD"), 0, 1)
144  
			)))), new Union(
145  
146  
			/**
147  
			* \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]?
148  
			*/
149  
			new Concatenation(
150  
				new Singleton("."), new Concatenation(
151  
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
152  
				new Repetition(new Concatenation(
153  
					new Match("eE"), new Concatenation(
154  
					new Repetition(new Match("-+"), 0, 1),
155  
					new Repetition(PosixClass.digit(), 1, INFINITY)
156  
				)), 0, 1),
157  
				new Repetition(new Match("fFdD"), 0, 1)
158  
			))), new Union(
159  
160  
			/**
161  
			* [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]?
162  
			*/
163  
			new Concatenation(
164  
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
165  
				new Match("eE"), new Concatenation(
166  
				new Repetition(new Match("-+"), 0, 1), new Concatenation(
167  
				new Repetition(PosixClass.digit(), 1, INFINITY),
168  
				new Repetition(new Match("fFdD"), 0, 1)
169  
			)))),
170  
171  
			/**
172  
			* [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]
173  
			*/
174  
			new Concatenation(
175  
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
176  
				new Repetition(new Concatenation(
177  
					new Match("eE"), new Concatenation(
178  
					new Repetition(new Match("-+"), 0, 1),
179  
					new Repetition(PosixClass.digit(), 1, INFINITY)
180  
				)), 0, 1),
181  
				new Match("fFdD")
182  
			))
183  
		))));
184  
185  
		/**
186  
		* 19.3 Terminals from section 3.10.3: Boolean Literal
187  
		*/
188  
		put("BOOLEAN_LITERAL", new Union(
189  
			new Singleton("true"),
190  
			new Singleton("false")
191  
		));
192  
193  
		/**
194  
		* 19.3 Terminals from section 3.10.4: Character Literal
195  
		*/
196  
		put("CHARACTER_LITERAL", new Concatenation(
197  
			new Singleton("'"), new Concatenation(
198  
			new Union(
199  
200  
				/**
201  
				* Single Character: [^\r\n'\\]
202  
				*/
203  
				new NonMatch("\r\n'\\"),
204  
205  
				/**
206  
				* Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
207  
				*/
208  
				new Concatenation(
209  
					new Singleton("\\"),
210  
					new Union(
211  
						new Match("btnfr\"'\\"),
212  
						new Concatenation(
213  
							new Repetition(new Range('0', '3'), 0, 1),
214  
							new Repetition(new Range('0', '7'), 1, 2)
215  
						)
216  
					)
217  
				)
218  
			),
219  
			new Singleton("'")
220  
		)));
221  
222  
		/**
223  
		* 19.3 Terminals from section 3.10.5: String Literal
224  
		*/
225  
		put("STRING_LITERAL", new Concatenation(
226  
			new Singleton("\""), new Concatenation(
227  
			new Repetition(
228  
				new Union(
229  
230  
					/**
231  
					* Single Character: [^\r\n"\\]
232  
					*/
233  
					new NonMatch("\r\n\"\\"),
234  
235  
					/**
236  
					* Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
237  
					*/
238  
					new Concatenation(
239  
						new Singleton("\\"),
240  
						new Union(
241  
							new Match("btnfr\"'\\"),
242  
							new Concatenation(
243  
								new Repetition(new Range('0', '3'), 0, 1),
244  
								new Repetition(new Range('0', '7'), 1, 2)
245  
							)
246  
						)
247  
					)
248  
				), 0, INFINITY
249  
			),
250  
			new Singleton("\"")
251  
		)));
252  
253  
		/**
254  
		* 19.3 Terminals section 3.10.7: Null Literal
255  
		*/
256  
		put("NULL_LITERAL", new Singleton("null"));
257  
		
258  
		// OK, it seems we have to add some more stuff...
259  
		
260  
		//put("OTHER1", new Match(";{}=,<>[]().+-:|&!"));
261  
		//put("OTHER1", new NonMatch("")); // catch anything, one character at a time
262  
		put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time
263  
264  
	}
265  
} // class Java20

Author comment

Began life as a copy of #648

download  show line numbers   

Snippet is not live.

Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

Comments [hide]

ID Author/Program Comment Date
301 #1000604 (pitcher) 2015-08-18 00:54:56
257 #1000610 (pitcher) Edit suggestion:
!636
!629

main {
// NOTE(review): "main { ... }" is not plain Java; presumably it is expanded by
// the translators named in the "!636" / "!629" lines above - confirm.
static Object androidContext;
static String programID;

public static void main(String[] args) throws Exception {

// Local class: a Lexicon whose constructor registers one regular expression
// per terminal through put(name, expression). Read against the regexes quoted
// in the comments below, the building blocks line up as:
//   Singleton("x") = literal text, Match("ab") = [ab], NonMatch("ab") = [^ab],
//   Range('0', '7') = [0-7], Repetition(e, min, max) = e{min,max},
//   Concatenation(a, b) = ab, Union(a, b) = a|b.
// Concatenation and Union are only ever called with two operands here, so
// longer chains are right-nested and close with a run of parentheses.
// NOTE(review): main() only declares this class; nothing visible here
// instantiates it.
class Java20 extends Lexicon {

// Registers, in order: WHITE_SPACE, COMMENT, IDENTIFIER, KEYWORD,
// INTEGER_LITERAL, FLOATING_POINT_LITERAL, BOOLEAN_LITERAL, CHARACTER_LITERAL,
// STRING_LITERAL, NULL_LITERAL, OTHER1.
// NOTE(review): some terminals overlap (e.g. "goto" fits IDENTIFIER and
// KEYWORD); how Lexicon resolves that is not visible here - confirm.
Java20() {

/**
* Grammar for Java 2.0.
*
* Nonterminal - first letter uppercase
* TERMINAL - all letters uppercase
* keyword - all letters lowercase
*/
// Passed as the upper bound of Repetition wherever the regex uses '*' or '+'
// (presumably Lexicon's "no upper limit" marker - confirm).
int INFINITY = -1;

/**
* 19.3 Terminals from section 3.6: White Space: [[:space:]]+
* (the repetition below is one-or-more, so a whitespace run is one token)
*/
put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY));

/**
* 19.3 Terminals from section 3.7: Comment
* Shape: Union(traditional, Union(end-of-line, documentation)).
*/
put("COMMENT", new Union(

//
// Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/
// At least one non-'*' character is required after "/*"; the text "/**/"
// is covered by the documentation-comment branch further down.
//
new Concatenation(
new Singleton("/*"), new Concatenation(
new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
new Repetition(
new Concatenation(
new Singleton("*"),
new Repetition(new Concatenation(
new NonMatch("*/"),
new Repetition(new NonMatch("*"), 0, INFINITY)
), 0, 1)
), 0, INFINITY
),
new Singleton("*/")
))), new Union(

/**
* End Of Line Comment: //[^\n]*\n
* NOTE(review): the closing "\n" is mandatory and becomes part of the token,
* so a "//" comment ending the input without a newline does not fit this
* branch - confirm that is intended.
*/
new Concatenation(
new Singleton("//"), new Concatenation(
new Repetition(new NonMatch("\n"), 0, INFINITY),
new Singleton("\n")
)),

//
// Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/
//
new Concatenation(
new Singleton("/**"), new Concatenation(
new Repetition(
new Concatenation(
new Repetition(new Concatenation(
new NonMatch("*/"),
new Repetition(new NonMatch("*"), 0, INFINITY)
), 0, 1),
new Singleton("*")
), 0, INFINITY
),
new Singleton("/")
))
)));

// Identifier: ([[:alpha:]]|[_$])([[:alnum:]]|[_$])*
put("IDENTIFIER", new Concatenation(
new Union(
PosixClass.alpha(),
new Match("_$")
),
new Repetition(
new Union(
PosixClass.alnum(),
new Match("_$")
), 0, INFINITY
)
));

/**
* 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar)
*/
put("KEYWORD", new Union(
new Singleton("const"),
new Singleton("goto")
));

/**
* 19.3 Terminals from section 3.10.1: Integer Literal
* Shape: (decimal-zero | decimal | hex | octal) followed by [lL]?
*/
put("INTEGER_LITERAL", new Concatenation(
new Union(
/**
* Decimal Integer Literal: 0|[1-9][[:digit:]]*
*/
new Singleton("0"), new Union(

new Concatenation(
new Range('1', '9'),
new Repetition(PosixClass.digit(), 0, INFINITY)
), new Union(

/**
* Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+
*/
new Concatenation(
new Singleton("0"), new Concatenation(
new Match("xX"),
new Repetition(PosixClass.xdigit(), 1, INFINITY)
)),

/**
* Octal Integer Literal: 0[0-7]+
*/
new Concatenation(
new Singleton("0"),
new Repetition(new Range('0', '7'), 1, INFINITY)
)
))),
new Repetition(new Match("lL"), 0, 1)
));

/**
* 19.3 Terminals from section 3.10.2: Floating-Point Literal
* Four alternatives, right-nested in Union.
*/
put("FLOATING_POINT_LITERAL", new Union(

/**
* [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]?
*/
new Concatenation(
new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
new Singleton("."), new Concatenation(
new Repetition(PosixClass.digit(), 0, INFINITY), new Concatenation(
new Repetition(new Concatenation(
new Match("eE"), new Concatenation(
new Repetition(new Match("-+"), 0, 1),
new Repetition(PosixClass.digit(), 1, INFINITY)
)), 0, 1),
new Repetition(new Match("fFdD"), 0, 1)
)))), new Union(

/**
* \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]?
*/
new Concatenation(
new Singleton("."), new Concatenation(
new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
new Repetition(new Concatenation(
new Match("eE"), new Concatenation(
new Repetition(new Match("-+"), 0, 1),
new Repetition(PosixClass.digit(), 1, INFINITY)
)), 0, 1),
new Repetition(new Match("fFdD"), 0, 1)
))), new Union(

/**
* [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]?
*/
new Concatenation(
new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
new Match("eE"), new Concatenation(
new Repetition(new Match("-+"), 0, 1), new Concatenation(
new Repetition(PosixClass.digit(), 1, INFINITY),
new Repetition(new Match("fFdD"), 0, 1)
)))),

/**
* [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]
* (here the type suffix is mandatory, not optional)
*/
new Concatenation(
new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
new Repetition(new Concatenation(
new Match("eE"), new Concatenation(
new Repetition(new Match("-+"), 0, 1),
new Repetition(PosixClass.digit(), 1, INFINITY)
)), 0, 1),
new Match("fFdD")
))
))));

/**
* 19.3 Terminals from section 3.10.3: Boolean Literal
*/
put("BOOLEAN_LITERAL", new Union(
new Singleton("true"),
new Singleton("false")
));

/**
* 19.3 Terminals from section 3.10.4: Character Literal
* Shape: ' (single character | escape sequence) '
*/
put("CHARACTER_LITERAL", new Concatenation(
new Singleton("'"), new Concatenation(
new Union(

/**
* Single Character: [^\r\n'\\]
*/
new NonMatch("\r\n'\\"),

/**
* Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
*/
new Concatenation(
new Singleton("\\"),
new Union(
new Match("btnfr\"'\\"),
new Concatenation(
new Repetition(new Range('0', '3'), 0, 1),
new Repetition(new Range('0', '7'), 1, 2)
)
)
)
),
new Singleton("'")
)));

/**
* 19.3 Terminals from section 3.10.5: String Literal
* Shape: " (single character | escape sequence)* "
*/
put("STRING_LITERAL", new Concatenation(
new Singleton("\""), new Concatenation(
new Repetition(
new Union(

/**
* Single Character: [^\r\n"\\]
*/
new NonMatch("\r\n\"\\"),

/**
* Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
*/
new Concatenation(
new Singleton("\\"),
new Union(
new Match("btnfr\"'\\"),
new Concatenation(
new Repetition(new Range('0', '3'), 0, 1),
new Repetition(new Range('0', '7'), 1, 2)
)
)
)
), 0, INFINITY
),
new Singleton("\"")
)));

/**
* 19.3 Terminals from section 3.10.7: Null Literal
*/
put("NULL_LITERAL", new Singleton("null"));

// OK, it seems we have to add some more stuff...

// Earlier attempts at the catch-all terminal, kept for reference:
//put("OTHER1", new Match(";{}=,<>[]().+-:|&!"));
//put("OTHER1", new NonMatch("")); // catch anything, one character at a time
// Current catch-all: exactly one character that is not space, tab, CR or LF.
put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time

}
} // class Java20

}}
2015-08-18 00:53:13

add comment

Snippet ID: #2000394
Snippet name: class Java20 (Java tokenizer)
Eternal ID of this version: #2000394/1
Text MD5: 0f87ae118d54e7797fcee74aef40c57b
Author: stefan
Category: javax
Type: New Tinybrain snippet
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-06-27 08:59:23
Source code size: 6681 bytes / 265 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 847 / 490
Referenced in: [show references]