1 | !636 |
2 | !1000300 // class Lexicon |
3 | |
4 | main { |
5 | psvm { |
6 | String src = "class main { String s; }"; |
7 | MyLexicon lex = new MyLexicon(); |
8 | lex.lexicalRules(true); |
9 | LineNumberReader source = new LineNumberReader(new StringReader(src)); |
10 | for (Object a; (a = lex.grab(source)) != lex.$;) { |
11 | System.out.println("grabbed: " + a); |
12 | } |
13 | } |
14 | |
/**
 * Lexicon subclass that declares the terminals used by this tokenizing test.
 *
 * Every terminal is registered with put(name, expression) inside
 * lexicalRules(boolean). The expressions are composed from the expression
 * classes referenced below (Singleton, Match, NonMatch, Range, Concatenation,
 * Union, Repetition, PosixClass), none of which are declared in this file.
 */
static class MyLexicon extends Lexicon {
  // Extra characters, in addition to PosixClass.alpha(), accepted as the FIRST character of an IDENTIFIER.
  private static final String idChars1 = "_$?-#+";
  // Extra characters, in addition to PosixClass.alnum(), accepted in the REMAINING characters of an IDENTIFIER.
  private static final String idChars2 = "_$-.+";

  /**
   * Registers the terminal definitions, in this order: WHITE_SPACE, COMMENT,
   * IDENTIFIER, INTEGER_LITERAL, STRING_LITERAL, SQ_STRING_LITERAL,
   * ML_STRING_LITERAL, FLOATING_POINT_LITERAL.
   *
   * Union and Concatenation are always called with exactly two operands here,
   * so rules with more than two parts are written as right-nested pairs.
   *
   * NOTE(review): whether the registration order influences which terminal
   * wins on ambiguous input is decided inside Lexicon and is not visible here.
   *
   * @param allowShortFloatingPoints when true, the "digits '.' digits" form of
   *        FLOATING_POINT_LITERAL uses a minimum of 0 digits after the decimal
   *        point instead of 1
   */
  void lexicalRules(boolean allowShortFloatingPoints) {
    // These rules are taken from Java10.java (part of gi-0.9 distribution)

    // Passed as the upper bound of Repetition wherever the regex comments show '*' or '+';
    // presumably Lexicon's marker for "unbounded" -- confirm against Lexicon.
    int INFINITY = -1;

    /**
     * 19.3 Terminals from section 3.6: White Space: [[:space:]]
     */
    put("WHITE_SPACE", PosixClass.space());

    /**
     * 19.3 Terminals from section 3.7: Comment
     * Three alternatives: traditional, end-of-line and documentation comment.
     */
    put("COMMENT", new Union(

      //
      // Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/
      // (the [^*]+ part requires at least one non-'*' character right after the opening "/*")
      //
      new Concatenation(
        new Singleton("/*"), new Concatenation(
        new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
        new Repetition(
          new Concatenation(
            new Singleton("*"),
            new Repetition(new Concatenation(
              new NonMatch("*/"),
              new Repetition(new NonMatch("*"), 0, INFINITY)
            ), 0, 1)
          ), 0, INFINITY
        ),
        new Singleton("*/")
      ))), new Union(

      /**
       * End Of Line Comment: //[^\n]*\n
       * NOTE(review): the terminating "\n" is mandatory in this expression, so a
       * "//" comment at the very end of the input without a newline would not
       * match this alternative -- confirm whether that is intended.
       */
      new Concatenation(
        new Singleton("//"), new Concatenation(
        new Repetition(new NonMatch("\n"), 0, INFINITY),
        new Singleton("\n")
      )),

      //
      // Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/
      //
      new Concatenation(
        new Singleton("/**"), new Concatenation(
        new Repetition(
          new Concatenation(
            new Repetition(new Concatenation(
              new NonMatch("*/"),
              new Repetition(new NonMatch("*"), 0, INFINITY)
            ), 0, 1),
            new Singleton("*")
          ), 0, INFINITY
        ),
        new Singleton("/")
      ))
    )));

    // Identifier: one character from [[:alpha:]] or idChars1, followed by any
    // number of characters from [[:alnum:]] or idChars2.
    put("IDENTIFIER", new Concatenation(
      new Union(
        PosixClass.alpha(),
        new Match(idChars1)
      ),
      new Repetition(
        new Union(
          PosixClass.alnum(),
          new Match(idChars2)
        ), 0, INFINITY
      )
    ));

    /**
     * 19.3 Terminals from section 3.10.1: Integer Literal
     * One of the decimal / hexadecimal / octal forms, followed by an optional [lL] suffix.
     */
    put("INTEGER_LITERAL", new Concatenation(
      new Union(
        /**
         * Decimal Integer Literal: 0|[1-9][[:digit:]]*
         */
        new Singleton("0"), new Union(

        new Concatenation(
          new Range('1', '9'),
          new Repetition(PosixClass.digit(), 0, INFINITY)
        ), new Union(

        /**
         * Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+
         */
        new Concatenation(
          new Singleton("0"), new Concatenation(
          new Match("xX"),
          new Repetition(PosixClass.xdigit(), 1, INFINITY)
        )),

        /**
         * Octal Integer Literal: 0[0-7]+
         */
        new Concatenation(
          new Singleton("0"),
          new Repetition(new Range('0', '7'), 1, INFINITY)
        )
      ))),
      new Repetition(new Match("lL"), 0, 1)
    ));

    /**
     * 19.3 Terminals from Java section 3.10.5: String Literal - extended for multi-line strings
     */
    put("STRING_LITERAL", new Concatenation(
      new Singleton("\""), new Concatenation(
      new Repetition(
        new Union(

          /**
           * Single Character: [^"\\]
           * (\r and \n are NOT excluded here, unlike the Java rule quoted on
           * the next line; this is the multi-line extension.)
           */
          new NonMatch("\"\\"), // Java: new NonMatch("\r\n\"\\"),

          /**
           * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
           */
          new Concatenation(
            new Singleton("\\"),
            new Union(
              new Match("btnfr\"'\\"),
              new Concatenation(
                new Repetition(new Range('0', '3'), 0, 1),
                new Repetition(new Range('0', '7'), 1, 2)
              )
            )
          )
        ), 0, INFINITY
      ),
      new Singleton("\"")
    )));

    /**
     * Addition for trees: Single-quote string literal
     * (is stored quoted with double-quotes. Yeah, it's
     * a little confusing, but don't worry.)
     * The expression has the same shape as STRING_LITERAL, with ' as the
     * delimiter and with \r and \n excluded from the plain characters.
     */
    put("SQ_STRING_LITERAL", new Concatenation(
      new Singleton("\'"), new Concatenation(
      new Repetition(
        new Union(

          /**
           * Single Character: [^\r\n'\\]
           */
          new NonMatch("\r\n\'\\"),

          /**
           * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
           */
          new Concatenation(
            new Singleton("\\"),
            new Union(
              new Match("btnfr\"'\\"),
              new Concatenation(
                new Repetition(new Range('0', '3'), 0, 1),
                new Repetition(new Range('0', '7'), 1, 2)
              )
            )
          )
        ), 0, INFINITY
      ),
      new Singleton("\'")
    )));

    /**
     * Multi-line string literals within brackets (>> hello world <<)
     * As a regex: >>([^<]|<[^<])*<<
     */
    put("ML_STRING_LITERAL", new Concatenation(
      new Singleton(">>"), new Concatenation(
      new Repetition(
        new Union(
          // either not a closing bracket..
          new NonMatch("<"),

          // or one closing bracket but not two of them
          new Concatenation(
            new Singleton("<"),
            new NonMatch("<"))
        ), 0, INFINITY),
      new Singleton("<<")
    )));

    /**
     * 19.3 Terminals from section 3.10.2: Floating-Point Literal
     * Four alternatives, each described by the regex above it.
     */
    put("FLOATING_POINT_LITERAL", new Union(

      /**
       * [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]?
       * The minimum number of digits after the '.' is 0 when
       * allowShortFloatingPoints is true, otherwise 1.
       */
      new Concatenation(
        new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
        new Singleton("."), new Concatenation(
        new Repetition(PosixClass.digit(), allowShortFloatingPoints ? 0 : 1, INFINITY), new Concatenation(
        new Repetition(new Concatenation(
          new Match("eE"), new Concatenation(
          new Repetition(new Match("-+"), 0, 1),
          new Repetition(PosixClass.digit(), 1, INFINITY)
        )), 0, 1),
        new Repetition(new Match("fFdD"), 0, 1)
      )))), new Union(

      /**
       * \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]?
       */
      new Concatenation(
        new Singleton("."), new Concatenation(
        new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
        new Repetition(new Concatenation(
          new Match("eE"), new Concatenation(
          new Repetition(new Match("-+"), 0, 1),
          new Repetition(PosixClass.digit(), 1, INFINITY)
        )), 0, 1),
        new Repetition(new Match("fFdD"), 0, 1)
      ))), new Union(

      /**
       * [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]?
       */
      new Concatenation(
        new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
        new Match("eE"), new Concatenation(
        new Repetition(new Match("-+"), 0, 1), new Concatenation(
        new Repetition(PosixClass.digit(), 1, INFINITY),
        new Repetition(new Match("fFdD"), 0, 1)
      )))),

      /**
       * [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]
       */
      new Concatenation(
        new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
        new Repetition(new Concatenation(
          new Match("eE"), new Concatenation(
          new Repetition(new Match("-+"), 0, 1),
          new Repetition(PosixClass.digit(), 1, INFINITY)
        )), 0, 1),
        new Match("fFdD")
      ))
    ))));
  }
} // class MyLexicon
269 | } |
download show line numbers debug dex old transpilations
Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, qbtsjoyahagl, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #646 |
Snippet name: | Lexicon test (tokenizing) |
Eternal ID of this version: | #646/1 |
Text MD5: | 8a857a79cd64aea9ea7deeb3d0ca7a94 |
Author: | stefan |
Category: | javax |
Type: | JavaX source code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2015-06-26 21:22:08 |
Source code size: | 7446 bytes / 269 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 691 / 607 |
Referenced in: | [show references] |