1 | /* |
2 | * Copyright 2008-2010 the T2 Project and the Others. |
3 | */ |
4 | |
/**
 * Pull-style tokenizer for JSON text.
 *
 * <p>
 * Each call to {@link #nextToken()} scans forward from the current position
 * to the next structural character (one of <code>{ } [ ] : ,</code>) and
 * returns its {@link TokenType}. The text that was skipped on the way is then
 * available through {@link #getToken()}:
 * </p>
 * <ul>
 * <li>text in front of a colon (an object key) has its surrounding quotes
 * removed;</li>
 * <li>text in front of a comma, a closing bracket or a closing brace (a
 * value) is returned verbatim, quotes included;</li>
 * <li>for an opening brace or bracket the token is the remainder of the
 * input, starting at the current position.</li>
 * </ul>
 *
 * <p>
 * Structural characters that appear inside a quoted string (single or double
 * quotes, with backslash escapes) are not treated as token boundaries.
 * </p>
 *
 * <p>
 * The tokenizer does not validate grammar. Note that the initial state is
 * derived from the very first character only: if the input does not start
 * with one of <code>{ } [ ]</code> (for example because of leading whitespace
 * or because it is a bare value), the current type stays {@link TokenType#EOF}
 * and {@link #nextToken()} returns {@code EOF} immediately, with
 * {@link #getToken()} yielding the whole input.
 * </p>
 */
class JSONTokenizer {

    public static final char COMMA = ',';

    public static final char COLON = ':';

    public static final char SINGLE_QUOTE = '\'';

    public static final char DOUBLE_QUOTE = '\"';

    public static final char START_BRACKET = '[';

    public static final char END_BRACKET = ']';

    public static final char START_BRACE = '{';

    public static final char END_BRACE = '}';

    // public static final char UTF8_BOM = 0xFEFF;

    /** The complete input text; the literal string "null" when constructed with {@code null}. */
    protected final String orgString;

    /** Cached length of {@link #orgString}. */
    protected final int orgLength;

    /** Text of the most recently produced token; initially the whole input. */
    protected String tokenString;

    /** Index of the structural character most recently recorded by {@link #prepare(int)}. */
    protected int pos;

    /** Index at which the next scan starts (always {@code pos + 1}). */
    protected int nextPos;

    /** Type recorded for the current position; see {@link #currentTokenType()}. */
    protected TokenType type = TokenType.EOF;

    /**
     * Creates a tokenizer over the given text.
     *
     * @param s
     *            the JSON text; {@code null} is treated as the literal text
     *            "null"
     */
    public JSONTokenizer(String s) {
        // TODO : check grammar first.
        this.orgString = (s != null) ? s : "null";
        this.orgLength = this.orgString.length();
        this.tokenString = this.orgString;
        prepare(0);
    }

    /**
     * Records index {@code i} as the current position.
     *
     * <p>
     * If the character at {@code i} is a brace or bracket, the current type
     * is set accordingly; for any other character the current type is left
     * unchanged. An index at or beyond the end of the input sets the current
     * type to {@link TokenType#EOF} and leaves the positions untouched.
     * </p>
     *
     * @param i
     *            index into the input text
     */
    protected void prepare(int i) {
        if (i < orgLength) {
            char ch = orgString.charAt(i);
            if (ch == START_BRACE) {
                type = TokenType.START_BRACE;
            } else if (ch == END_BRACE) {
                type = TokenType.END_BRACE;
            } else if (ch == START_BRACKET) {
                type = TokenType.START_BRACKET;
            } else if (ch == END_BRACKET) {
                type = TokenType.END_BRACKET;
            }
            this.pos = i;
            this.nextPos = i + 1;
        } else {
            type = TokenType.EOF;
        }
    }

    /**
     * Advances to the next structural character and returns its type.
     *
     * <p>
     * After the call {@link #getToken()} returns the text associated with the
     * returned type (see the class description). When no further structural
     * character exists, {@link TokenType#EOF} is returned; this also happens
     * when a quoted string is opened but never closed.
     * </p>
     *
     * @return the type of the structural character that was reached, or
     *         {@link TokenType#EOF} when the input is exhausted
     * @throws IllegalStateException
     *             if a colon is the last character of the input
     * @throws IllegalArgumentException
     *             if a comma is the last character of the input
     */
    public TokenType nextToken() {
        if (type == TokenType.EOF) {
            return TokenType.EOF;
        }
        final int lastIndex = this.orgLength - 1;
        String s = this.tokenString;
        TokenType retType = TokenType.EOF;
        boolean key = true;

        // Quote tracking: structural characters inside a quoted string must
        // not end the token. '\0' means "not inside a string".
        char openQuote = '\0';
        boolean escaped = false;
        // A quote only opens a string when it is the first significant
        // character of the element, so unquoted text such as  it's  keeps
        // being scanned character by character.
        boolean elementStarted = false;

        for (int i = this.nextPos; i < this.orgLength; i++) {
            final char ch = this.orgString.charAt(i);
            if (openQuote != '\0') {
                if (escaped) {
                    escaped = false;
                } else if (ch == '\\') {
                    escaped = true;
                } else if (ch == openQuote) {
                    openQuote = '\0';
                }
                continue;
            }
            if (isIgnorable(ch)) {
                continue;
            }
            if (ch == START_BRACE) {
                // getElement must run before prepare(i) moves nextPos.
                s = getElement(nextPos, lastIndex);
                retType = TokenType.START_BRACE;
                prepare(i);
                key = true;
                break;
            } else if (ch == START_BRACKET) {
                s = getElement(nextPos, lastIndex);
                retType = TokenType.START_BRACKET;
                prepare(i);
                key = true;
                break;
            } else if (ch == COLON) {
                if (i == lastIndex) {
                    throw new IllegalStateException(
                            "JSON text must not end with ':' (index " + i
                                    + ")");
                }
                s = getElement(nextPos, i - 1);
                this.type = retType = TokenType.COLON;
                prepare(i);
                key = true;
                break;
            } else if (ch == COMMA) {
                if (i == lastIndex) {
                    throw new IllegalArgumentException(
                            "JSON text must not end with ',' (index " + i
                                    + ")");
                }
                s = getElement(nextPos, i - 1);
                this.type = retType = (isObjectOrArrayEnd(i - 1)) ? TokenType.END_COMMA
                        : TokenType.COMMA;
                prepare(i);
                key = false;
                break;
            } else if (ch == END_BRACKET) {
                // prepare(i) records END_BRACKET as the current type, also
                // when this is the last character of the input.
                retType = TokenType.END_BRACKET;
                s = getElement(nextPos, i - 1);
                prepare(i);
                key = false;
                break;
            } else if (ch == END_BRACE) {
                // prepare(i) records END_BRACE as the current type, also
                // when this is the last character of the input.
                retType = TokenType.END_BRACE;
                s = getElement(this.nextPos, i - 1);
                prepare(i);
                key = false;
                break;
            }
            // Reached only for non-structural characters (every branch above
            // leaves the loop).
            if (!elementStarted
                    && (ch == DOUBLE_QUOTE || ch == SINGLE_QUOTE)) {
                openQuote = ch;
            }
            elementStarted = true;
        }
        s = removeIgnorable(s);
        this.tokenString = (key) ? unquote(s) : s;
        return retType;
    }

    /**
     * Tells whether the closest non-ignorable character at or before
     * {@code pos} is a closing brace or closing bracket.
     *
     * @param pos
     *            index to start looking backwards from; a negative value
     *            yields {@code false}
     * @return {@code true} if that character is <code>}</code> or
     *         <code>]</code>
     */
    protected boolean isObjectOrArrayEnd(int pos) {
        // Index 0 is inspected as well.
        for (int i = pos; 0 <= i; i--) {
            char c = this.orgString.charAt(i);
            if (isIgnorable(c)) {
                continue;
            }
            return c == END_BRACE || c == END_BRACKET;
        }
        return false;
    }

    /**
     * Removes leading ignorable characters (see {@link #isIgnorable(char)}).
     * Trailing characters are left untouched.
     *
     * @param s
     *            the text, may be {@code null} or empty
     * @return {@code s} itself when it is {@code null} or empty, the empty
     *         string when it consists of ignorable characters only, otherwise
     *         {@code s} without its leading ignorable characters
     */
    protected String removeIgnorable(String s) {
        if (isEmpty(s)) {
            return s;
        }
        for (int pos1 = 0; pos1 < s.length(); pos1++) {
            if (!isIgnorable(s.charAt(pos1))) {
                return s.substring(pos1);
            }
        }
        return "";
    }

    /**
     * @param text
     *            the text to test
     * @return {@code true} if {@code text} is {@code null} or has length 0
     */
    public static boolean isEmpty(String text) {
        return text == null || text.length() == 0;
    }

    /**
     * Returns the part of the input between the two indexes (both inclusive)
     * with ignorable characters trimmed from both ends.
     *
     * <p>
     * An empty range ({@code orgEndPos < orgStartPos}) yields the empty
     * string. A range that consists of ignorable characters only yields its
     * last character, because the trimming loops stop once both ends meet;
     * {@link #nextToken()} passes the result through
     * {@link #removeIgnorable(String)} afterwards.
     * </p>
     *
     * @param orgStartPos
     *            index of the first character
     * @param orgEndPos
     *            index of the last character
     * @return the trimmed element text
     */
    protected String getElement(final int orgStartPos, final int orgEndPos) {
        int startPos = orgStartPos;
        for (; startPos < orgEndPos; startPos++) {
            char ch = this.orgString.charAt(startPos);
            if (!isIgnorable(ch)) {
                break;
            }
        }
        int endPos = orgEndPos;
        for (; startPos < endPos; endPos--) {
            char ch = this.orgString.charAt(endPos);
            if (!isIgnorable(ch)) {
                break;
            }
        }
        return this.orgString.substring(startPos, endPos + 1);
    }

    /**
     * @param ch
     *            the character to test
     * @return {@code true} for backspace, form feed, line feed, carriage
     *         return, tab and space
     */
    protected static boolean isIgnorable(char ch) {
        switch (ch) {
        case '\b':
        case '\f':
        case '\n':
        case '\r':
        case '\t':
        case ' ':
            return true;
        default:
            return false;
        }
    }

    /**
     * Removes one pair of surrounding double or single quotes.
     *
     * @param str
     *            the text, may be {@code null} or empty
     * @return the text without its surrounding quotes, or {@code str} itself
     *         when it is {@code null}, empty, or not enclosed in a matching
     *         pair of quotes
     */
    public static String unquote(String str) {
        if (isEmpty(str)) {
            return str;
        }
        if (isQuoted(str, DOUBLE_QUOTE) || isQuoted(str, SINGLE_QUOTE)) {
            return chopQuote(str);
        }
        return str;
    }

    /**
     * Drops the first and the last character without checking what they are.
     *
     * @param str
     *            text of at least two characters
     * @return {@code str} without its first and last character
     * @throws StringIndexOutOfBoundsException
     *             if {@code str} is shorter than two characters
     */
    public static String chopQuote(String str) {
        return str.substring(1, str.length() - 1);
    }

    /**
     * Tells whether the text both starts and ends with the given quote
     * character (two distinct positions, so a lone quote does not count).
     *
     * @param str
     *            the text, must not be {@code null}
     * @param quote
     *            the quote character to look for
     * @return {@code true} if {@code str} is enclosed in {@code quote}
     */
    protected static boolean isQuoted(String str, char quote) {
        final int length = str.length();
        return length >= 2 && str.charAt(0) == quote
                && str.charAt(length - 1) == quote;
    }

    /**
     * @return the text of the most recent token; the whole input before the
     *         first call to {@link #nextToken()}
     */
    public String getToken() {
        return this.tokenString;
    }

    /**
     * Kinds of tokens reported by the tokenizer. {@link #toString()} returns
     * the structural character for every constant except {@link #EOF}, which
     * returns its name.
     */
    public static enum TokenType {
        /** "{" */
        START_BRACE("{"),

        /** "}" */
        END_BRACE("}"),

        /** "[" */
        START_BRACKET("["),

        /** "]" */
        END_BRACKET("]"),

        /** "," */
        COMMA(","),

        /** ":" */
        COLON(":"),

        /** "," and it is the end of {} or []. */
        END_COMMA(","),

        /** End of file. */
        EOF(null);

        /** Structural character of this type, or {@code null} for EOF. */
        private final String symbol;

        private TokenType(String symbol) {
            this.symbol = symbol;
        }

        @Override
        public String toString() {
            return (symbol != null) ? symbol : name();
        }
    }

    /**
     * Returns the type currently recorded by the tokenizer: the brace or
     * bracket type of the first input character right after construction
     * (or {@link TokenType#EOF} if the input is empty or starts with any
     * other character), and afterwards the type of the structural character
     * most recently reached by {@link #nextToken()}.
     *
     * @return the current token type
     */
    public TokenType currentTokenType() {
        return type;
    }
}
Snippet is not live.
Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #2000400 |
Snippet name: | class JSONTokenizer |
Eternal ID of this version: | #2000400/1 |
Text MD5: | 0ad8de1e884015368a85b40a0e50fde6 |
Author: | stefan |
Category: | javax |
Type: | New Tinybrain snippet |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2015-06-28 18:30:22 |
Source code size: | 7862 bytes / 375 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 660 / 426 |
Referenced in: | [show references] |