Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

338
LINES

< > BotCompany Repo | #1000306 // JSON tokenizer (abandoned)

JavaX (input.txt to output.txt)

1  
!636
2  
!629 // standard functions
3  
!1000305 // class JSONTokenizer
4  
5  
import JSONTokenizer.TokenType;
6  
7  
main {
8  
  psvm {
9  
    String src = takeInput(args, null);
10  
    
11  
    src = src.replace("\r\n", "\n");
12  
    JSONTokenizer lex = new JSONTokenizer(src);
13  
    
14  
    List<T> list = new ArrayList<T>();
15  
    for (TokenType a; (a = lex.nextToken()) != TokenType.EOF;) {
16  
      String word = lex.getToken();
17  
      String q = quote(word);
18  
      T t = new T(a, word);
19  
      boolean isSpace = t.isSpace();
20  
      if (isSpace && list.size() > 0 && list.get(list.size()-1).isSpace())
21  
        list.get(list.size()-1).word += word; // merge spaces
22  
      else
23  
        list.add(t);
24  
    }
25  
    
26  
    List<String> cnc = new ArrayList<String>();
27  
    for (int i = 0; i < list.size(); ) {
28  
      T t = list.get(i);
29  
      boolean shouldBeSpace = (cnc.size() % 2) == 0;
30  
      boolean isSpace = t.isSpace();
31  
      if (shouldBeSpace == isSpace) {
32  
        cnc.add(t.word);
33  
        ++i;
34  
      } else if (shouldBeSpace)
35  
        cnc.add("");
36  
      else {
37  
        System.out.println(cncToLines(cnc));
38  
        throw new RuntimeException("TILT at " + cnc.size() + ": " + quote(t.word));
39  
      }
40  
    }
41  
    if ((cnc.size() % 2) == 0)
42  
      cnc.add("");
43  
44  
    saveTextFile("output/output.txt", cncToLines(cnc));
45  
  }
46  
  
47  
  static class T {
48  
    TokenType a; String word;
49  
    
50  
    T(TokenType a, String word) { this.a = a; this.word = word; }
51  
    
52  
    boolean isSpace() {
53  
      return a.equals("WHITE_SPACE") || a.equals("COMMENT");
54  
    }
55  
  }
56  
  
57  
  static String cncToLines(List<String> cnc) {
58  
    StringBuilder out = new StringBuilder();
59  
    for (String token : cnc)
60  
      out.append(quote(token) + "\n");
61  
    return out.toString();
62  
  }
63  
  
64  
  // Picks the input text: with at least one argument, the first argument is
  // handed to loadSnippet; otherwise input/input.txt is read through
  // loadTextFile, with def passed along as its second argument.
  static String takeInput(String[] args, String def) tex {
    boolean noArguments = args.length == 0;
    if (noArguments) {
      return loadTextFile("input/input.txt", def);
    }
    return loadSnippet(args[0]);
  }
68  
  
69  
  public static String quote(String s) {
70  
    if (s == null) return "null";
71  
    return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\"";
72  
  }
73  
  
74  
  static class Java20 extends Lexicon {
75  
76  
	Java20() {
77  
78  
		/**
79  
		* Grammar for Java 2.0.
80  
		*
81  
		* Nonterminal - first letter uppercase
82  
		* TERMINAL - all letters uppercase
83  
		* keyword - all letters lowercase
84  
		*/
85  
		int INFINITY = -1;
86  
87  
		/**
88  
		* 19.3 Terminals from section 3.6: White Space: [[:space:]]
89  
		*/
90  
		put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY));
91  
92  
		/**
93  
		* 19.3 Terminals from section 3.7: Comment
94  
		*/
95  
		put("COMMENT", new Union(
96  
97  
			//
98  
			// Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/
99  
			//
100  
			new Concatenation(
101  
				new Singleton("/*"), new Concatenation(
102  
				new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
103  
				new Repetition(
104  
					new Concatenation(
105  
						new Singleton("*"),
106  
						new Repetition(new Concatenation(
107  
							new NonMatch("*/"),
108  
							new Repetition(new NonMatch("*"), 0, INFINITY)
109  
						), 0, 1)
110  
					), 0, INFINITY
111  
				),
112  
				new Singleton("*/")
113  
			))), new Union(
114  
115  
			/**
116  
			* End Of Line Comment: //[^\n]*\n
117  
			*/
118  
			new Concatenation(
119  
				new Singleton("//"), new Concatenation(
120  
				new Repetition(new NonMatch("\n"), 0, INFINITY),
121  
				new Singleton("\n")
122  
			)),
123  
124  
			//
125  
			// Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/
126  
			//
127  
			new Concatenation(
128  
				new Singleton("/**"), new Concatenation(
129  
				new Repetition(
130  
					new Concatenation(
131  
						new Repetition(new Concatenation(
132  
							new NonMatch("*/"),
133  
							new Repetition(new NonMatch("*"), 0, INFINITY)
134  
						), 0, 1),
135  
						new Singleton("*")
136  
					), 0, INFINITY
137  
				),
138  
				new Singleton("/")
139  
			))
140  
		)));
141  
142  
		put("IDENTIFIER", new Concatenation(
143  
			new Union(
144  
				PosixClass.alpha(),
145  
				new Match("_$")
146  
			),
147  
			new Repetition(
148  
				new Union(
149  
					PosixClass.alnum(),
150  
					new Match("_$")
151  
				), 0, INFINITY
152  
			)
153  
		));
154  
155  
		/**
156  
		* 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar)
157  
		*/
158  
		put("KEYWORD", new Union(
159  
			new Singleton("const"),
160  
			new Singleton("goto")
161  
		));
162  
163  
		/**
164  
		* 19.3 Terminals from section 3.10.1: Integer Literal
165  
		*/
166  
		put("INTEGER_LITERAL", new Concatenation(
167  
			new Union(
168  
				/**
169  
				* Decimal Integer Literal: 0|[1-9][[:digit:]]*
170  
				*/
171  
				new Singleton("0"), new Union(
172  
173  
				new Concatenation(
174  
					new Range('1', '9'),
175  
					new Repetition(PosixClass.digit(), 0, INFINITY)
176  
				), new Union(
177  
178  
				/**
179  
				* Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+
180  
				*/
181  
				new Concatenation(
182  
					new Singleton("0"), new Concatenation(
183  
					new Match("xX"),
184  
					new Repetition(PosixClass.xdigit(), 1, INFINITY)
185  
				)),
186  
187  
				/**
188  
				* Octal Integer Literal: 0[0-7]+
189  
				*/
190  
				new Concatenation(
191  
					new Singleton("0"),
192  
					new Repetition(new Range('0', '7'), 1, INFINITY)
193  
				)
194  
			))),
195  
			new Repetition(new Match("lL"), 0, 1)
196  
		));
197  
198  
		/**
199  
		* 19.3 Terminals from section 3.10.2: Floating-Point Literal
200  
		*/
201  
		put("FLOATING_POINT_LITERAL", new Union(
202  
203  
			/**
204  
			* [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]?
205  
			*/
206  
			new Concatenation(
207  
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
208  
				new Singleton("."), new Concatenation(
209  
				new Repetition(PosixClass.digit(), 0, INFINITY), new Concatenation(
210  
				new Repetition(new Concatenation(
211  
					new Match("eE"), new Concatenation(
212  
					new Repetition(new Match("-+"), 0, 1),
213  
					new Repetition(PosixClass.digit(), 1, INFINITY)
214  
				)), 0, 1),
215  
				new Repetition(new Match("fFdD"), 0, 1)
216  
			)))), new Union(
217  
218  
			/**
219  
			* \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]?
220  
			*/
221  
			new Concatenation(
222  
				new Singleton("."), new Concatenation(
223  
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
224  
				new Repetition(new Concatenation(
225  
					new Match("eE"), new Concatenation(
226  
					new Repetition(new Match("-+"), 0, 1),
227  
					new Repetition(PosixClass.digit(), 1, INFINITY)
228  
				)), 0, 1),
229  
				new Repetition(new Match("fFdD"), 0, 1)
230  
			))), new Union(
231  
232  
			/**
233  
			* [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]?
234  
			*/
235  
			new Concatenation(
236  
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
237  
				new Match("eE"), new Concatenation(
238  
				new Repetition(new Match("-+"), 0, 1), new Concatenation(
239  
				new Repetition(PosixClass.digit(), 1, INFINITY),
240  
				new Repetition(new Match("fFdD"), 0, 1)
241  
			)))),
242  
243  
			/**
244  
			* [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]
245  
			*/
246  
			new Concatenation(
247  
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
248  
				new Repetition(new Concatenation(
249  
					new Match("eE"), new Concatenation(
250  
					new Repetition(new Match("-+"), 0, 1),
251  
					new Repetition(PosixClass.digit(), 1, INFINITY)
252  
				)), 0, 1),
253  
				new Match("fFdD")
254  
			))
255  
		))));
256  
257  
		/**
258  
		* 19.3 Terminals from section 3.10.3: Boolean Literal
259  
		*/
260  
		put("BOOLEAN_LITERAL", new Union(
261  
			new Singleton("true"),
262  
			new Singleton("false")
263  
		));
264  
265  
		/**
266  
		* 19.3 Terminals from section 3.10.4: Character Literal
267  
		*/
268  
		put("CHARACTER_LITERAL", new Concatenation(
269  
			new Singleton("'"), new Concatenation(
270  
			new Union(
271  
272  
				/**
273  
				* Single Character: [^\r\n'\\]
274  
				*/
275  
				new NonMatch("\r\n'\\"),
276  
277  
				/**
278  
				* Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
279  
				*/
280  
				new Concatenation(
281  
					new Singleton("\\"),
282  
					new Union(
283  
						new Match("btnfr\"'\\"),
284  
						new Concatenation(
285  
							new Repetition(new Range('0', '3'), 0, 1),
286  
							new Repetition(new Range('0', '7'), 1, 2)
287  
						)
288  
					)
289  
				)
290  
			),
291  
			new Singleton("'")
292  
		)));
293  
294  
		/**
295  
		* 19.3 Terminals from section 3.10.5: String Literal
296  
		*/
297  
		put("STRING_LITERAL", new Concatenation(
298  
			new Singleton("\""), new Concatenation(
299  
			new Repetition(
300  
				new Union(
301  
302  
					/**
303  
					* Single Character: [^\r\n"\\]
304  
					*/
305  
					new NonMatch("\r\n\"\\"),
306  
307  
					/**
308  
					* Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
309  
					*/
310  
					new Concatenation(
311  
						new Singleton("\\"),
312  
						new Union(
313  
							new Match("btnfr\"'\\"),
314  
							new Concatenation(
315  
								new Repetition(new Range('0', '3'), 0, 1),
316  
								new Repetition(new Range('0', '7'), 1, 2)
317  
							)
318  
						)
319  
					)
320  
				), 0, INFINITY
321  
			),
322  
			new Singleton("\"")
323  
		)));
324  
325  
		/**
326  
		* 19.3 Terminals section 3.10.7: Null Literal
327  
		*/
328  
		put("NULL_LITERAL", new Singleton("null"));
329  
		
330  
		// OK, it seems we have to add some more stuff...
331  
		
332  
		//put("OTHER1", new Match(";{}=,<>[]().+-:|&!"));
333  
		//put("OTHER1", new NonMatch("")); // catch anything, one character at a time
334  
		put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time
335  
336  
	}
337  
} // class Java20
338  
}

Author comment

Began life as a copy of #651

download  show line numbers  debug dex  old transpilations   

Travelled to 13 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1000306
Snippet name: JSON tokenizer (abandoned)
Eternal ID of this version: #1000306/1
Text MD5: 0b7580f93160040e20c30bbbd9290d03
Author: stefan
Category: javax
Type: JavaX (input.txt to output.txt)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-06-28 18:43:47
Source code size: 8808 bytes / 338 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 733 / 508
Referenced in: [show references]