1 | !636 |
2 | !629 // standard functions |
3 | !1000300 // class Lexicon |
4 | |
5 | main { |
6 | psvm { |
7 | String src = takeInput(args, null); |
8 | List<String> cnc = tokenize(src); |
9 | saveTextFile("output/output.txt", cncToLines(cnc)); |
10 | } |
11 | |
12 | static List<String> tokenize(String src) tex { |
13 | Lex lex = new Lex(); |
14 | src = src.replace("\r\n", "\n"); |
15 | LineNumberReader source = new LineNumberReader(new StringReader(src)); |
16 | int lineNr = source.getLineNumber()+1; |
17 | List<T> list = new ArrayList<T>(); |
18 | for (Object a; (a = lex.grab(source)) != lex.$;) { |
19 | String word = lex.word(); |
20 | String q = quote(word); |
21 | //System.out.println("grabbed at line " + lineNr + ": " + a + " " + q); |
22 | lineNr = source.getLineNumber()+1; |
23 | |
24 | T t = new T(a, word); |
25 | boolean isSpace = t.isSpace(); |
26 | if (isSpace && list.size() > 0 && list.get(list.size()-1).isSpace()) |
27 | list.get(list.size()-1).word += word; // merge spaces |
28 | else |
29 | list.add(t); |
30 | } |
31 | |
32 | List<String> cnc = new ArrayList<String>(); |
33 | for (int i = 0; i < list.size(); ) { |
34 | T t = list.get(i); |
35 | boolean shouldBeSpace = (cnc.size() % 2) == 0; |
36 | boolean isSpace = t.isSpace(); |
37 | if (shouldBeSpace == isSpace) { |
38 | cnc.add(t.word); |
39 | ++i; |
40 | } else if (shouldBeSpace) |
41 | cnc.add(""); |
42 | else { |
43 | System.out.println(cncToLines(cnc)); |
44 | throw new RuntimeException("TILT at " + cnc.size() + ": " + quote(t.word)); |
45 | } |
46 | } |
47 | if ((cnc.size() % 2) == 0) |
48 | cnc.add(""); |
49 | return cnc; |
50 | } |
51 | |
52 | static class T { |
53 | Object a; String word; |
54 | |
55 | T(Object a, String word) { this.a = a; this.word = word; } |
56 | |
57 | boolean isSpace() { |
58 | return a.equals("WHITE_SPACE") || a.equals("COMMENT"); |
59 | } |
60 | } |
61 | |
62 | static String cncToLines(List<String> cnc) { |
63 | StringBuilder out = new StringBuilder(); |
64 | for (String token : cnc) |
65 | out.append(quote(token) + "\n"); |
66 | return out.toString(); |
67 | } |
68 | |
69 | static String takeInput(String[] args, String def) tex { |
70 | if (args.length != 0) return loadSnippet(args[0]); |
71 | return loadTextFile("input/input.txt", def); |
72 | } |
73 | |
74 | public static String quote(String s) { |
75 | if (s == null) return "null"; |
76 | return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\""; |
77 | } |
78 | |
79 | static class Lex extends Lexicon { |
80 | |
81 | Lex() { |
82 | |
83 | /* |
84 | * TERMINAL - all letters uppercase |
85 | */ |
86 | int INFINITY = -1; |
87 | |
88 | /** |
89 | * 19.3 Terminals from section 3.6: White Space: [[:space:]] |
90 | */ |
91 | put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY)); |
92 | |
93 | /** |
94 | * 19.3 Terminals from section 3.7: Comment |
95 | */ |
96 | put("COMMENT", new Union( |
97 | |
98 | // |
99 | // Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/ |
100 | // |
101 | new Concatenation( |
102 | new Singleton("/*"), new Concatenation( |
103 | new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation( |
104 | new Repetition( |
105 | new Concatenation( |
106 | new Singleton("*"), |
107 | new Repetition(new Concatenation( |
108 | new NonMatch("*/"), |
109 | new Repetition(new NonMatch("*"), 0, INFINITY) |
110 | ), 0, 1) |
111 | ), 0, INFINITY |
112 | ), |
113 | new Singleton("*/") |
114 | ))), new Union( |
115 | |
116 | /** |
117 | * End Of Line Comment: //[^\n]*\n |
118 | */ |
119 | new Concatenation( |
120 | new Singleton("//"), new Concatenation( |
121 | new Repetition(new NonMatch("\n"), 0, INFINITY), |
122 | new Singleton("\n") |
123 | )), |
124 | |
125 | // |
126 | // Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/ |
127 | // |
128 | new Concatenation( |
129 | new Singleton("/**"), new Concatenation( |
130 | new Repetition( |
131 | new Concatenation( |
132 | new Repetition(new Concatenation( |
133 | new NonMatch("*/"), |
134 | new Repetition(new NonMatch("*"), 0, INFINITY) |
135 | ), 0, 1), |
136 | new Singleton("*") |
137 | ), 0, INFINITY |
138 | ), |
139 | new Singleton("/") |
140 | )) |
141 | ))); |
142 | |
143 | put("IDENTIFIER", new Concatenation( |
144 | new Union( |
145 | PosixClass.alpha(), |
146 | new Match("_$") |
147 | ), |
148 | new Repetition( |
149 | new Union( |
150 | PosixClass.alnum(), |
151 | new Match("_$") |
152 | ), 0, INFINITY |
153 | ) |
154 | )); |
155 | |
156 | /** |
157 | * 19.3 Terminals from section 3.10.5: String Literal |
158 | */ |
159 | put("STRING_LITERAL", new Concatenation( |
160 | new Singleton("\""), new Concatenation( |
161 | new Repetition( |
162 | new Union( |
163 | |
164 | /** |
165 | * Single Character: [^\r\n"\\] |
166 | */ |
167 | new NonMatch("\r\n\"\\"), |
168 | |
169 | /** |
170 | * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2}) |
171 | */ |
172 | new Concatenation( |
173 | new Singleton("\\"), |
174 | new Union( |
175 | new Match("btnfr\"'\\"), |
176 | new Concatenation( |
177 | new Repetition(new Range('0', '3'), 0, 1), |
178 | new Repetition(new Range('0', '7'), 1, 2) |
179 | ) |
180 | ) |
181 | ) |
182 | ), 0, INFINITY |
183 | ), |
184 | new Singleton("\"") |
185 | ))); |
186 | |
187 | // Single-character catch-all production so we can parse anything. |
188 | |
189 | put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time |
190 | |
191 | } |
192 | } // class Lex |
193 | } |
Began life as a copy of #655
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, qbtsjoyahagl, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1000323 |
Snippet name: | Spaces, comments, words, strings (Tokenizer, embeddable, developing) |
Eternal ID of this version: | #1000323/1 |
Text MD5: | 1130cfc8b659aee0598f502ea9276c55 |
Author: | stefan |
Category: | javax |
Type: | JavaX (input.txt to output.txt) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2015-07-06 18:39:38 |
Source code size: | 4994 bytes / 193 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 733 / 643 |
Referenced in: | [show references] |