1 | !636 |
2 | !629 // standard functions |
3 | !1000300 // class Lexicon |
4 | |
5 | main { |
6 | psvm { |
7 | String src = takeInput(args, null); |
8 | Lex lex = new Lex(); |
9 | src = src.replace("\r\n", "\n"); |
10 | LineNumberReader source = new LineNumberReader(new StringReader(src)); |
11 | int lineNr = source.getLineNumber()+1; |
12 | List<T> list = new ArrayList<T>(); |
13 | for (Object a; (a = lex.grab(source)) != lex.$;) { |
14 | String word = lex.word(); |
15 | String q = quote(word); |
16 | //System.out.println("grabbed at line " + lineNr + ": " + a + " " + q); |
17 | lineNr = source.getLineNumber()+1; |
18 | |
19 | T t = new T(a, word); |
20 | boolean isSpace = t.isSpace(); |
21 | if (isSpace && list.size() > 0 && list.get(list.size()-1).isSpace()) |
22 | list.get(list.size()-1).word += word; // merge spaces |
23 | else |
24 | list.add(t); |
25 | } |
26 | |
27 | List<String> cnc = new ArrayList<String>(); |
28 | for (int i = 0; i < list.size(); ) { |
29 | T t = list.get(i); |
30 | boolean shouldBeSpace = (cnc.size() % 2) == 0; |
31 | boolean isSpace = t.isSpace(); |
32 | if (shouldBeSpace == isSpace) { |
33 | cnc.add(t.word); |
34 | ++i; |
35 | } else if (shouldBeSpace) |
36 | cnc.add(""); |
37 | else { |
38 | System.out.println(cncToLines(cnc)); |
39 | throw new RuntimeException("TILT at " + cnc.size() + ": " + quote(t.word)); |
40 | } |
41 | } |
42 | if ((cnc.size() % 2) == 0) |
43 | cnc.add(""); |
44 | |
45 | saveTextFile("output/output.txt", cncToLines(cnc)); |
46 | } |
47 | |
48 | static class T { |
49 | Object a; String word; |
50 | |
51 | T(Object a, String word) { this.a = a; this.word = word; } |
52 | |
53 | boolean isSpace() { |
54 | return a.equals("WHITE_SPACE") || a.equals("COMMENT"); |
55 | } |
56 | } |
57 | |
58 | static String cncToLines(List<String> cnc) { |
59 | StringBuilder out = new StringBuilder(); |
60 | for (String token : cnc) |
61 | out.append(quote(token) + "\n"); |
62 | return out.toString(); |
63 | } |
64 | |
65 | static String takeInput(String[] args, String def) tex { |
66 | if (args.length != 0) return loadSnippet(args[0]); |
67 | return loadTextFile("input/input.txt", def); |
68 | } |
69 | |
70 | public static String quote(String s) { |
71 | if (s == null) return "null"; |
72 | return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\""; |
73 | } |
74 | |
/**
 * Lexicon subclass whose constructor registers the terminals used by this
 * tokenizer via put(name, expression): WHITE_SPACE, COMMENT, IDENTIFIER,
 * STRING_LITERAL and the single-character catch-all OTHER1.
 *
 * The regular expressions quoted in the comments below describe what each
 * nested expression tree is meant to match.
 */
static class Lex extends Lexicon {

  Lex() {

    /*
     * TERMINAL - all letters uppercase
     */
    // Passed as the upper bound of Repetition wherever the regex uses
    // "+" or "*"; presumably -1 is Lexicon's "no upper limit" marker
    // (TODO confirm against the Lexicon class).
    int INFINITY = -1;

    /**
     * 19.3 Terminals from section 3.6: White Space: [[:space:]]
     * Registered as one or more POSIX space characters.
     */
    put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY));

    /**
     * 19.3 Terminals from section 3.7: Comment
     * Union of three alternatives: traditional, end-of-line and
     * documentation comments.
     */
    put("COMMENT", new Union(

      //
      // Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/
      // Note: at least one non-'*' character is required right after the
      // opening "/*".
      //
      new Concatenation(
        new Singleton("/*"), new Concatenation(
        new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
        new Repetition(
          new Concatenation(
            new Singleton("*"),
            new Repetition(new Concatenation(
              new NonMatch("*/"),
              new Repetition(new NonMatch("*"), 0, INFINITY)
            ), 0, 1)
          ), 0, INFINITY
        ),
        new Singleton("*/")
      ))), new Union(

      /**
       * End Of Line Comment: //[^\n]*\n
       * The terminating newline is part of the match and is mandatory.
       */
      new Concatenation(
        new Singleton("//"), new Concatenation(
        new Repetition(new NonMatch("\n"), 0, INFINITY),
        new Singleton("\n")
      )),

      //
      // Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/
      //
      new Concatenation(
        new Singleton("/**"), new Concatenation(
        new Repetition(
          new Concatenation(
            new Repetition(new Concatenation(
              new NonMatch("*/"),
              new Repetition(new NonMatch("*"), 0, INFINITY)
            ), 0, 1),
            new Singleton("*")
          ), 0, INFINITY
        ),
        new Singleton("/")
      ))
    )));

    // Identifier: a letter, '_' or '$', followed by any number of
    // alphanumerics, '_' or '$'.
    put("IDENTIFIER", new Concatenation(
      new Union(
        PosixClass.alpha(),
        new Match("_$")
      ),
      new Repetition(
        new Union(
          PosixClass.alnum(),
          new Match("_$")
        ), 0, INFINITY
      )
    ));

    /**
     * 19.3 Terminals from section 3.10.5: String Literal
     * An opening double quote, any number of plain characters or escape
     * sequences, and a closing double quote.
     */
    put("STRING_LITERAL", new Concatenation(
      new Singleton("\""), new Concatenation(
      new Repetition(
        new Union(

          /**
           * Single Character: [^\r\n"\\]
           */
          new NonMatch("\r\n\"\\"),

          /**
           * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
           */
          new Concatenation(
            new Singleton("\\"),
            new Union(
              new Match("btnfr\"'\\"),
              new Concatenation(
                new Repetition(new Range('0', '3'), 0, 1),
                new Repetition(new Range('0', '7'), 1, 2)
              )
            )
          )
        ), 0, INFINITY
      ),
      new Singleton("\"")
    )));

    // Single-character catch-all production so we can parse anything.
    // It excludes only space, tab, CR and LF.

    put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time

  }
} // class Lex
189 | } |
Began life as a copy of #651
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, qbtsjoyahagl, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #655 |
Snippet name: | Spaces, comments, words, strings (Tokenizer) |
Eternal ID of this version: | #655/1 |
Text MD5: | 4c83a001a302a8beb62837e767f4fb28 |
Author: | stefan |
Category: | javax |
Type: | JavaX (input.txt to output.txt) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2015-07-06 18:02:11 |
Source code size: | 4881 bytes / 189 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 712 / 871 |
Referenced in: | [show references] |