1 | !636 |
2 | !629 // standard functions |
3 | !1000300 // class Lexicon |
4 | |
5 | main {
|
6 | psvm {
|
7 | String src = takeInput(args, null); |
8 | List<String> cnc = tokenize(src); |
9 | saveTextFile("output/output.txt", cncToLines(cnc));
|
10 | } |
11 | |
12 | static List<String> tokenize(String src) tex {
|
13 | Lex lex = new Lex(); |
14 | src = src.replace("\r\n", "\n");
|
15 | LineNumberReader source = new LineNumberReader(new StringReader(src)); |
16 | int lineNr = source.getLineNumber()+1; |
17 | List<T> list = new ArrayList<T>(); |
18 | for (Object a; (a = lex.grab(source)) != lex.$;) {
|
19 | String word = lex.word(); |
20 | String q = quote(word); |
21 | //System.out.println("grabbed at line " + lineNr + ": " + a + " " + q);
|
22 | lineNr = source.getLineNumber()+1; |
23 | |
24 | T t = new T(a, word); |
25 | boolean isSpace = t.isSpace(); |
26 | if (isSpace && list.size() > 0 && list.get(list.size()-1).isSpace()) |
27 | list.get(list.size()-1).word += word; // merge spaces |
28 | else |
29 | list.add(t); |
30 | } |
31 | |
32 | List<String> cnc = new ArrayList<String>(); |
33 | for (int i = 0; i < list.size(); ) {
|
34 | T t = list.get(i); |
35 | boolean shouldBeSpace = (cnc.size() % 2) == 0; |
36 | boolean isSpace = t.isSpace(); |
37 | if (shouldBeSpace == isSpace) {
|
38 | cnc.add(t.word); |
39 | ++i; |
40 | } else if (shouldBeSpace) |
41 | cnc.add("");
|
42 | else {
|
43 | System.out.println(cncToLines(cnc)); |
44 | throw new RuntimeException("TILT at " + cnc.size() + ": " + quote(t.word));
|
45 | } |
46 | } |
47 | if ((cnc.size() % 2) == 0) |
48 | cnc.add("");
|
49 | return cnc; |
50 | } |
51 | |
52 | static class T {
|
53 | Object a; String word; |
54 | |
55 | T(Object a, String word) { this.a = a; this.word = word; }
|
56 | |
57 | boolean isSpace() {
|
58 | return a.equals("WHITE_SPACE") || a.equals("COMMENT");
|
59 | } |
60 | } |
61 | |
62 | static String cncToLines(List<String> cnc) {
|
63 | StringBuilder out = new StringBuilder(); |
64 | for (String token : cnc) |
65 | out.append(quote(token) + "\n"); |
66 | return out.toString(); |
67 | } |
68 | |
69 | static String takeInput(String[] args, String def) tex {
|
70 | if (args.length != 0) return loadSnippet(args[0]); |
71 | return loadTextFile("input/input.txt", def);
|
72 | } |
73 | |
74 | public static String quote(String s) {
|
75 | if (s == null) return "null"; |
76 | return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\"";
|
77 | } |
78 | |
79 | static class Lex extends Lexicon {
|
80 | |
81 | Lex() {
|
82 | |
83 | /* |
84 | * TERMINAL - all letters uppercase |
85 | */ |
86 | int INFINITY = -1; |
87 | |
88 | /** |
89 | * 19.3 Terminals from section 3.6: White Space: [[:space:]] |
90 | */ |
91 | put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY));
|
92 | |
93 | /** |
94 | * 19.3 Terminals from section 3.7: Comment |
95 | */ |
96 | put("COMMENT", new Union(
|
97 | |
98 | // |
99 | // Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/ |
100 | // |
101 | new Concatenation( |
102 | new Singleton("/*"), new Concatenation(
|
103 | new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
|
104 | new Repetition( |
105 | new Concatenation( |
106 | new Singleton("*"),
|
107 | new Repetition(new Concatenation( |
108 | new NonMatch("*/"),
|
109 | new Repetition(new NonMatch("*"), 0, INFINITY)
|
110 | ), 0, 1) |
111 | ), 0, INFINITY |
112 | ), |
113 | new Singleton("*/")
|
114 | ))), new Union( |
115 | |
116 | /** |
117 | * End Of Line Comment: //[^\n]*\n |
118 | */ |
119 | new Concatenation( |
120 | new Singleton("//"), new Concatenation(
|
121 | new Repetition(new NonMatch("\n"), 0, INFINITY),
|
122 | new Singleton("\n")
|
123 | )), |
124 | |
125 | // |
126 | // Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/ |
127 | // |
128 | new Concatenation( |
129 | new Singleton("/**"), new Concatenation(
|
130 | new Repetition( |
131 | new Concatenation( |
132 | new Repetition(new Concatenation( |
133 | new NonMatch("*/"),
|
134 | new Repetition(new NonMatch("*"), 0, INFINITY)
|
135 | ), 0, 1), |
136 | new Singleton("*")
|
137 | ), 0, INFINITY |
138 | ), |
139 | new Singleton("/")
|
140 | )) |
141 | ))); |
142 | |
143 | put("IDENTIFIER", new Concatenation(
|
144 | new Union( |
145 | PosixClass.alpha(), |
146 | new Match("_$")
|
147 | ), |
148 | new Repetition( |
149 | new Union( |
150 | PosixClass.alnum(), |
151 | new Match("_$")
|
152 | ), 0, INFINITY |
153 | ) |
154 | )); |
155 | |
156 | /** |
157 | * 19.3 Terminals from section 3.10.5: String Literal |
158 | */ |
159 | put("STRING_LITERAL", new Concatenation(
|
160 | new Singleton("\""), new Concatenation(
|
161 | new Repetition( |
162 | new Union( |
163 | |
164 | /** |
165 | * Single Character: [^\r\n"\\] |
166 | */ |
167 | new NonMatch("\r\n\"\\"),
|
168 | |
169 | /** |
170 | * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
|
171 | */ |
172 | new Concatenation( |
173 | new Singleton("\\"),
|
174 | new Union( |
175 | new Match("btnfr\"'\\"),
|
176 | new Concatenation( |
177 | new Repetition(new Range('0', '3'), 0, 1),
|
178 | new Repetition(new Range('0', '7'), 1, 2)
|
179 | ) |
180 | ) |
181 | ) |
182 | ), 0, INFINITY |
183 | ), |
184 | new Singleton("\"")
|
185 | ))); |
186 | |
187 | // Single-character catch-all production so we can parse anything. |
188 | |
189 | put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time
|
190 | |
191 | } |
192 | } // class Lex |
193 | } |
Began life as a copy of #655
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, qbtsjoyahagl, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #1000323 |
| Snippet name: | Spaces, comments, words, strings (Tokenizer, embeddable, developing) |
| Eternal ID of this version: | #1000323/1 |
| Text MD5: | 1130cfc8b659aee0598f502ea9276c55 |
| Author: | stefan |
| Category: | javax |
| Type: | JavaX (input.txt to output.txt) |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2015-07-06 18:39:38 |
| Source code size: | 4994 bytes / 193 lines |
| Pitched / IR pitched: | No / Yes |
| Views / Downloads: | 1026 / 878 |
| Referenced in: | [show references] |