Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

269
LINES

< > BotCompany Repo | #646 // Lexicon test (tokenizing)

JavaX source code - run with: x30.jar

1  
!636
2  
!1000300 // class Lexicon
3  
4  
main {
5  
  psvm {
6  
    String src = "class main { String s; }";
7  
    MyLexicon lex = new MyLexicon();
8  
    lex.lexicalRules(true);
9  
    LineNumberReader source = new LineNumberReader(new StringReader(src));
10  
    for (Object a; (a = lex.grab(source)) != lex.$;) {
11  
      System.out.println("grabbed: " + a);
12  
    }
13  
  }
14  
  
15  
  static class MyLexicon extends Lexicon {
16  
   private static final String idChars1 = "_$?-#+";
17  
   private static final String idChars2 = "_$-.+";
18  
   
19  
   void lexicalRules(boolean allowShortFloatingPoints) {
20  
    // These rules are taken from Java10.java (part of gi-0.9 distribution)
21  
22  
		int INFINITY = -1;
23  
24  
		/**
25  
		* 19.3 Terminals from section 3.6: White Space: [[:space:]]
26  
		*/
27  
		put("WHITE_SPACE", PosixClass.space());
28  
29  
		/**
30  
		* 19.3 Terminals from section 3.7: Comment
31  
		*/
32  
		put("COMMENT", new Union(
33  
34  
			//
35  
			// Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/
36  
			//
37  
			new Concatenation(
38  
				new Singleton("/*"), new Concatenation(
39  
				new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
40  
				new Repetition(
41  
					new Concatenation(
42  
						new Singleton("*"),
43  
						new Repetition(new Concatenation(
44  
							new NonMatch("*/"),
45  
							new Repetition(new NonMatch("*"), 0, INFINITY)
46  
						), 0, 1)
47  
					), 0, INFINITY
48  
				),
49  
				new Singleton("*/")
50  
			))), new Union(
51  
52  
			/**
53  
			* End Of Line Comment: //[^\n]*\n
54  
			*/
55  
			new Concatenation(
56  
				new Singleton("//"), new Concatenation(
57  
				new Repetition(new NonMatch("\n"), 0, INFINITY),
58  
				new Singleton("\n")
59  
			)),
60  
61  
			//
62  
			// Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/
63  
			//
64  
			new Concatenation(
65  
				new Singleton("/**"), new Concatenation(
66  
				new Repetition(
67  
					new Concatenation(
68  
						new Repetition(new Concatenation(
69  
							new NonMatch("*/"),
70  
							new Repetition(new NonMatch("*"), 0, INFINITY)
71  
						), 0, 1),
72  
						new Singleton("*")
73  
					), 0, INFINITY
74  
				),
75  
				new Singleton("/")
76  
			))
77  
		)));
78  
79  
		put("IDENTIFIER", new Concatenation(
80  
			new Union(
81  
				PosixClass.alpha(),
82  
				new Match(idChars1)
83  
			),
84  
			new Repetition(
85  
				new Union(
86  
					PosixClass.alnum(),
87  
					new Match(idChars2)
88  
				), 0, INFINITY
89  
			)
90  
		));
91  
92  
		/**
93  
		* 19.3 Terminals from section 3.10.1: Integer Literal
94  
		*/
95  
		put("INTEGER_LITERAL", new Concatenation(
96  
			new Union(
97  
				/**
98  
				* Decimal Integer Literal: 0|[1-9][[:digit:]]*
99  
				*/
100  
				new Singleton("0"), new Union(
101  
102  
				new Concatenation(
103  
					new Range('1', '9'),
104  
					new Repetition(PosixClass.digit(), 0, INFINITY)
105  
				), new Union(
106  
107  
				/**
108  
				* Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+
109  
				*/
110  
				new Concatenation(
111  
					new Singleton("0"), new Concatenation(
112  
					new Match("xX"),
113  
					new Repetition(PosixClass.xdigit(), 1, INFINITY)
114  
				)),
115  
116  
				/**
117  
				* Octal Integer Literal: 0[0-7]+
118  
				*/
119  
				new Concatenation(
120  
					new Singleton("0"),
121  
					new Repetition(new Range('0', '7'), 1, INFINITY)
122  
				)
123  
			))),
124  
			new Repetition(new Match("lL"), 0, 1)
125  
		));
126  
127  
		/**
128  
		* 19.3 Terminals from Java section 3.10.5: String Literal - extended for multi-line strings
129  
		*/
130  
		put("STRING_LITERAL", new Concatenation(
131  
			new Singleton("\""), new Concatenation(
132  
			new Repetition(
133  
				new Union(
134  
135  
					/**
136  
					* Single Character: [^\r\n"\\]
137  
					*/
138  
					new NonMatch("\"\\"), // Java: new NonMatch("\r\n\"\\"),
139  
140  
					/**
141  
					* Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
142  
					*/
143  
					new Concatenation(
144  
						new Singleton("\\"),
145  
						new Union(
146  
							new Match("btnfr\"'\\"),
147  
							new Concatenation(
148  
								new Repetition(new Range('0', '3'), 0, 1),
149  
								new Repetition(new Range('0', '7'), 1, 2)
150  
							)
151  
						)
152  
					)
153  
				), 0, INFINITY
154  
			),
155  
			new Singleton("\"")
156  
		)));
157  
158  
    /**
159  
     * Addition for trees: Single-quote string literal
160  
     * (is stored quoted with double-quotes. Yeah, it's
161  
     * a little confusing, but don't worry.)
162  
     */
163  
    put("SQ_STRING_LITERAL", new Concatenation(
164  
      new Singleton("\'"), new Concatenation(
165  
      new Repetition(
166  
        new Union(
167  
168  
          /**
169  
           * Single Character: [^\r\n"\\]
170  
           */
171  
          new NonMatch("\r\n\'\\"),
172  
173  
          /**
174  
           * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
175  
           */
176  
          new Concatenation(
177  
            new Singleton("\\"),
178  
            new Union(
179  
              new Match("btnfr\"'\\"),
180  
              new Concatenation(
181  
                new Repetition(new Range('0', '3'), 0, 1),
182  
                new Repetition(new Range('0', '7'), 1, 2)
183  
              )
184  
            )
185  
          )
186  
        ), 0, INFINITY
187  
      ),
188  
      new Singleton("\'")
189  
    )));
190  
191  
    /**
192  
     * Multi-line string literals within brackets (>> hello world <<)
193  
     */
194  
    put("ML_STRING_LITERAL", new Concatenation(
195  
      new Singleton(">>"), new Concatenation(
196  
      new Repetition(
197  
        new Union(
198  
          // either not a closing bracket..
199  
          new NonMatch("<"),
200  
201  
          // or one closing bracket but not two of them
202  
          new Concatenation(
203  
            new Singleton("<"),
204  
            new NonMatch("<"))
205  
        ), 0, INFINITY),
206  
      new Singleton("<<")
207  
    )));
208  
209  
    /**
210  
    * 19.3 Terminals from section 3.10.2: Floating-Point Literal
211  
    */
212  
    put("FLOATING_POINT_LITERAL", new Union(
213  
214  
      /**
215  
      * [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]?
216  
      */
217  
      new Concatenation(
218  
        new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
219  
        new Singleton("."), new Concatenation(
220  
        new Repetition(PosixClass.digit(), allowShortFloatingPoints ? 0 : 1, INFINITY), new Concatenation(
221  
        new Repetition(new Concatenation(
222  
          new Match("eE"), new Concatenation(
223  
          new Repetition(new Match("-+"), 0, 1),
224  
          new Repetition(PosixClass.digit(), 1, INFINITY)
225  
        )), 0, 1),
226  
        new Repetition(new Match("fFdD"), 0, 1)
227  
      )))), new Union(
228  
229  
      /**
230  
      * \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]?
231  
      */
232  
      new Concatenation(
233  
        new Singleton("."), new Concatenation(
234  
        new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
235  
        new Repetition(new Concatenation(
236  
          new Match("eE"), new Concatenation(
237  
          new Repetition(new Match("-+"), 0, 1),
238  
          new Repetition(PosixClass.digit(), 1, INFINITY)
239  
        )), 0, 1),
240  
        new Repetition(new Match("fFdD"), 0, 1)
241  
      ))), new Union(
242  
243  
      /**
244  
      * [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]?
245  
      */
246  
      new Concatenation(
247  
        new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
248  
        new Match("eE"), new Concatenation(
249  
        new Repetition(new Match("-+"), 0, 1), new Concatenation(
250  
        new Repetition(PosixClass.digit(), 1, INFINITY),
251  
        new Repetition(new Match("fFdD"), 0, 1)
252  
      )))),
253  
254  
      /**
255  
      * [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]
256  
      */
257  
      new Concatenation(
258  
        new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
259  
        new Repetition(new Concatenation(
260  
          new Match("eE"), new Concatenation(
261  
          new Repetition(new Match("-+"), 0, 1),
262  
          new Repetition(PosixClass.digit(), 1, INFINITY)
263  
        )), 0, 1),
264  
        new Match("fFdD")
265  
      ))
266  
    ))));
267  
   }
268  
  } // class MyLexicon
269  
}

download  show line numbers  debug dex  old transpilations   

Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, qbtsjoyahagl, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #646
Snippet name: Lexicon test (tokenizing)
Eternal ID of this version: #646/1
Text MD5: 8a857a79cd64aea9ea7deeb3d0ca7a94
Author: stefan
Category: javax
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-06-26 21:22:08
Source code size: 7446 bytes / 269 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 691 / 607
Referenced in: [show references]