Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

265
LINES

< > BotCompany Repo | #2000394 // class Java20 (Java tokenizer)

New Tinybrain snippet

  
  /**
   * Terminal (token) definitions for Java 2.0, used as a Java tokenizer.
   *
   * The constructor calls put(name, expression) once per terminal, in this
   * order: WHITE_SPACE, COMMENT, IDENTIFIER, KEYWORD, INTEGER_LITERAL,
   * FLOATING_POINT_LITERAL, BOOLEAN_LITERAL, CHARACTER_LITERAL,
   * STRING_LITERAL, NULL_LITERAL and the catch-all OTHER1.
   *
   * Naming convention of the underlying grammar:
   *   Nonterminal - first letter uppercase
   *   TERMINAL    - all letters uppercase
   *   keyword     - all letters lowercase
   *
   * Each expression is annotated with the regular expression it encodes.
   * Union and Concatenation are used here as binary operators, so longer
   * alternations/sequences are nested to the right.
   */
  class Java20 extends Lexicon {

	Java20() {

		// Upper bound passed to Repetition wherever the annotated regex uses * or +.
		int INFINITY = -1;

		// 19.3 Terminals from section 3.6: White Space: [[:space:]]+
		put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY));

		// 19.3 Terminals from section 3.7: Comment
		put("COMMENT", new Union(

			// Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/
			new Concatenation(
				new Singleton("/*"), new Concatenation(
				new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
				new Repetition(
					new Concatenation(
						new Singleton("*"),
						new Repetition(new Concatenation(
							new NonMatch("*/"),
							new Repetition(new NonMatch("*"), 0, INFINITY)
						), 0, 1)
					), 0, INFINITY
				),
				new Singleton("*/")
			))), new Union(

			// End Of Line Comment: //[^\n]*\n?
			// The trailing newline is optional so that a comment on the last
			// line of the input (no terminating newline) is still a COMMENT
			// instead of falling through to OTHER1 one character at a time.
			// NOTE(review): assumes Lexicon prefers the longest match, so the
			// newline is still part of the token when present - confirm.
			new Concatenation(
				new Singleton("//"), new Concatenation(
				new Repetition(new NonMatch("\n"), 0, INFINITY),
				new Repetition(new Singleton("\n"), 0, 1)
			)),

			// Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/
			new Concatenation(
				new Singleton("/**"), new Concatenation(
				new Repetition(
					new Concatenation(
						new Repetition(new Concatenation(
							new NonMatch("*/"),
							new Repetition(new NonMatch("*"), 0, INFINITY)
						), 0, 1),
						new Singleton("*")
					), 0, INFINITY
				),
				new Singleton("/")
			))
		)));

		// Identifier: ([[:alpha:]]|[_$])([[:alnum:]]|[_$])*
		put("IDENTIFIER", new Concatenation(
			new Union(
				PosixClass.alpha(),
				new Match("_$")
			),
			new Repetition(
				new Union(
					PosixClass.alnum(),
					new Match("_$")
				), 0, INFINITY
			)
		));

		// 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar)
		put("KEYWORD", new Union(
			new Singleton("const"),
			new Singleton("goto")
		));

		// 19.3 Terminals from section 3.10.1: Integer Literal
		put("INTEGER_LITERAL", new Concatenation(
			new Union(
				// Decimal Integer Literal: 0|[1-9][[:digit:]]*
				new Singleton("0"), new Union(

				new Concatenation(
					new Range('1', '9'),
					new Repetition(PosixClass.digit(), 0, INFINITY)
				), new Union(

				// Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+
				new Concatenation(
					new Singleton("0"), new Concatenation(
					new Match("xX"),
					new Repetition(PosixClass.xdigit(), 1, INFINITY)
				)),

				// Octal Integer Literal: 0[0-7]+
				new Concatenation(
					new Singleton("0"),
					new Repetition(new Range('0', '7'), 1, INFINITY)
				)
			))),
			// Optional long suffix: [lL]?
			new Repetition(new Match("lL"), 0, 1)
		));

		// 19.3 Terminals from section 3.10.2: Floating-Point Literal
		put("FLOATING_POINT_LITERAL", new Union(

			// [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]?
			new Concatenation(
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
				new Singleton("."), new Concatenation(
				new Repetition(PosixClass.digit(), 0, INFINITY), new Concatenation(
				new Repetition(new Concatenation(
					new Match("eE"), new Concatenation(
					new Repetition(new Match("-+"), 0, 1),
					new Repetition(PosixClass.digit(), 1, INFINITY)
				)), 0, 1),
				new Repetition(new Match("fFdD"), 0, 1)
			)))), new Union(

			// \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]?
			new Concatenation(
				new Singleton("."), new Concatenation(
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
				new Repetition(new Concatenation(
					new Match("eE"), new Concatenation(
					new Repetition(new Match("-+"), 0, 1),
					new Repetition(PosixClass.digit(), 1, INFINITY)
				)), 0, 1),
				new Repetition(new Match("fFdD"), 0, 1)
			))), new Union(

			// [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]?
			new Concatenation(
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
				new Match("eE"), new Concatenation(
				new Repetition(new Match("-+"), 0, 1), new Concatenation(
				new Repetition(PosixClass.digit(), 1, INFINITY),
				new Repetition(new Match("fFdD"), 0, 1)
			)))),

			// [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]
			new Concatenation(
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
				new Repetition(new Concatenation(
					new Match("eE"), new Concatenation(
					new Repetition(new Match("-+"), 0, 1),
					new Repetition(PosixClass.digit(), 1, INFINITY)
				)), 0, 1),
				new Match("fFdD")
			))
		))));

		// 19.3 Terminals from section 3.10.3: Boolean Literal
		put("BOOLEAN_LITERAL", new Union(
			new Singleton("true"),
			new Singleton("false")
		));

		// 19.3 Terminals from section 3.10.4: Character Literal
		put("CHARACTER_LITERAL", new Concatenation(
			new Singleton("'"), new Concatenation(
			new Union(

				// Single Character: [^\r\n'\\]
				new NonMatch("\r\n'\\"),

				// Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
				new Concatenation(
					new Singleton("\\"),
					new Union(
						new Match("btnfr\"'\\"),
						new Concatenation(
							new Repetition(new Range('0', '3'), 0, 1),
							new Repetition(new Range('0', '7'), 1, 2)
						)
					)
				)
			),
			new Singleton("'")
		)));

		// 19.3 Terminals from section 3.10.5: String Literal
		put("STRING_LITERAL", new Concatenation(
			new Singleton("\""), new Concatenation(
			new Repetition(
				new Union(

					// Single Character: [^\r\n"\\]
					new NonMatch("\r\n\"\\"),

					// Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
					new Concatenation(
						new Singleton("\\"),
						new Union(
							new Match("btnfr\"'\\"),
							new Concatenation(
								new Repetition(new Range('0', '3'), 0, 1),
								new Repetition(new Range('0', '7'), 1, 2)
							)
						)
					)
				), 0, INFINITY
			),
			new Singleton("\"")
		)));

		// 19.3 Terminals section 3.10.7: Null Literal
		put("NULL_LITERAL", new Singleton("null"));

		// Catch-all so that operators, separators and any other character not
		// covered above still produce a token: exactly one character that is
		// not a space, tab, carriage return or newline.
		// Earlier attempts, kept for reference:
		//put("OTHER1", new Match(";{}=,<>[]().+-:|&!"));
		//put("OTHER1", new NonMatch("")); // catch anything, one character at a time
		put("OTHER1", new NonMatch(" \t\r\n"));

	}
} // class Java20

Author comment

Began life as a copy of #648

download  show line numbers   

Snippet is not live.

Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

Comments [hide]

ID Author/Program Comment Date
301 #1000604 (pitcher) 2015-08-18 00:54:56
257 #1000610 (pitcher) Edit suggestion:
!636
!629

// Edit suggestion written in JavaX notation: the tokenizer class wrapped in a
// `main { ... }` program. Java20 is declared as a local class inside main();
// nothing in this block instantiates it, and androidContext / programID are
// declared but not used here.
main {
static Object androidContext;
static String programID;

public static void main(String[] args) throws Exception {

// Terminal (token) definitions for Java 2.0. The constructor calls
// put(name, expression) once per terminal; every expression is annotated
// with the regular expression it encodes.
class Java20 extends Lexicon {

Java20() {

/**
* Grammar for Java 2.0.
*
* Nonterminal - first letter uppercase
* TERMINAL - all letters uppercase
* keyword - all letters lowercase
*
* INFINITY is the upper bound passed to Repetition wherever the annotated
* regex uses * or +.
*/
int INFINITY = -1;

/**
* 19.3 Terminals from section 3.6: White Space: [[:space:]]+
*/
put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY));

/**
* 19.3 Terminals from section 3.7: Comment
* (three alternatives: traditional, end-of-line, documentation)
*/
put("COMMENT", new Union(

//
// Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/
//
new Concatenation(
new Singleton("/*"), new Concatenation(
new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
new Repetition(
new Concatenation(
new Singleton("*"),
new Repetition(new Concatenation(
new NonMatch("*/"),
new Repetition(new NonMatch("*"), 0, INFINITY)
), 0, 1)
), 0, INFINITY
),
new Singleton("*/")
))), new Union(

/**
* End Of Line Comment: //[^\n]*\n
*
* NOTE(review): the closing "\n" is mandatory in this alternative, so a
* comment on the last line of input without a newline does not match it.
*/
new Concatenation(
new Singleton("//"), new Concatenation(
new Repetition(new NonMatch("\n"), 0, INFINITY),
new Singleton("\n")
)),

//
// Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/
//
new Concatenation(
new Singleton("/**"), new Concatenation(
new Repetition(
new Concatenation(
new Repetition(new Concatenation(
new NonMatch("*/"),
new Repetition(new NonMatch("*"), 0, INFINITY)
), 0, 1),
new Singleton("*")
), 0, INFINITY
),
new Singleton("/")
))
)));

// Identifier: ([[:alpha:]]|[_$])([[:alnum:]]|[_$])*
put("IDENTIFIER", new Concatenation(
new Union(
PosixClass.alpha(),
new Match("_$")
),
new Repetition(
new Union(
PosixClass.alnum(),
new Match("_$")
), 0, INFINITY
)
));

/**
* 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar)
*/
put("KEYWORD", new Union(
new Singleton("const"),
new Singleton("goto")
));

/**
* 19.3 Terminals from section 3.10.1: Integer Literal
* (decimal, hexadecimal or octal digits followed by an optional [lL] suffix)
*/
put("INTEGER_LITERAL", new Concatenation(
new Union(
/**
* Decimal Integer Literal: 0|[1-9][[:digit:]]*
*/
new Singleton("0"), new Union(

new Concatenation(
new Range('1', '9'),
new Repetition(PosixClass.digit(), 0, INFINITY)
), new Union(

/**
* Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+
*/
new Concatenation(
new Singleton("0"), new Concatenation(
new Match("xX"),
new Repetition(PosixClass.xdigit(), 1, INFINITY)
)),

/**
* Octal Integer Literal: 0[0-7]+
*/
new Concatenation(
new Singleton("0"),
new Repetition(new Range('0', '7'), 1, INFINITY)
)
))),
new Repetition(new Match("lL"), 0, 1)
));

/**
* 19.3 Terminals from section 3.10.2: Floating-Point Literal
* (four alternatives, each annotated with its regex)
*/
put("FLOATING_POINT_LITERAL", new Union(

/**
* [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]?
*/
new Concatenation(
new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
new Singleton("."), new Concatenation(
new Repetition(PosixClass.digit(), 0, INFINITY), new Concatenation(
new Repetition(new Concatenation(
new Match("eE"), new Concatenation(
new Repetition(new Match("-+"), 0, 1),
new Repetition(PosixClass.digit(), 1, INFINITY)
)), 0, 1),
new Repetition(new Match("fFdD"), 0, 1)
)))), new Union(

/**
* \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]?
*/
new Concatenation(
new Singleton("."), new Concatenation(
new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
new Repetition(new Concatenation(
new Match("eE"), new Concatenation(
new Repetition(new Match("-+"), 0, 1),
new Repetition(PosixClass.digit(), 1, INFINITY)
)), 0, 1),
new Repetition(new Match("fFdD"), 0, 1)
))), new Union(

/**
* [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]?
*/
new Concatenation(
new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
new Match("eE"), new Concatenation(
new Repetition(new Match("-+"), 0, 1), new Concatenation(
new Repetition(PosixClass.digit(), 1, INFINITY),
new Repetition(new Match("fFdD"), 0, 1)
)))),

/**
* [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]
*/
new Concatenation(
new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
new Repetition(new Concatenation(
new Match("eE"), new Concatenation(
new Repetition(new Match("-+"), 0, 1),
new Repetition(PosixClass.digit(), 1, INFINITY)
)), 0, 1),
new Match("fFdD")
))
))));

/**
* 19.3 Terminals from section 3.10.3: Boolean Literal
*/
put("BOOLEAN_LITERAL", new Union(
new Singleton("true"),
new Singleton("false")
));

/**
* 19.3 Terminals from section 3.10.4: Character Literal
* (one plain character or one escape sequence between single quotes)
*/
put("CHARACTER_LITERAL", new Concatenation(
new Singleton("'"), new Concatenation(
new Union(

/**
* Single Character: [^\r\n'\\]
*/
new NonMatch("\r\n'\\"),

/**
* Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
*/
new Concatenation(
new Singleton("\\"),
new Union(
new Match("btnfr\"'\\"),
new Concatenation(
new Repetition(new Range('0', '3'), 0, 1),
new Repetition(new Range('0', '7'), 1, 2)
)
)
)
),
new Singleton("'")
)));

/**
* 19.3 Terminals from section 3.10.5: String Literal
* (any number of plain characters or escape sequences between double quotes)
*/
put("STRING_LITERAL", new Concatenation(
new Singleton("\""), new Concatenation(
new Repetition(
new Union(

/**
* Single Character: [^\r\n"\\]
*/
new NonMatch("\r\n\"\\"),

/**
* Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
*/
new Concatenation(
new Singleton("\\"),
new Union(
new Match("btnfr\"'\\"),
new Concatenation(
new Repetition(new Range('0', '3'), 0, 1),
new Repetition(new Range('0', '7'), 1, 2)
)
)
)
), 0, INFINITY
),
new Singleton("\"")
)));

/**
* 19.3 Terminals section 3.10.7: Null Literal
*/
put("NULL_LITERAL", new Singleton("null"));

// Catch-all terminal for characters not covered by the definitions above
// (operators, separators, ...). Earlier attempts are kept for reference.

//put("OTHER1", new Match(";{}=,<>[]().+-:|&!"));
//put("OTHER1", new NonMatch("")); // catch anything, one character at a time
put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time

}
} // class Java20

}}
2015-08-18 00:53:13

add comment

Snippet ID: #2000394
Snippet name: class Java20 (Java tokenizer)
Eternal ID of this version: #2000394/1
Text MD5: 0f87ae118d54e7797fcee74aef40c57b
Author: stefan
Category: javax
Type: New Tinybrain snippet
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-06-27 08:59:23
Source code size: 6681 bytes / 265 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 758 / 465
Referenced in: [show references]