Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

292
LINES

< > BotCompany Repo | #648 // Lexicon test 2 (tokenizing Java)

JavaX source code - run with: x30.jar

!636

!629 // standard functions
!1000300 // class Lexicon

main {
  psvm {
    String src = takeInput(args, "class main {\n  String s;\n}");
    Java20 lex = new Java20();
    src = src.replace("\r\n", "\n");
    LineNumberReader source = new LineNumberReader(new StringReader(src));
    int lineNr = source.getLineNumber()+1;
    for (Object a; (a = lex.grab(source)) != lex.$;) {
      System.out.println("grabbed at line " + lineNr + ": " + a + " " + quote(lex.word()));
      lineNr = source.getLineNumber()+1;
    }
  }
  
  static String takeInput(String[] args, String def) tex {
    if (args.length != 0) return loadSnippet(args[0]);
    return loadTextFile("input/input.txt", def);
  }
  
  public static String quote(String s) {
    if (s == null) return "null";
    return "\"" + s.replace("\\", "\\\\").replace("\"", "\\\"").replace("\r", "\\r").replace("\n", "\\n") + "\"";
  }
  
  static class Java20 extends Lexicon {

	Java20() {

		/**
		* Grammar for Java 2.0.
		*
		* Nonterminal - first letter uppercase
		* TERMINAL - all letters uppercase
		* keyword - all letters lowercase
		*/
		int INFINITY = -1;

		/**
		* 19.3 Terminals from section 3.6: White Space: [[:space:]]
		*/
		put("WHITE_SPACE", new Repetition(PosixClass.space(), 1, INFINITY));

		/**
		* 19.3 Terminals from section 3.7: Comment
		*/
		put("COMMENT", new Union(

			//
			// Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/
			//
			new Concatenation(
				new Singleton("/*"), new Concatenation(
				new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
				new Repetition(
					new Concatenation(
						new Singleton("*"),
						new Repetition(new Concatenation(
							new NonMatch("*/"),
							new Repetition(new NonMatch("*"), 0, INFINITY)
						), 0, 1)
					), 0, INFINITY
				),
				new Singleton("*/")
			))), new Union(

			/**
			* End Of Line Comment: //[^\n]*\n
			*/
			new Concatenation(
				new Singleton("//"), new Concatenation(
				new Repetition(new NonMatch("\n"), 0, INFINITY),
				new Singleton("\n")
			)),

			//
			// Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/
			//
			new Concatenation(
				new Singleton("/**"), new Concatenation(
				new Repetition(
					new Concatenation(
						new Repetition(new Concatenation(
							new NonMatch("*/"),
							new Repetition(new NonMatch("*"), 0, INFINITY)
						), 0, 1),
						new Singleton("*")
					), 0, INFINITY
				),
				new Singleton("/")
			))
		)));

		put("IDENTIFIER", new Concatenation(
			new Union(
				PosixClass.alpha(),
				new Match("_$")
			),
			new Repetition(
				new Union(
					PosixClass.alnum(),
					new Match("_$")
				), 0, INFINITY
			)
		));

		/**
		* 19.3 Terminals from section 3.9: Keyword (recognized but not in the Java grammar)
		*/
		put("KEYWORD", new Union(
			new Singleton("const"),
			new Singleton("goto")
		));

		/**
		* 19.3 Terminals from section 3.10.1: Integer Literal
		*/
		put("INTEGER_LITERAL", new Concatenation(
			new Union(
				/**
				* Decimal Integer Literal: 0|[1-9][[:digit:]]*
				*/
				new Singleton("0"), new Union(

				new Concatenation(
					new Range('1', '9'),
					new Repetition(PosixClass.digit(), 0, INFINITY)
				), new Union(

				/**
				* Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+
				*/
				new Concatenation(
					new Singleton("0"), new Concatenation(
					new Match("xX"),
					new Repetition(PosixClass.xdigit(), 1, INFINITY)
				)),

				/**
				* Octal Integer Literal: 0[0-7]+
				*/
				new Concatenation(
					new Singleton("0"),
					new Repetition(new Range('0', '7'), 1, INFINITY)
				)
			))),
			new Repetition(new Match("lL"), 0, 1)
		));

		/**
		* 19.3 Terminals from section 3.10.2: Floating-Point Literal
		*/
		put("FLOATING_POINT_LITERAL", new Union(

			/**
			* [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]?
			*/
			new Concatenation(
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
				new Singleton("."), new Concatenation(
				new Repetition(PosixClass.digit(), 0, INFINITY), new Concatenation(
				new Repetition(new Concatenation(
					new Match("eE"), new Concatenation(
					new Repetition(new Match("-+"), 0, 1),
					new Repetition(PosixClass.digit(), 1, INFINITY)
				)), 0, 1),
				new Repetition(new Match("fFdD"), 0, 1)
			)))), new Union(

			/**
			* \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]?
			*/
			new Concatenation(
				new Singleton("."), new Concatenation(
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
				new Repetition(new Concatenation(
					new Match("eE"), new Concatenation(
					new Repetition(new Match("-+"), 0, 1),
					new Repetition(PosixClass.digit(), 1, INFINITY)
				)), 0, 1),
				new Repetition(new Match("fFdD"), 0, 1)
			))), new Union(

			/**
			* [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]?
			*/
			new Concatenation(
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
				new Match("eE"), new Concatenation(
				new Repetition(new Match("-+"), 0, 1), new Concatenation(
				new Repetition(PosixClass.digit(), 1, INFINITY),
				new Repetition(new Match("fFdD"), 0, 1)
			)))),

			/**
			* [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]
			*/
			new Concatenation(
				new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
				new Repetition(new Concatenation(
					new Match("eE"), new Concatenation(
					new Repetition(new Match("-+"), 0, 1),
					new Repetition(PosixClass.digit(), 1, INFINITY)
				)), 0, 1),
				new Match("fFdD")
			))
		))));

		/**
		* 19.3 Terminals from section 3.10.3: Boolean Literal
		*/
		put("BOOLEAN_LITERAL", new Union(
			new Singleton("true"),
			new Singleton("false")
		));

		/**
		* 19.3 Terminals from section 3.10.4: Character Literal
		*/
		put("CHARACTER_LITERAL", new Concatenation(
			new Singleton("'"), new Concatenation(
			new Union(

				/**
				* Single Character: [^\r\n'\\]
				*/
				new NonMatch("\r\n'\\"),

				/**
				* Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
				*/
				new Concatenation(
					new Singleton("\\"),
					new Union(
						new Match("btnfr\"'\\"),
						new Concatenation(
							new Repetition(new Range('0', '3'), 0, 1),
							new Repetition(new Range('0', '7'), 1, 2)
						)
					)
				)
			),
			new Singleton("'")
		)));

		/**
		* 19.3 Terminals from section 3.10.5: String Literal
		*/
		put("STRING_LITERAL", new Concatenation(
			new Singleton("\""), new Concatenation(
			new Repetition(
				new Union(

					/**
					* Single Character: [^\r\n"\\]
					*/
					new NonMatch("\r\n\"\\"),

					/**
					* Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
					*/
					new Concatenation(
						new Singleton("\\"),
						new Union(
							new Match("btnfr\"'\\"),
							new Concatenation(
								new Repetition(new Range('0', '3'), 0, 1),
								new Repetition(new Range('0', '7'), 1, 2)
							)
						)
					)
				), 0, INFINITY
			),
			new Singleton("\"")
		)));

		/**
		* 19.3 Terminals section 3.10.7: Null Literal
		*/
		put("NULL_LITERAL", new Singleton("null"));
		
		// OK, it seems we have to add some more stuff...
		
		//put("OTHER1", new Match(";{}=,<>[]().+-:|&!"));
		//put("OTHER1", new NonMatch("")); // catch anything, one character at a time
		put("OTHER1", new NonMatch(" \t\r\n")); // catch any non-whitespace, one character at a time

	}
} // class Java20
}

Author comment

Began life as a copy of #646

download  show line numbers  debug dex  old transpilations   

Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, qbtsjoyahagl, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #648
Snippet name: Lexicon test 2 (tokenizing Java)
Eternal ID of this version: #648/1
Text MD5: fe078705832195394c64b47b92834d91
Author: stefan
Category: javax
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-06-27 17:07:16
Source code size: 7606 bytes / 292 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 637 / 539
Referenced in: [show references]