Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

269
LINES

< > BotCompany Repo | #646 // Lexicon test (tokenizing)

JavaX source code - run with: x30.jar

!636

!1000300 // class Lexicon

main {
  // Entry point: tokenizes a tiny hard-coded source string with MyLexicon
  // and prints every grabbed token until the lexicon's end marker ($) shows up.
  psvm {
    String src = "class main { String s; }";

    // Build the lexicon and register its rules (short floating points allowed).
    MyLexicon lexicon = new MyLexicon();
    lexicon.lexicalRules(true);

    LineNumberReader reader = new LineNumberReader(new StringReader(src));

    // Pull tokens one at a time; stop as soon as grab() hands back lexicon.$.
    Object token = lexicon.grab(reader);
    while (token != lexicon.$) {
      System.out.println("grabbed: " + token);
      token = lexicon.grab(reader);
    }
  }
  
  /**
   * Lexicon subclass used by this tokenizing test.
   *
   * lexicalRules() registers one named rule per token type through put():
   * WHITE_SPACE, COMMENT, IDENTIFIER, INTEGER_LITERAL, STRING_LITERAL,
   * SQ_STRING_LITERAL, ML_STRING_LITERAL and FLOATING_POINT_LITERAL.
   * Most rules follow the Java grammar from gi-0.9 (see the note inside
   * lexicalRules); identifiers and the string literal rules deviate from it.
   */
  static class MyLexicon extends Lexicon {
   // Non-letter characters accepted as the FIRST character of an IDENTIFIER
   // (in addition to PosixClass.alpha()).
   private static final String idChars1 = "_$?-#+";
   // Non-alphanumeric characters accepted in the REST of an IDENTIFIER
   // (in addition to PosixClass.alnum()). Note that this set differs from
   // idChars1: '.' is added, '?' and '#' are not included.
   private static final String idChars2 = "_$-.+";
   
   /**
    * Registers all token rules of this lexicon.
    *
    * @param allowShortFloatingPoints if true, the first floating-point form
    *        accepts zero digits after the decimal point (e.g. "1.");
    *        if false, at least one digit is required there.
    */
   void lexicalRules(boolean allowShortFloatingPoints) {
    // These rules are taken from Java10.java (part of gi-0.9 distribution)

		// Passed as the upper bound of Repetition wherever the regex in the
		// accompanying comment uses '*' or '+' (presumably "no upper limit" -
		// confirm against Lexicon.Repetition).
		int INFINITY = -1;

		/**
		* 19.3 Terminals from section 3.6: White Space: [[:space:]]
		*/
		put("WHITE_SPACE", PosixClass.space());

		/**
		* 19.3 Terminals from section 3.7: Comment
		*
		* Union of three alternatives: traditional comment, end-of-line
		* comment and documentation comment.
		*/
		put("COMMENT", new Union(

			//
			// Traditional Comment: /\*[^*]+(\*([^*/][^*]*)?)*\*/
			//
			// NOTE(review): the [^*]+ part demands at least one non-'*'
			// character directly after "/*", so "/**/" and "/** ..." can only
			// be matched by the documentation-comment alternative below.
			//
			new Concatenation(
				new Singleton("/*"), new Concatenation(
				new Repetition(new NonMatch("*"), 1, INFINITY), new Concatenation(
				new Repetition(
					new Concatenation(
						new Singleton("*"),
						new Repetition(new Concatenation(
							new NonMatch("*/"),
							new Repetition(new NonMatch("*"), 0, INFINITY)
						), 0, 1)
					), 0, INFINITY
				),
				new Singleton("*/")
			))), new Union(

			/**
			* End Of Line Comment: //[^\n]*\n
			*
			* NOTE(review): the terminating "\n" is a mandatory part of this
			* rule, so a // comment on the last line of input without a
			* trailing newline presumably does not match - confirm.
			*/
			new Concatenation(
				new Singleton("//"), new Concatenation(
				new Repetition(new NonMatch("\n"), 0, INFINITY),
				new Singleton("\n")
			)),

			//
			// Documentation Comment: /\*\*(([^*/][^*]*)?\*)*/
			//
			new Concatenation(
				new Singleton("/**"), new Concatenation(
				new Repetition(
					new Concatenation(
						new Repetition(new Concatenation(
							new NonMatch("*/"),
							new Repetition(new NonMatch("*"), 0, INFINITY)
						), 0, 1),
						new Singleton("*")
					), 0, INFINITY
				),
				new Singleton("/")
			))
		)));

		/**
		* Identifier (not the plain Java rule): first character is a letter
		* or one of idChars1, followed by any number of alphanumerics or
		* characters from idChars2.
		*/
		put("IDENTIFIER", new Concatenation(
			new Union(
				PosixClass.alpha(),
				new Match(idChars1)
			),
			new Repetition(
				new Union(
					PosixClass.alnum(),
					new Match(idChars2)
				), 0, INFINITY
			)
		));

		/**
		* 19.3 Terminals from section 3.10.1: Integer Literal
		*
		* One of the decimal / hexadecimal / octal forms below, followed by
		* an optional 'l' or 'L' suffix.
		*/
		put("INTEGER_LITERAL", new Concatenation(
			new Union(
				/**
				* Decimal Integer Literal: 0|[1-9][[:digit:]]*
				*/
				new Singleton("0"), new Union(

				new Concatenation(
					new Range('1', '9'),
					new Repetition(PosixClass.digit(), 0, INFINITY)
				), new Union(

				/**
				* Hexadecimal Integer Literal: 0[xX][[:xdigit:]]+
				*/
				new Concatenation(
					new Singleton("0"), new Concatenation(
					new Match("xX"),
					new Repetition(PosixClass.xdigit(), 1, INFINITY)
				)),

				/**
				* Octal Integer Literal: 0[0-7]+
				*/
				new Concatenation(
					new Singleton("0"),
					new Repetition(new Range('0', '7'), 1, INFINITY)
				)
			))),
			new Repetition(new Match("lL"), 0, 1)
		));

		/**
		* 19.3 Terminals from Java section 3.10.5: String Literal - extended for multi-line strings
		*/
		put("STRING_LITERAL", new Concatenation(
			new Singleton("\""), new Concatenation(
			new Repetition(
				new Union(

					/**
					* Single Character: [^"\\]
					* (unlike the Java rule, \r and \n are NOT excluded here,
					* which is what permits multi-line strings)
					*/
					new NonMatch("\"\\"), // Java: new NonMatch("\r\n\"\\"),

					/**
					* Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
					*/
					new Concatenation(
						new Singleton("\\"),
						new Union(
							new Match("btnfr\"'\\"),
							new Concatenation(
								new Repetition(new Range('0', '3'), 0, 1),
								new Repetition(new Range('0', '7'), 1, 2)
							)
						)
					)
				), 0, INFINITY
			),
			new Singleton("\"")
		)));

    /**
     * Addition for trees: Single-quote string literal.
     * (Original author's note: it is stored quoted with double-quotes.
     * That conversion is not visible in this class - verify where the
     * token is consumed.)
     */
    put("SQ_STRING_LITERAL", new Concatenation(
      new Singleton("\'"), new Concatenation(
      new Repetition(
        new Union(

          /**
           * Single Character: [^\r\n'\\]
           * (the single quote is excluded here, not the double quote;
           * line breaks are excluded, so this literal is single-line)
           */
          new NonMatch("\r\n\'\\"),

          /**
           * Escape Sequence: \\([btnfr\"'\\]|[0-3]?[0-7]{1,2})
           */
          new Concatenation(
            new Singleton("\\"),
            new Union(
              new Match("btnfr\"'\\"),
              new Concatenation(
                new Repetition(new Range('0', '3'), 0, 1),
                new Repetition(new Range('0', '7'), 1, 2)
              )
            )
          )
        ), 0, INFINITY
      ),
      new Singleton("\'")
    )));

    /**
     * Multi-line string literals within brackets (>> hello world <<)
     *
     * Shape: ">>" followed by any number of (non-'<' character, or a '<'
     * followed by a non-'<' character), closed by "<<". No escape
     * sequences are defined for this literal.
     */
    put("ML_STRING_LITERAL", new Concatenation(
      new Singleton(">>"), new Concatenation(
      new Repetition(
        new Union(
          // either not a closing bracket character ('<')..
          new NonMatch("<"),

          // or a single '<' that is not followed by a second '<'
          new Concatenation(
            new Singleton("<"),
            new NonMatch("<"))
        ), 0, INFINITY),
      new Singleton("<<")
    )));

    /**
    * 19.3 Terminals from section 3.10.2: Floating-Point Literal
    *
    * Union of four alternative forms, listed below.
    */
    put("FLOATING_POINT_LITERAL", new Union(

      /**
      * [[:digit:]]+\.[[:digit:]]*([eE][-+]?[[:digit:]]+)?[fFdD]?
      *
      * The regex above is the allowShortFloatingPoints == true form; when
      * the flag is false the digits after the point become [[:digit:]]+.
      */
      new Concatenation(
        new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
        new Singleton("."), new Concatenation(
        new Repetition(PosixClass.digit(), allowShortFloatingPoints ? 0 : 1, INFINITY), new Concatenation(
        new Repetition(new Concatenation(
          new Match("eE"), new Concatenation(
          new Repetition(new Match("-+"), 0, 1),
          new Repetition(PosixClass.digit(), 1, INFINITY)
        )), 0, 1),
        new Repetition(new Match("fFdD"), 0, 1)
      )))), new Union(

      /**
      * \.[[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]?
      */
      new Concatenation(
        new Singleton("."), new Concatenation(
        new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
        new Repetition(new Concatenation(
          new Match("eE"), new Concatenation(
          new Repetition(new Match("-+"), 0, 1),
          new Repetition(PosixClass.digit(), 1, INFINITY)
        )), 0, 1),
        new Repetition(new Match("fFdD"), 0, 1)
      ))), new Union(

      /**
      * [[:digit:]]+[eE][-+]?[[:digit:]]+[fFdD]?
      */
      new Concatenation(
        new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
        new Match("eE"), new Concatenation(
        new Repetition(new Match("-+"), 0, 1), new Concatenation(
        new Repetition(PosixClass.digit(), 1, INFINITY),
        new Repetition(new Match("fFdD"), 0, 1)
      )))),

      /**
      * [[:digit:]]+([eE][-+]?[[:digit:]]+)?[fFdD]
      */
      new Concatenation(
        new Repetition(PosixClass.digit(), 1, INFINITY), new Concatenation(
        new Repetition(new Concatenation(
          new Match("eE"), new Concatenation(
          new Repetition(new Match("-+"), 0, 1),
          new Repetition(PosixClass.digit(), 1, INFINITY)
        )), 0, 1),
        new Match("fFdD")
      ))
    ))));
   }
  } // class MyLexicon
}

download  show line numbers  debug dex  old transpilations   

Travelled to 15 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, qbtsjoyahagl, teubizvjbppd, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #646
Snippet name: Lexicon test (tokenizing)
Eternal ID of this version: #646/1
Text MD5: 8a857a79cd64aea9ea7deeb3d0ca7a94
Author: stefan
Category: javax
Type: JavaX source code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-06-26 21:22:08
Source code size: 7446 bytes / 269 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 688 / 603
Referenced in: [show references]