Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

152
LINES

< > BotCompany Repo | #1028573 - EnglishDateParser, attempt 4 [using ParsedWithTokens, LIVE]

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (4888L/30K).

// Finds English date/time expressions in a string, built on ParsedWithTokens.
// allParses() collects every candidate parse, topDogs() narrows them down to
// SomeDate parses, and parse() returns the value of the first of those.
sclass EnglishDateParser > DateStructures {
  // P is shorthand for ParsedWithTokens throughout this class
  replace P with ParsedWithTokens.
  
  bool assumeFuture = true; // e.g. for "tuesday" [not used]
  // token list of the most recently parsed input (assigned in allParses)
  LS tok;
  // handed to pwt_initial / pwt_precedingTokens / pwt_followingTokens;
  // presumably the maximum number of tokens a single phrase may span
  // (multi-word phrases like "days from now" are matched as one unit) - TODO confirm
  int maxTokens = 3;

  // "top dogs" are the longest non-overlapping parses
  // Only parses of type SomeDate are considered.
  L<P<SomeDate>> topDogs(S s) {
    ret pwt_topDogs(pwt_filterByType SomeDate(allParses(s)));
  }
  
  // Returns the value of the first top dog for s.
  // NOTE(review): what happens when nothing parses depends on how
  // first()/getVar() treat an empty list / null - confirm.
  SomeDate parse(S s) {
    ret getVar(first(topDogs(s)));
  }
  
  // Tokenizes s (storing the tokens in this.tok) and returns all candidate parses.
  // Parses are appended to "out" section by section, so the order of the
  // sections below determines the order of the result.
  ItIt<P> allParses(S s) {
    // tokenize, initialize
    L<P<S>> initials = pwt_initial(tok = javaTok(s), maxTokens);
    new L<P> out;
    
    // find numbers
    L<P<Int>> numbers = pwt_transform(number(), initials);

    // find ordinals (1st, 2nd, ...)
    // i.e. a number directly followed by one of the suffixes st/nd/rd/th
    new L<P<Int>> ordinals;
    for (P<Int> number : numbers)
      for (P<S> ord : parseToTheRight(fixedToken("st", "nd", "rd", "th"), number))
        ordinals.add(pwt_combine(number, ord));
    //print(+ordinals);
    
    // "in <n> days"
    // requires "in" to the left and "day"/"days" to the right of the number
    for (P<Int> number : numbers)
      for (P<S> in : parseToTheLeft(fixedToken("in"), number))
        for (P<S> days : parseToTheRight(fixedToken("day", "days"), number))
          out.add(pwt_combine(new TodayPlus(number!), in, days));
    
    // "<n> days from now"
    for (P<Int> number : numbers)
      for (P<S> daysFromNow : parseToTheRight(fixedToken("day from now", "days from now"), number))
        out.add(pwt_combine(new TodayPlus(number!), number, daysFromNow));
    
    // numbers accepted by isYear are emitted as parses on their own
    L<P<Int>> years = pwt_filter isYear(numbers);
    out.addAll(years);

    // month and day-of-month numbers are not emitted by themselves;
    // they are only used by the <year>/<month>/<day> rule further down
    L<P<Int>> months = pwt_filter isMonthNr(numbers);
    L<P<Int>> dayOfMonths = pwt_filter isDayOfMonth(numbers);
    
    // weekday names are emitted as parses on their own
    L<P<Weekday>> weekdays = pwt_transform(weekday(), initials);
    out.addAll(weekdays);

    // month names
    L<P<Month>> monthNames = pwt_transform(monthName(), initials);
    out.addAll(monthNames);

    // month name + year, e.g. "February 2020"
    out.addAll(pwt_combine(monthNames, years, (month, year) -> new Month(month.month, new Year(year))));

    // month name + ordinal, e.g. "March 4th"
    out.addAll(pwt_combine(monthNames, ordinals, (month, ord) -> new Day(ord, month)));

    // yesterday, today, tomorrow
    // The inner pwt_transform only lets these three words through, so the
    // final else branch of the lambda is the "tomorrow" case.
    out.addAll(pwt_transform(t ->
      eqic(t, "yesterday") ? new TodayPlus(-1) :
      eqic(t, "today") ? new TodayPlus(0) :
      new TodayPlus(1),
      pwt_transform(fixedToken("yesterday", "today", "tomorrow"), initials)));
      
    // last/this/next week
    // mapped to CurrentWeekPlus(-1), CurrentWeekPlus(0), CurrentWeekPlus(1)
    for (P<S> week : pwt_transform(fixedToken("week"), initials))
      for (P<S> which : parseToTheLeft(fixedToken("last", "this", "next"), week))
        out.add(pwt_combine(new CurrentWeekPlus(
          eqic(which!, "last") ? -1
          : eqic(which!, "this") ? 0 : 1), which, week));
    
    // "next <weekday>"
    // the weekday is placed in CurrentWeekPlus(1)
    for (P<Weekday> weekday : weekdays)
      for (P<S> next : parseToTheLeft(fixedToken("next"), weekday))
        out.add(pwt_combine(new Weekday(weekday->weekday, new CurrentWeekPlus(1)), next, weekday));

    // "<year>/<month>/<day>", e.g. "2020/7/10"
    // Only the outermost parts (year, day) are handed to pwt_combine;
    // presumably the combined parse spans everything in between - TODO confirm
    for (P<Int> year : years)
      for (P<S> slash : parseToTheRight(fixedToken("/"), year))
        for (P<Int> month : pwt_toTheRightOf(months, slash))
          for (P<S> slash2 : parseToTheRight(fixedToken("/"), month))
            for (P<Int> day : pwt_toTheRightOf(dayOfMonths, slash2))
              out.add(pwt_combine(new Day(day!, new Month(month!, new Year(year!))), year, day));

    // building blocks for clock times
    L<P<Hour>> hours = pwt_transform numberToHour(numbers);
    L<P<Int>> minutes = pwt_filter isMinute(numbers);
    // seconds reuse the minutes list (isSecond checks the same 0..59 bounds as isMinute)
    L<P<Int>> seconds = minutes;

    L<P<S>> colons = pwt_filter(t -> eq(t, ":"), initials);

    // 15:12 etc.
    L<P<Minute>> hoursAndMinutes = pwt_combine(hours, colons, minutes, (h, _, m) -> new Minute(m, h));
    out.addAll(hoursAndMinutes);

    // 15:12:30 etc. - an hour:minute parse followed by another colon and a number
    L<P<Second>> hoursAndMinutesAndSeconds = pwt_combine(hoursAndMinutes, colons, seconds,
      (hm, _, second) -> new Second(second, hm));
    out.addAll(hoursAndMinutesAndSeconds);

    L<P<S>> amPMs = pwt_transform(fixedToken("am", "pm"), initials);

    // 3 am, 5 pm etc.
    // The lambda returns null for numbers that between(hour, 1, 12) rejects;
    // presumably pwt_combine drops null results - TODO confirm
    L<P<Hour>> amPMTimes = pwt_combine(numbers, amPMs, (hour, amPM) -> !between(hour, 1, 12) ? null : new Hour(hour, eqic(amPM, "pm")));
    out.addAll(amPMTimes);

    // between 1 and 2 pm
    // The first hour takes over the am/pm flag of the second one.
    // NOTE(review): the word "between" itself is never matched here, only
    // "<hour> and <hour> am/pm".
    // NOTE(review): pwt_combine gets (time, hour) although hour lies to the left
    // of time; every other 3-argument call in this method passes the leftmost
    // parse first - confirm that pwt_combine does not depend on argument order.
    for (P<Hour> time : amPMTimes)
      for (P<S> and : parseToTheLeft(fixedToken("and"), time))
        for (P<Hour> hour : pwt_toTheLeftOf(hours, and))
          out.add(pwt_combine(new Between(new Hour(hour->hour, time->isPM), time!), time, hour));
        
    ret itIt(out);
  }

  // Token -> Integer when isInteger accepts the token, otherwise null
  IF1<S, Int> number() { ret s -> isInteger(s) ? parseInt(s) : null; }
  
  // Range checks deciding which roles a number can play
  bool isYear(int n) { ret between(n, 1900, 2100); }
  bool isMonthNr(int n) { ret between(n, 1, 12); }
  bool isDayOfMonth(int n) { ret between(n, 1, 31); }
  bool isHour(int n) { ret between(n, 0, 23); }
  bool isMinute(int n) { ret between(n, 0, 59); }
  bool isSecond(int n) { ret between(n, 0, 59); }

  // Turns a number into an Hour, or null when isHour rejects it.
  // n > 12 becomes Hour(n-12, true), i.e. flagged as PM; any other accepted
  // value (including 0 and 12) keeps n and gets null as its PM flag
  // (presumably meaning "am/pm unknown" - TODO confirm).
  Hour numberToHour(int n) { ret !isHour(n) ? null : n > 12 ? new Hour(n-12, true) : new Hour(n, null); }

  // Matcher that passes a token through unchanged when it is contained in the
  // given set and yields null otherwise. The varargs form builds the set with
  // litciset (presumably a case-insensitive set - TODO confirm).
  IF1<S> fixedToken(S... tokens) { ret fixedToken(litciset(tokens)); }
  IF1<S> fixedToken(Set<S> set) { ret t -> contains(set, t) ? t : null; }
  
  // Token -> Weekday; null when parseEnglishWeekday returns 0
  IF1<S, Weekday> weekday() {
    ret s -> {
      int n = parseEnglishWeekday(s);
      ret n == 0 ? null : new Weekday(n);
    };
  }
  
  // Token -> Month; null when parseEnglishMonthName returns 0
  IF1<S, Month> monthName() {
    ret s -> {
      int n = parseEnglishMonthName(s);
      ret n == 0 ? null : new Month(n);
    };
  }
  
  // Applies f to the tokens preceding p, as delivered by
  // pwt_precedingTokens(1, maxTokens, p.start())
  <A, B> L<ParsedWithTokens<B>> parseToTheLeft(IF1<A, B> f, ParsedWithTokens p) {
    ret pwt_transform(f, pwt_precedingTokens(1, maxTokens, p.start()));
  }
  
  // Applies f to the tokens following p, as delivered by
  // pwt_followingTokens(1, maxTokens, p.remaining())
  <A, B> L<ParsedWithTokens<B>> parseToTheRight(IF1<A, B> f, ParsedWithTokens p) {
    ret pwt_transform(f, pwt_followingTokens(1, maxTokens, p.remaining()));
  }
}

Author comment

Began life as a copy of #1028540

download  show line numbers  debug dex   

Travelled to 6 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028573
Snippet name: EnglishDateParser, attempt 4 [using ParsedWithTokens, LIVE]
Eternal ID of this version: #1028573/143
Text MD5: 67a6a957f657034566fd8d147654050a
Transpilation MD5: ea25a1825ad8844973efa5d4cb6038e2
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-10 18:46:26
Source code size: 5805 bytes / 152 lines
Pitched / IR pitched: No / No
Views / Downloads: 286 / 865
Version history: 142 change(s)
Referenced in: [show references]

Formerly at http://tinybrain.de/1028573 & http://1028573.tinybrain.de