Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

152
LINES

< > BotCompany Repo | #1028573 // EnglishDateParser, attempt 4 [good example for ParsedWithTokens, LIVE]

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (8327L/47K).

1  
sclass EnglishDateParser > DateStructures {
2  
  replace P with ParsedWithTokens.
3  
  
4  
  // Flag that is declared but not read anywhere in this class.
  // NOTE(review): presumably meant to resolve ambiguous inputs such as a bare weekday
  // name towards the future - confirm before wiring it in.
  bool assumeFuture = true; // e.g. for "tuesday" [not used]
5  
  // Token list of the most recently parsed input; assigned from javaTok(s) in allParses().
  LS tok;
6  
  // Passed to pwt_initial, pwt_precedingTokens and pwt_followingTokens.
  // NOTE(review): presumably the maximum number of tokens a single candidate may span - confirm.
  int maxTokens = 3;
7  
8  
  // "top dogs" are the longest non-overlapping parses
9  
  // Runs allParses(s), keeps only the parses whose value is a SomeDate
  // (pwt_filterByType) and hands them to pwt_topDogs for selection.
  // NOTE(review): pwt_topDogs is defined elsewhere; presumably it keeps the longest
  // non-overlapping parses - confirm against its implementation.
  L<P<SomeDate>> topDogs(S s) {
10  
    ret pwt_topDogs(pwt_filterByType SomeDate(allParses(s)));
11  
  }
12  
  
13  
  // Convenience entry point: takes the first element of topDogs(s) and returns
  // its value via getVar().
  // NOTE(review): behaviour for input without any parse depends on how first() and
  // getVar() treat an empty list / null - presumably both yield null; confirm.
  SomeDate parse(S s) {
    P<SomeDate> best = first(topDogs(s));
    ret getVar(best);
  }
16  
  
17  
  // Collects every parse this class can find in s, without filtering or ranking.
  //
  // The input is tokenized with javaTok (the token list is also stored in the field
  // `tok`), initial token candidates are created with pwt_initial (limited by
  // maxTokens), and from those the method derives typed candidates (numbers,
  // ordinals, years, weekdays, month names, clock times) and combines neighbouring
  // candidates into larger date/time structures.
  //
  // Returns an ItIt over all collected parses. Partial and overlapping parses are
  // all included; callers such as topDogs() do the selection.
  //
  // Notation used below (JavaX): `p!` is the value of parse p, `p->x` is field x
  // of that value.
  ItIt<P> allParses(S s) {
    // tokenize, initialize
    L<P<S>> initials = pwt_initial(tok = javaTok(s), maxTokens);
    new L<P> out;

    // find numbers
    L<P<Int>> numbers = pwt_transform(number(), initials);

    // find ordinals (1st, 2nd, ...)
    // (only used as building blocks below, not added to out on their own)
    new L<P<Int>> ordinals;
    for (P<Int> number : numbers)
      for (P<S> ord : parseToTheRight(fixedToken("st", "nd", "rd", "th"), number))
        ordinals.add(pwt_combine(number, ord));
    //print(+ordinals);

    // "in <n> days"
    for (P<Int> number : numbers)
      for (P<S> in : parseToTheLeft(fixedToken("in"), number))
        for (P<S> days : parseToTheRight(fixedToken("day", "days"), number))
          out.add(pwt_combine(new TodayPlus(number!), in, days));

    // "<n> days from now"
    for (P<Int> number : numbers)
      for (P<S> daysFromNow : parseToTheRight(fixedToken("day from now", "days from now"), number))
        out.add(pwt_combine(new TodayPlus(number!), number, daysFromNow));

    // plain numbers that qualify as a year are parses in their own right
    L<P<Int>> years = pwt_filter isYear(numbers);
    out.addAll(years);

    // month and day numbers are only building blocks for the y/m/d rule below
    L<P<Int>> months = pwt_filter isMonthNr(numbers);
    L<P<Int>> dayOfMonths = pwt_filter isDayOfMonth(numbers);

    // weekday names
    L<P<Weekday>> weekdays = pwt_transform(weekday(), initials);
    out.addAll(weekdays);

    // month names
    L<P<Month>> monthNames = pwt_transform(monthName(), initials);
    out.addAll(monthNames);

    // month name + year, e.g. "February 2020"
    out.addAll(pwt_combine(monthNames, years, (month, year) -> new Month(month.month, new Year(year))));

    // month name + ordinal, e.g. "March 4th"
    out.addAll(pwt_combine(monthNames, ordinals, (month, ord) -> new Day(ord, month)));

    // yesterday, today, tomorrow
    // (the inner pwt_transform only lets these three words through, so the final
    // else-branch can only be "tomorrow")
    out.addAll(pwt_transform(t ->
      eqic(t, "yesterday") ? new TodayPlus(-1) :
      eqic(t, "today") ? new TodayPlus(0) :
      new TodayPlus(1),
      pwt_transform(fixedToken("yesterday", "today", "tomorrow"), initials)));

    // last/this/next week
    for (P<S> week : pwt_transform(fixedToken("week"), initials))
      for (P<S> which : parseToTheLeft(fixedToken("last", "this", "next"), week))
        out.add(pwt_combine(new CurrentWeekPlus(
          eqic(which!, "last") ? -1
          : eqic(which!, "this") ? 0 : 1), which, week));

    // "next <weekday>"
    for (P<Weekday> weekday : weekdays)
      for (P<S> next : parseToTheLeft(fixedToken("next"), weekday))
        out.add(pwt_combine(new Weekday(weekday->weekday, new CurrentWeekPlus(1)), next, weekday));

    // "<year>/<month>/<day>", e.g. "2020/2/15"
    for (P<Int> year : years)
      for (P<S> slash : parseToTheRight(fixedToken("/"), year))
        for (P<Int> month : pwt_toTheRightOf(months, slash))
          for (P<S> slash2 : parseToTheRight(fixedToken("/"), month))
            for (P<Int> day : pwt_toTheRightOf(dayOfMonths, slash2))
              out.add(pwt_combine(new Day(day!, new Month(month!, new Year(year!))), year, day));

    // building blocks for clock times
    L<P<Hour>> hours = pwt_transform numberToHour(numbers);
    L<P<Int>> minutes = pwt_filter isMinute(numbers);
    L<P<Int>> seconds = minutes; // same candidate list is reused for seconds

    L<P<S>> colons = pwt_filter(t -> eq(t, ":"), initials);

    // 15:12 etc.
    L<P<Minute>> hoursAndMinutes = pwt_combine(hours, colons, minutes, (h, colon, m) -> new Minute(m, h));
    out.addAll(hoursAndMinutes);

    // 15:12:30 etc. (an hour:minute parse followed by ":" and a seconds number)
    L<P<Second>> hoursAndMinutesAndSeconds = pwt_combine(hoursAndMinutes, colons, seconds,
      (hm, colon, second) -> new Second(second, hm));
    out.addAll(hoursAndMinutesAndSeconds);

    L<P<S>> amPMs = pwt_transform(fixedToken("am", "pm"), initials);

    // 3 am, 5 pm etc.
    // (numbers outside 1..12 yield null, i.e. no Hour value for that pair)
    L<P<Hour>> amPMTimes = pwt_combine(numbers, amPMs, (hour, amPM) -> !between(hour, 1, 12) ? null : new Hour(hour, eqic(amPM, "pm")));
    out.addAll(amPMTimes);

    // between 1 and 2 pm
    // The am/pm flag of the right-hand time is applied to the left-hand hour as well.
    // The span runs from the left-hand hour to the right-hand time, so the arguments
    // are passed left-to-right like in every other pwt_combine(value, left, right)
    // call in this method.
    // NOTE(review): the word "between" itself is not matched by this rule.
    for (P<Hour> time : amPMTimes)
      for (P<S> and : parseToTheLeft(fixedToken("and"), time))
        for (P<Hour> hour : pwt_toTheLeftOf(hours, and))
          out.add(pwt_combine(new Between(new Hour(hour->hour, time->isPM), time!), hour, time));

    ret itIt(out);
  }
116  
117  
  // Token-to-number converter: the returned function yields parseInt(s) when
  // isInteger(s) holds and null otherwise.
  IF1<S, Int> number() {
    ret s -> {
      if (!isInteger(s)) ret null;
      ret parseInt(s);
    };
  }
118  
  
119  
  // Year filter for number tokens: accepts n when between(n, 1900, 2100) holds.
  bool isYear(int n) {
    int earliest = 1900, latest = 2100;
    ret between(n, earliest, latest);
  }
120  
  // Month-number filter: accepts n when between(n, 1, 12) holds.
  bool isMonthNr(int n) {
    int firstMonth = 1, lastMonth = 12;
    ret between(n, firstMonth, lastMonth);
  }
121  
  // Day-of-month filter: accepts n when between(n, 1, 31) holds.
  // The check does not depend on the month.
  bool isDayOfMonth(int n) {
    int firstDay = 1, lastDay = 31;
    ret between(n, firstDay, lastDay);
  }
122  
  // Hour filter (24-hour numbering): accepts n when between(n, 0, 23) holds.
  bool isHour(int n) {
    int firstHour = 0, lastHour = 23;
    ret between(n, firstHour, lastHour);
  }
123  
  // Minute filter: accepts n when between(n, 0, 59) holds.
  bool isMinute(int n) {
    int firstMinute = 0, lastMinute = 59;
    ret between(n, firstMinute, lastMinute);
  }
124  
  // Second filter: accepts n when between(n, 0, 59) holds.
  bool isSecond(int n) {
    int firstSecond = 0, lastSecond = 59;
    ret between(n, firstSecond, lastSecond);
  }
125  
126  
  // Converts a plain number into an Hour value.
  //   - n rejected by isHour(n): null
  //   - n > 12:                  Hour(n-12, true)
  //   - otherwise:               Hour(n, null)
  // NOTE(review): the second constructor argument appears to be the PM flag
  // (true = PM, null = not specified) - confirm against the Hour class.
  Hour numberToHour(int n) {
    if (!isHour(n)) ret null;
    if (n > 12) ret new Hour(n-12, true);
    ret new Hour(n, null);
  }
127  
128  
  IF1<S> fixedToken(S... tokens) { ret fixedToken(litciset(tokens)); }
129  
  IF1<S> fixedToken(Set<S> set) { ret t -> contains(set, t) ? t : null; }
130  
  
131  
  // Token-to-Weekday converter. The returned function feeds the token to
  // parseEnglishWeekday; a result of 0 is treated as "not a weekday" (null),
  // any other result is wrapped in a Weekday.
  IF1<S, Weekday> weekday() {
    ret s -> {
      int dayNr = parseEnglishWeekday(s);
      if (dayNr == 0) ret null;
      ret new Weekday(dayNr);
    };
  }
137  
  
138  
  // Token-to-Month converter. The returned function feeds the token to
  // parseEnglishMonthName; a result of 0 is treated as "not a month name" (null),
  // any other result is wrapped in a Month.
  IF1<S, Month> monthName() {
    ret s -> {
      int monthNr = parseEnglishMonthName(s);
      if (monthNr == 0) ret null;
      ret new Month(monthNr);
    };
  }
144  
  
145  
  // Looks for something matching f directly before parse p: takes the candidates
  // from pwt_precedingTokens(1, maxTokens, p.start()) and runs them through
  // pwt_transform with f.
  // NOTE(review): presumably 1 and maxTokens are the minimum/maximum candidate
  // length in tokens, and pwt_transform drops candidates for which f returns
  // null - confirm against those helpers.
  <A, B> L<ParsedWithTokens<B>> parseToTheLeft(IF1<A, B> f, ParsedWithTokens p) {
146  
    ret pwt_transform(f, pwt_precedingTokens(1, maxTokens, p.start()));
147  
  }
148  
  
149  
  // Looks for something matching f directly after parse p: takes the candidates
  // from pwt_followingTokens(1, maxTokens, p.remaining()) and runs them through
  // pwt_transform with f.
  // NOTE(review): presumably 1 and maxTokens are the minimum/maximum candidate
  // length in tokens, and pwt_transform drops candidates for which f returns
  // null - confirm against those helpers.
  <A, B> L<ParsedWithTokens<B>> parseToTheRight(IF1<A, B> f, ParsedWithTokens p) {
150  
    ret pwt_transform(f, pwt_followingTokens(1, maxTokens, p.remaining()));
151  
  }
152  
}

Author comment

Began life as a copy of #1028540

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1028573
Snippet name: EnglishDateParser, attempt 4 [good example for ParsedWithTokens, LIVE]
Eternal ID of this version: #1028573/144
Text MD5: 67a6a957f657034566fd8d147654050a
Transpilation MD5: c1d5b3384cc6f27c60ce6f676b91ff03
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2022-01-15 00:33:15
Source code size: 5805 bytes / 152 lines
Pitched / IR pitched: No / No
Views / Downloads: 435 / 1202
Version history: 143 change(s)
Referenced in: [show references]