Libraryless. Click here for Pure Java version (8327L/47K).
1 | sclass EnglishDateParser > DateStructures { |
2 | replace P with ParsedWithTokens. |
3 | |
4 | bool assumeFuture = true; // e.g. for "tuesday" [not used] |
5 | LS tok; |
6 | int maxTokens = 3; |
7 | |
8 | // "top dogs" are the longest non-overlapping parses |
9 | L<P<SomeDate>> topDogs(S s) { |
10 | ret pwt_topDogs(pwt_filterByType SomeDate(allParses(s))); |
11 | } |
12 | |
13 | SomeDate parse(S s) { |
14 | ret getVar(first(topDogs(s))); |
15 | } |
16 | |
17 | ItIt<P> allParses(S s) { |
18 | // tokenize, initialize |
19 | L<P<S>> initials = pwt_initial(tok = javaTok(s), maxTokens); |
20 | new L<P> out; |
21 | |
22 | // find numbers |
23 | L<P<Int>> numbers = pwt_transform(number(), initials); |
24 | |
25 | // find ordinals (1st, 2nd, ...) |
26 | new L<P<Int>> ordinals; |
27 | for (P<Int> number : numbers) |
28 | for (P<S> ord : parseToTheRight(fixedToken("st", "nd", "rd", "th"), number)) |
29 | ordinals.add(pwt_combine(number, ord)); |
30 | //print(+ordinals); |
31 | |
32 | // "in <n> days" |
33 | for (P<Int> number : numbers) |
34 | for (P<S> in : parseToTheLeft(fixedToken("in"), number)) |
35 | for (P<S> days : parseToTheRight(fixedToken("day", "days"), number)) |
36 | out.add(pwt_combine(new TodayPlus(number!), in, days)); |
37 | |
38 | // "<n> days from now" |
39 | for (P<Int> number : numbers) |
40 | for (P<S> daysFromNow : parseToTheRight(fixedToken("day from now", "days from now"), number)) |
41 | out.add(pwt_combine(new TodayPlus(number!), number, daysFromNow)); |
42 | |
43 | L<P<Int>> years = pwt_filter isYear(numbers); |
44 | out.addAll(years); |
45 | |
46 | L<P<Int>> months = pwt_filter isMonthNr(numbers); |
47 | L<P<Int>> dayOfMonths = pwt_filter isDayOfMonth(numbers); |
48 | |
49 | L<P<Weekday>> weekdays = pwt_transform(weekday(), initials); |
50 | out.addAll(weekdays); |
51 | |
52 | // month names |
53 | L<P<Month>> monthNames = pwt_transform(monthName(), initials); |
54 | out.addAll(monthNames); |
55 | |
56 | // month name + year, e.g. "February 2020" |
57 | out.addAll(pwt_combine(monthNames, years, (month, year) -> new Month(month.month, new Year(year)))); |
58 | |
59 | // month name + ordinal, e.g. "March 4th" |
60 | out.addAll(pwt_combine(monthNames, ordinals, (month, ord) -> new Day(ord, month))); |
61 | |
62 | // yesterday, today, tomorrow |
63 | out.addAll(pwt_transform(t -> |
64 | eqic(t, "yesterday") ? new TodayPlus(-1) : |
65 | eqic(t, "today") ? new TodayPlus(0) : |
66 | new TodayPlus(1), |
67 | pwt_transform(fixedToken("yesterday", "today", "tomorrow"), initials))); |
68 | |
69 | // last/this/next week |
70 | for (P<S> week : pwt_transform(fixedToken("week"), initials)) |
71 | for (P<S> which : parseToTheLeft(fixedToken("last", "this", "next"), week)) |
72 | out.add(pwt_combine(new CurrentWeekPlus( |
73 | eqic(which!, "last") ? -1 |
74 | : eqic(which!, "this") ? 0 : 1), which, week)); |
75 | |
76 | // "next <weekday>" |
77 | for (P<Weekday> weekday : weekdays) |
78 | for (P<S> next : parseToTheLeft(fixedToken("next"), weekday)) |
79 | out.add(pwt_combine(new Weekday(weekday->weekday, new CurrentWeekPlus(1)), next, weekday)); |
80 | |
81 | for (P<Int> year : years) |
82 | for (P<S> slash : parseToTheRight(fixedToken("/"), year)) |
83 | for (P<Int> month : pwt_toTheRightOf(months, slash)) |
84 | for (P<S> slash2 : parseToTheRight(fixedToken("/"), month)) |
85 | for (P<Int> day : pwt_toTheRightOf(dayOfMonths, slash2)) |
86 | out.add(pwt_combine(new Day(day!, new Month(month!, new Year(year!))), year, day)); |
87 | |
88 | L<P<Hour>> hours = pwt_transform numberToHour(numbers); |
89 | L<P<Int>> minutes = pwt_filter isMinute(numbers); |
90 | L<P<Int>> seconds = minutes; |
91 | |
92 | L<P<S>> colons = pwt_filter(t -> eq(t, ":"), initials); |
93 | |
94 | // 15:12 etc. |
95 | L<P<Minute>> hoursAndMinutes = pwt_combine(hours, colons, minutes, (h, _, m) -> new Minute(m, h)); |
96 | out.addAll(hoursAndMinutes); |
97 | |
98 | L<P<Second>> hoursAndMinutesAndSeconds = pwt_combine(hoursAndMinutes, colons, seconds, |
99 | (hm, _, second) -> new Second(second, hm)); |
100 | out.addAll(hoursAndMinutesAndSeconds); |
101 | |
102 | L<P<S>> amPMs = pwt_transform(fixedToken("am", "pm"), initials); |
103 | |
104 | // 3 am, 5 pm etc. |
105 | L<P<Hour>> amPMTimes = pwt_combine(numbers, amPMs, (hour, amPM) -> !between(hour, 1, 12) ? null : new Hour(hour, eqic(amPM, "pm"))); |
106 | out.addAll(amPMTimes); |
107 | |
108 | // between 1 and 2 pm |
109 | for (P<Hour> time : amPMTimes) |
110 | for (P<S> and : parseToTheLeft(fixedToken("and"), time)) |
111 | for (P<Hour> hour : pwt_toTheLeftOf(hours, and)) |
112 | out.add(pwt_combine(new Between(new Hour(hour->hour, time->isPM), time!), time, hour)); |
113 | |
114 | ret itIt(out); |
115 | } |
116 | |
117 | IF1<S, Int> number() { ret s -> isInteger(s) ? parseInt(s) : null; } |
118 | |
119 | bool isYear(int n) { ret between(n, 1900, 2100); } |
120 | bool isMonthNr(int n) { ret between(n, 1, 12); } |
121 | bool isDayOfMonth(int n) { ret between(n, 1, 31); } |
122 | bool isHour(int n) { ret between(n, 0, 23); } |
123 | bool isMinute(int n) { ret between(n, 0, 59); } |
124 | bool isSecond(int n) { ret between(n, 0, 59); } |
125 | |
126 | Hour numberToHour(int n) { ret !isHour(n) ? null : n > 12 ? new Hour(n-12, true) : new Hour(n, null); } |
127 | |
128 | IF1<S> fixedToken(S... tokens) { ret fixedToken(litciset(tokens)); } |
129 | IF1<S> fixedToken(Set<S> set) { ret t -> contains(set, t) ? t : null; } |
130 | |
131 | IF1<S, Weekday> weekday() { |
132 | ret s -> { |
133 | int n = parseEnglishWeekday(s); |
134 | ret n == 0 ? null : new Weekday(n); |
135 | }; |
136 | } |
137 | |
138 | IF1<S, Month> monthName() { |
139 | ret s -> { |
140 | int n = parseEnglishMonthName(s); |
141 | ret n == 0 ? null : new Month(n); |
142 | }; |
143 | } |
144 | |
145 | <A, B> L<ParsedWithTokens<B>> parseToTheLeft(IF1<A, B> f, ParsedWithTokens p) { |
146 | ret pwt_transform(f, pwt_precedingTokens(1, maxTokens, p.start())); |
147 | } |
148 | |
149 | <A, B> L<ParsedWithTokens<B>> parseToTheRight(IF1<A, B> f, ParsedWithTokens p) { |
150 | ret pwt_transform(f, pwt_followingTokens(1, maxTokens, p.remaining())); |
151 | } |
152 | } |
Began life as a copy of #1028540
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028573 |
Snippet name: | EnglishDateParser, attempt 4 [good example for ParsedWithTokens, LIVE] |
Eternal ID of this version: | #1028573/144 |
Text MD5: | 67a6a957f657034566fd8d147654050a |
Transpilation MD5: | c1d5b3384cc6f27c60ce6f676b91ff03 |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2022-01-15 00:33:15 |
Source code size: | 5805 bytes / 152 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 526 / 1322 |
Version history: | 143 change(s) |
Referenced in: | [show references] |