Libraryless. Click here for Pure Java version (8327L/47K).
sclass EnglishDateParser > DateStructures { replace P with ParsedWithTokens. bool assumeFuture = true; // e.g. for "tuesday" [not used] LS tok; int maxTokens = 3; // "top dogs" are the longest non-overlapping parses L<P<SomeDate>> topDogs(S s) { ret pwt_topDogs(pwt_filterByType SomeDate(allParses(s))); } SomeDate parse(S s) { ret getVar(first(topDogs(s))); } ItIt<P> allParses(S s) { // tokenize, initialize L<P<S>> initials = pwt_initial(tok = javaTok(s), maxTokens); new L<P> out; // find numbers L<P<Int>> numbers = pwt_transform(number(), initials); // find ordinals (1st, 2nd, ...) new L<P<Int>> ordinals; for (P<Int> number : numbers) for (P<S> ord : parseToTheRight(fixedToken("st", "nd", "rd", "th"), number)) ordinals.add(pwt_combine(number, ord)); //print(+ordinals); // "in <n> days" for (P<Int> number : numbers) for (P<S> in : parseToTheLeft(fixedToken("in"), number)) for (P<S> days : parseToTheRight(fixedToken("day", "days"), number)) out.add(pwt_combine(new TodayPlus(number!), in, days)); // "<n> days from now" for (P<Int> number : numbers) for (P<S> daysFromNow : parseToTheRight(fixedToken("day from now", "days from now"), number)) out.add(pwt_combine(new TodayPlus(number!), number, daysFromNow)); L<P<Int>> years = pwt_filter isYear(numbers); out.addAll(years); L<P<Int>> months = pwt_filter isMonthNr(numbers); L<P<Int>> dayOfMonths = pwt_filter isDayOfMonth(numbers); L<P<Weekday>> weekdays = pwt_transform(weekday(), initials); out.addAll(weekdays); // month names L<P<Month>> monthNames = pwt_transform(monthName(), initials); out.addAll(monthNames); // month name + year, e.g. "February 2020" out.addAll(pwt_combine(monthNames, years, (month, year) -> new Month(month.month, new Year(year)))); // month name + ordinal, e.g. "March 4th" out.addAll(pwt_combine(monthNames, ordinals, (month, ord) -> new Day(ord, month))); // yesterday, today, tomorrow out.addAll(pwt_transform(t -> eqic(t, "yesterday") ? new TodayPlus(-1) : eqic(t, "today") ? new TodayPlus(0) : new TodayPlus(1), pwt_transform(fixedToken("yesterday", "today", "tomorrow"), initials))); // last/this/next week for (P<S> week : pwt_transform(fixedToken("week"), initials)) for (P<S> which : parseToTheLeft(fixedToken("last", "this", "next"), week)) out.add(pwt_combine(new CurrentWeekPlus( eqic(which!, "last") ? -1 : eqic(which!, "this") ? 0 : 1), which, week)); // "next <weekday>" for (P<Weekday> weekday : weekdays) for (P<S> next : parseToTheLeft(fixedToken("next"), weekday)) out.add(pwt_combine(new Weekday(weekday->weekday, new CurrentWeekPlus(1)), next, weekday)); for (P<Int> year : years) for (P<S> slash : parseToTheRight(fixedToken("/"), year)) for (P<Int> month : pwt_toTheRightOf(months, slash)) for (P<S> slash2 : parseToTheRight(fixedToken("/"), month)) for (P<Int> day : pwt_toTheRightOf(dayOfMonths, slash2)) out.add(pwt_combine(new Day(day!, new Month(month!, new Year(year!))), year, day)); L<P<Hour>> hours = pwt_transform numberToHour(numbers); L<P<Int>> minutes = pwt_filter isMinute(numbers); L<P<Int>> seconds = minutes; L<P<S>> colons = pwt_filter(t -> eq(t, ":"), initials); // 15:12 etc. L<P<Minute>> hoursAndMinutes = pwt_combine(hours, colons, minutes, (h, _, m) -> new Minute(m, h)); out.addAll(hoursAndMinutes); L<P<Second>> hoursAndMinutesAndSeconds = pwt_combine(hoursAndMinutes, colons, seconds, (hm, _, second) -> new Second(second, hm)); out.addAll(hoursAndMinutesAndSeconds); L<P<S>> amPMs = pwt_transform(fixedToken("am", "pm"), initials); // 3 am, 5 pm etc. L<P<Hour>> amPMTimes = pwt_combine(numbers, amPMs, (hour, amPM) -> !between(hour, 1, 12) ? null : new Hour(hour, eqic(amPM, "pm"))); out.addAll(amPMTimes); // between 1 and 2 pm for (P<Hour> time : amPMTimes) for (P<S> and : parseToTheLeft(fixedToken("and"), time)) for (P<Hour> hour : pwt_toTheLeftOf(hours, and)) out.add(pwt_combine(new Between(new Hour(hour->hour, time->isPM), time!), time, hour)); ret itIt(out); } IF1<S, Int> number() { ret s -> isInteger(s) ? parseInt(s) : null; } bool isYear(int n) { ret between(n, 1900, 2100); } bool isMonthNr(int n) { ret between(n, 1, 12); } bool isDayOfMonth(int n) { ret between(n, 1, 31); } bool isHour(int n) { ret between(n, 0, 23); } bool isMinute(int n) { ret between(n, 0, 59); } bool isSecond(int n) { ret between(n, 0, 59); } Hour numberToHour(int n) { ret !isHour(n) ? null : n > 12 ? new Hour(n-12, true) : new Hour(n, null); } IF1<S> fixedToken(S... tokens) { ret fixedToken(litciset(tokens)); } IF1<S> fixedToken(Set<S> set) { ret t -> contains(set, t) ? t : null; } IF1<S, Weekday> weekday() { ret s -> { int n = parseEnglishWeekday(s); ret n == 0 ? null : new Weekday(n); }; } IF1<S, Month> monthName() { ret s -> { int n = parseEnglishMonthName(s); ret n == 0 ? null : new Month(n); }; } <A, B> L<ParsedWithTokens<B>> parseToTheLeft(IF1<A, B> f, ParsedWithTokens p) { ret pwt_transform(f, pwt_precedingTokens(1, maxTokens, p.start())); } <A, B> L<ParsedWithTokens<B>> parseToTheRight(IF1<A, B> f, ParsedWithTokens p) { ret pwt_transform(f, pwt_followingTokens(1, maxTokens, p.remaining())); } }
Began life as a copy of #1028540
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028573 |
Snippet name: | EnglishDateParser, attempt 4 [good example for ParsedWithTokens, LIVE] |
Eternal ID of this version: | #1028573/144 |
Text MD5: | 67a6a957f657034566fd8d147654050a |
Transpilation MD5: | c1d5b3384cc6f27c60ce6f676b91ff03 |
Author: | stefan |
Category: | javax |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2022-01-15 00:33:15 |
Source code size: | 5805 bytes / 152 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 525 / 1321 |
Version history: | 143 change(s) |
Referenced in: | [show references] |