Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

113
LINES

< > BotCompany Repo | #1005501 // flexMatchIC2 - flexMatch (ignore case, with (a|b), does not drop punctuation)

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (3798L/24K).

1  
// Debug switch: when true, flexMatchIC2_impl prints a trace of its matching steps via print().
static bool flexMatchIC2_debug;
2  
3  
// Matches string s against pattern pat without collecting any captures.
// Delegates to the (pat, s, Matches) overload with a null Matches object.
static bool flexMatchIC2(S pat, S s) {
  Matches noCaptures = null;
  ret flexMatchIC2(pat, s, noCaptures);
}
6  
7  
// Tokenizes both the pattern and the input string, then delegates to the
// token-list overload. A null input string is replaced through unnull() before
// tokenizing. m may be null; it is passed through unchanged.
static bool flexMatchIC2(S pat, S s, Matches m) {
  L<S> patternTokens = javaTok(pat);
  L<S> inputTokens = javaTok_cached(unnull(s));
  ret flexMatchIC2(patternTokens, inputTokens, m);
}
10  
11  
// Same as the (pat, s, m) overload, but lets the caller choose whether the
// joinBrackets() preprocessing is applied by the token-list implementation.
static bool flexMatchIC2(S pat, S s, Matches m, bool joinBrackets) {
  L<S> patternTokens = javaTok(pat);
  L<S> inputTokens = javaTok_cached(unnull(s));
  ret flexMatchIC2(patternTokens, inputTokens, m, joinBrackets);
}
14  
15  
// Variant for callers that already hold the tokenized input (tokfull):
// only the pattern string is tokenized here before delegating.
sbool flexMatchIC2(S pat, LS tokfull, Matches m, bool joinBrackets) {
  L<S> patternTokens = javaTok(pat);
  ret flexMatchIC2(patternTokens, tokfull, m, joinBrackets);
}
18  
19  
// Token-list overload with bracket joining switched on (joinBrackets = true).
static bool flexMatchIC2(L<S> tokpat, L<S> tokfull, Matches m) {
  bool joinBracketsByDefault = true;
  ret flexMatchIC2(tokpat, tokfull, m, joinBracketsByDefault);
}
22  
23  
// Main entry point working on token lists.
//
// tokpat       - tokenized pattern; "*" is the flex wildcard, "(a|b)" an alternation
// tokfull      - tokenized input
// m            - if non-null, m.m receives the captured parts on success
// joinBrackets - if true, both token lists are passed through joinBrackets() first
//
// Returns true iff the whole input matches the whole pattern
// (as decided by flexMatchIC2_impl).
static bool flexMatchIC2(L<S> tokpat, L<S> tokfull, Matches m, bool joinBrackets) {
  // Prepare the pattern: optional bracket joining, then reduce to code tokens.
  if (joinBrackets) tokpat = joinBrackets(tokpat);
  tokpat = codeTokens(tokpat);

  // Put a single-token wildcard "!*" in front of every "*" so that a "*"
  // can never match an empty token sequence.
  int p = 0;
  while (p < l(tokpat)) {
    if (eq(tokpat.get(p), "*")) {
      tokpat.add(p, "!*");
      p += 2; // step over the inserted "!*" and the "*" itself
    } else
      p++;
  }

  // Prepare the input the same way.
  if (joinBrackets) tokfull = joinBrackets(tokfull);
  L<S> inputCode = codeTokens(tokfull);

  // Bits are indexed by position in inputCode:
  // wildcardMarks = tokens consumed by "*" / "!*", altMarks = tokens matched by "(a|b)".
  new BitSet wildcardMarks;
  new BitSet altMarks;
  if (!flexMatchIC2_impl(tokpat, 0, inputCode, 0, wildcardMarks, altMarks)) ret false;

  if (m == null) ret true;

  // Collect captures by walking the odd indices of tokfull; index i is looked
  // up in the bit sets as i/2.
  new L<S> captured;
  int n = l(tokfull);
  int i = 1;
  while (i < n) {
    int bit = i/2;
    if (wildcardMarks.get(bit)) {
      // A run of consecutive wildcard-marked tokens becomes ONE capture,
      // joined together with the tokens in between.
      int end = i;
      while (end < n && wildcardMarks.get(end/2)) end += 2;
      captured.add(join(subList(tokfull, i, end-1)));
      i = end;
    } else {
      if (altMarks.get(bit))
        captured.add(tokfull.get(i));
      i += 2;
    }
  }
  m.m = toStringArray(captured);
  ret true;
}
48  
49  
// Recursive backtracking matcher.
//
// pat/ipat - pattern tokens and the current position in them
// tok/itok - input tokens and the current position in them
// bla      - bit per input token consumed by a wildcard ("*" or "!*")
// bla2     - bit per input token matched by an alternation "(a|b)"
//
// Returns true iff pat[ipat..] matches tok[itok..] completely. Bits set on a
// path that ends up failing are cleared again before returning false.
static bool flexMatchIC2_impl(L<S> pat, int ipat, L<S> tok, int itok, BitSet bla, BitSet bla2) {
  if (flexMatchIC2_debug)
    print("flexMatchIC2 pat=" + structure(subList(pat, ipat)) + " tok=" + structure(subList(tok, itok)) + " " + structure(bla));

  // Pattern exhausted: success only if the input is exhausted as well.
  if (ipat >= l(pat))
    ret itok >= l(tok);

  S patTok = pat.get(ipat);

  if (eq(patTok, "*")) { // the flex wildcard (0 or more tokens)
    if (flexMatchIC2_debug) print("Trying zero tokens");
    bool matched = flexMatchIC2_impl(pat, ipat+1, tok, itok, bla, bla2);

    if (!matched) {
      // Let the wildcard swallow the current token and stay on the "*".
      bla.set(itok);
      if (itok < l(tok)) {
        if (flexMatchIC2_debug) print("Trying one or more tokens");
        matched = flexMatchIC2_impl(pat, ipat, tok, itok+1, bla, bla2);
      }
      if (!matched) {
        if (flexMatchIC2_debug) print("Failed * matching");
        bla.clear(itok); // fail, undo marking
        ret false;
      }
    }

    // success (a mark set above is left in place)
    if (flexMatchIC2_debug) print("Success!");
    ret true;
  }

  // Every remaining pattern token kind needs one input token.
  if (itok >= l(tok)) {
    if (flexMatchIC2_debug)
      print("too much pattern");
    ret false;
  }

  if (eq(patTok, "!*")) { // the single-token wildcard
    bla.set(itok);
    bool restMatches = flexMatchIC2_impl(pat, ipat+1, tok, itok+1, bla, bla2);
    if (!restMatches)
      bla.clear(itok); // fail, undo marking
    ret restMatches;
  }

  S actual = tok.get(itok);
  bool isAlternation = patTok.startsWith("(") && patTok.endsWith(")");

  if (!isAlternation) {
    // plain token: compared via neqic
    if (neqic(actual, patTok)) {
      if (flexMatchIC2_debug)
        print("mismatch");
      ret false;
    }
  } else {
    // quick pre-check on the raw "(a|b)" text before splitting it
    if (flexMatchIC2_debug)
      print("flexMatchIC2 precheck " + patTok + " " + actual);
    if (!containsIgnoreCase(patTok, actual)) ret false;

    // real check against the individual alternatives
    L<S> alternatives = splitAt(dropFirstAndLast(patTok), "|");
    if (flexMatchIC2_debug)
      print("flexMatchIC2 real check " + struct(alternatives));
    if (!containsIgnoreCase(alternatives, actual)) ret false;

    bla2.set(itok);
  }

  // it is a token match. consume and proceed
  if (flexMatchIC2_impl(pat, ipat+1, tok, itok+1, bla, bla2))
    ret true;

  bla2.clear(itok); // rest failed: drop any alternation mark for this token
  ret false;
}

Author comment

Began life as a copy of #1005459

download  show line numbers  debug dex  old transpilations   

Travelled to 16 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, ddnzoavkxhuk, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, sawdedvomwva, tslmcundralx, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1005501
Snippet name: flexMatchIC2 - flexMatch (ignore case, with (a|b), does not drop punctuation)
Eternal ID of this version: #1005501/14
Text MD5: bd35ec1d3a4f12f224966db6f88bd039
Transpilation MD5: 3f30e2edbb0c7377603238b836b6366d
Author: stefan
Category: javax / parsing
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-07-05 21:38:57
Source code size: 3648 bytes / 113 lines
Pitched / IR pitched: No / No
Views / Downloads: 690 / 736
Version history: 13 change(s)
Referenced in: [show references]