Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

592
LINES

< > BotCompany Repo | #1005980 // unstructure (v11, with token iterator/on reader)

JavaX fragment (include)

1  
// Convenience overload: parses text with allDynamic switched off.
static Object unstructure(String text) {
  final boolean allDynamic = false;
  ret unstructure(text, allDynamic);
}
4  
5  
// Convenience overload: parses text without a custom class finder.
static Object unstructure(String text, final boolean allDynamic) {
  O noClassFinder = null;
  ret unstructure(text, allDynamic, noClassFinder);
}
8  
9  
// Threshold handed to internIfLongerThan for every unquoted string value read by the parser
static int structure_internStringsLongerThan = 50;
10  
// Length of the char[] scratch buffer each parse allocates and passes to unquoteUsingCharArray
static int unstructure_unquoteBufSize = 100;
11  
12  
static int unstructure_tokrefs; // stats: size of the token reference table after the latest unstructure_tok call
13  
14  
// Callback through which the parser delivers a finished value.
abstract sclass unstructure_Receiver {
  // Called with the parsed value (may be null)
  abstract void set(O value);
}
17  
18  
// classFinder: func(name) -> class (optional)
19  
static Object unstructure(String text, boolean allDynamic,
20  
  O classFinder) {
21  
  if (text == null) ret null;
22  
  ret unstructure_tok(javaTokC_noMLS_iterator(text), allDynamic, classFinder);
23  
}
24  
25  
// Parses an object graph from a reader, with allDynamic off and no class finder.
static O unstructure_reader(BufferedReader reader) {
  final boolean allDynamic = false;
  final O classFinder = null;
  ret unstructure_tok(javaTokC_noMLS_onReader(reader), allDynamic, classFinder);
}
28  
29  
static O unstructure_tok(final Producer<S> tok, final boolean allDynamic, final O classFinder) {
30  
  final boolean debug = unstructure_debug;
31  
  
32  
  final class X {
33  
    int i = -1;
34  
    new HashMap<Integer, O> refs;
35  
    new HashMap<Integer, O> tokrefs;
36  
    new HashSet<S> concepts;
37  
    new HashMap<S, Class> classesMap;
38  
    new L<Runnable> stack;
39  
    S curT;
40  
    char[] unquoteBuf = new char[unstructure_unquoteBufSize];
41  
    
42  
    S unquote(S s) {
43  
      ret unquoteUsingCharArray(s, unquoteBuf); 
44  
    }
45  
46  
    // look at current token
47  
    S t() {
48  
      ret curT;
49  
    }
50  
    
51  
    // get current token, move to next
52  
    S tpp() {
53  
      S t = curT;
54  
      consume();
55  
      ret t;
56  
    }
57  
    
58  
    // Parses one value starting at the current token and delivers it to out.
    // If the current token is a marker (per structure_isMarker), the number
    // following its first character becomes the reference ID under which the
    // value is stored in refs. Non-null values are also stored in tokrefs
    // under the index of the token the value starts at.
    void parse(final unstructure_Receiver out) {
      S first = t();

      final bool marked = structure_isMarker(first, 0, l(first));
      final int markerID = marked ? parseInt(first.substring(1)) : 0;
      if (marked) consume();

      // token index is taken after the optional marker has been skipped
      final int tokIndex = i;

      unstructure_Receiver recorder = new unstructure_Receiver {
        void set(O parsed) {
          if (markerID != 0) refs.put(markerID, parsed);
          if (parsed != null) tokrefs.put(tokIndex, parsed);
          out.set(parsed);
        }
      };
      parse_inner(markerID, tokIndex, recorder);
    }
81  
    
82  
    void parse_inner(int refID, int tokIndex, final unstructure_Receiver out) {
83  
      S t = t();
84  
      
85  
      // if (debug) print("parse_inner: " + quote(t));
86  
      
87  
      Class c = classesMap.get(t);
88  
      if (c == null) {
89  
        if (t.startsWith("\"")) {
90  
          S s = internIfLongerThan(unquote(tpp()), structure_internStringsLongerThan);
91  
          out.set(s); ret;
92  
        }
93  
        
94  
        if (t.startsWith("'")) {
95  
          out.set(unquoteCharacter(tpp())); ret;
96  
        }
97  
        if (t.equals("bigint")) {
98  
          out.set(parseBigInt()); ret;
99  
        }
100  
        if (t.equals("d")) {
101  
          out.set(parseDouble()); ret;
102  
        }
103  
        if (t.equals("fl")) {
104  
          out.set(parseFloat()); ret;
105  
        }
106  
        if (t.equals("sh")) {
107  
          consume();
108  
          t = tpp();
109  
          if (t.equals("-")) {
110  
            t = tpp();
111  
            out.set((short) (-parseInt(t)); ret;
112  
          }
113  
          out.set((short) parseInt(t)); ret;
114  
        }
115  
        if (t.equals("-")) {
116  
          consume();
117  
          t = tpp();
118  
          out.set(isLongConstant(t) ? (O) (-parseLong(t)) : (O) (-parseInt(t))); ret;
119  
        }
120  
        if (isInteger(t) || isLongConstant(t)) {
121  
          consume();
122  
          //if (debug) print("isLongConstant " + quote(t) + " => " + isLongConstant(t));
123  
          if (isLongConstant(t)) {
124  
            out.set(parseLong(t)); ret;
125  
          }
126  
          long l = parseLong(t);
127  
          bool isInt = l == (int) l;
128  
          if (debug)
129  
            print("l=" + l + ", isInt: " + isInt);
130  
          out.set(isInt ? (O) new Integer((int) l) : (O) new Long(l)); ret;
131  
        }
132  
        if (t.equals("false") || t.equals("f")) {
133  
          consume(); out.set(false); ret;
134  
        }
135  
        if (t.equals("true") || t.equals("t")) {
136  
          consume(); out.set(true); ret;
137  
        }
138  
        if (t.equals("-")) {
139  
          consume();
140  
          t = tpp();
141  
          out.set(isLongConstant(t) ? (O) (-parseLong(t)) : (O) (-parseInt(t))); ret;
142  
        }
143  
        if (isInteger(t) || isLongConstant(t)) {
144  
          consume();
145  
          //if (debug) print("isLongConstant " + quote(t) + " => " + isLongConstant(t));
146  
          if (isLongConstant(t)) {
147  
            out.set(parseLong(t)); ret;
148  
          }
149  
          long l = parseLong(t);
150  
          bool isInt = l == (int) l;
151  
          if (debug)
152  
            print("l=" + l + ", isInt: " + isInt);
153  
          out.set(isInt ? (O) new Integer((int) l) : (O) new Long(l)); ret;
154  
        }
155  
        
156  
        if (t.equals("File")) {
157  
          consume();
158  
          File f = new File(unquote(tpp()));
159  
          out.set(f); ret;
160  
        }
161  
        
162  
        if (t.startsWith("r") && isInteger(t.substring(1))) {
163  
          consume();
164  
          int ref = Integer.parseInt(t.substring(1));
165  
          O o = refs.get(ref);
166  
          if (o == null)
167  
            print("Warning: unsatisfied back reference " + ref);
168  
          out.set(o); ret;
169  
        }
170  
      
171  
        if (t.startsWith("t") && isInteger(t.substring(1))) {
172  
          consume();
173  
          int ref = Integer.parseInt(t.substring(1));
174  
          O o = tokrefs.get(ref);
175  
          if (o == null)
176  
            print("Warning: unsatisfied token reference " + ref);
177  
          out.set(o); ret;
178  
        }
179  
        
180  
        if (t.equals("hashset")) ret with parseHashSet(out);
181  
        if (t.equals("lhs")) ret with parseLinkedHashSet(out);
182  
        if (t.equals("treeset")) ret with parseTreeSet(out);
183  
        
184  
        if (eqOneOf(t, "hashmap", "hm")) {
185  
          consume();
186  
          parseMap(new HashMap, out);
187  
          ret;
188  
        }
189  
        if (t.equals("lhm")) {
190  
          consume();
191  
          parseMap(new LinkedHashMap, out);
192  
          ret;
193  
        }
194  
        if (t.equals("sync")) {
195  
          consume();
196  
          if (t().equals("tm")) {
197  
            consume();
198  
            ret with parseMap(synchronizedTreeMap(), out);
199  
          }
200  
          if (t().equals("["))
201  
            ret with parseList(synchroList(), out);
202  
          ret with parseMap(synchronizedMap(), out);
203  
        }
204  
        if (t.equals("{")) {
205  
          parseMap(out); ret;
206  
        }
207  
        if (t.equals("[")) {
208  
          this.parseList(new ArrayList, out); ret;
209  
        }
210  
        if (t.equals("bitset")) {
211  
          parseBitSet(out); ret;
212  
        }
213  
        if (t.equals("array") || t.equals("intarray")) {
214  
          parseArray(out); ret;
215  
        }
216  
        if (t.equals("ba")) {
217  
          consume();
218  
          S hex = unquote(tpp());
219  
          out.set(hexToBytes(hex)); ret;
220  
        }
221  
        if (t.equals("boolarray")) {
222  
          consume();
223  
          int n = parseInt(tpp());
224  
          S hex = unquote(tpp());
225  
          out.set(boolArrayFromBytes(hexToBytes(hex), n)); ret;
226  
        }
227  
        if (t.equals("class")) {
228  
          out.set(parseClass()); ret;
229  
        }
230  
        if (t.equals("l")) {
231  
          parseLisp(out); ret;
232  
        }
233  
        if (t.equals("null")) {
234  
          consume(); out.set(null); ret;
235  
        }
236  
        
237  
        if (eq(t, "c")) {
238  
          consume("c");
239  
          t = t();
240  
          assertTrue(isJavaIdentifier(t));
241  
          concepts.add(t);
242  
        }
243  
      }
244  
      
245  
      if (eq(t, "j")) {
246  
        consume("j");
247  
        out.set(parseJava()); ret;
248  
      }
249  
250  
      if (c == null && !isJavaIdentifier(t))
251  
        throw new RuntimeException("Unknown token " + (i+1) + ": " + t);
252  
        
253  
      // any other class name
254  
      if (c == null) {
255  
        // First, find class
256  
        if (allDynamic) c = null;
257  
        else c = classFinder != null ? (Class) callF(classFinder, "main$" + t) : findClass(t);
258  
        if (c != null)
259  
          classesMap.put(t, c);
260  
      }
261  
          
262  
      // Check if it has an outer reference
263  
      consume();
264  
      bool hasBracket = eq(t(), "(");
265  
      if (hasBracket) consume();
266  
      bool hasOuter = hasBracket && eq(t(), "this$1");
267  
      
268  
      DynamicObject dO = null;
269  
      O o = null;
270  
      fS thingName = t;
271  
      if (c != null) {
272  
        o = hasOuter ? nuStubInnerObject(c, classFinder) : nuEmptyObject(c);
273  
        if (o instanceof DynamicObject) dO = (DynamicObject) o;
274  
      } else {
275  
        if (concepts.contains(t) && (c = findClass("Concept")) != null)
276  
          o = dO = (DynamicObject) nuEmptyObject(c);
277  
        else
278  
          dO = new DynamicObject;
279  
        dO.className = t;
280  
        if (debug) print("Made dynamic object " + t + " " + shortClassName(dO));
281  
      }
282  
      
283  
      // Save in references list early because contents of object
284  
      // might link back to main object
285  
      
286  
      if (refID != 0)
287  
        refs.put(refID, o != null ? o : dO);
288  
      tokrefs.put(tokIndex, o != null ? o : dO);
289  
      
290  
      // NOW parse the fields!
291  
      
292  
      final new LinkedHashMap<S, O> fields; // preserve order
293  
      final O _o = o;
294  
      final DynamicObject _dO = dO;
295  
      if (hasBracket) {
296  
        stack.add(r {
297  
          if (eq(t(), ")")) {
298  
            consume(")");
299  
            objRead(_o, _dO, fields);
300  
            out.set(_o != null ? _o : _dO);
301  
          } else {
302  
            final S key = unquote(tpp());
303  
            if (!eq(tpp(), "="))
304  
              fail("= expected, got " + t() + " after " + quote(key) + " in object " + thingName /*+ " " + sfu(fields)*/);
305  
            stack.add(this);
306  
            parse(new unstructure_Receiver {
307  
              void set(O value) {
308  
                fields.put(key, value);
309  
                if (eq(t(), ",")) consume();
310  
              }
311  
            });
312  
          }
313  
        });
314  
      } else {
315  
        objRead(o, dO, fields);
316  
        out.set(o != null ? o : dO);
317  
      }
318  
    }
319  
    
320  
    void objRead(O o, DynamicObject dO, Map<S, O> fields) {
321  
      ifdef unstructure_debug
322  
      print("objRead " + className(o) + " " + className(dO) + " " + struct(fields));
323  
      endifdef
324  
      if (o != null)
325  
        if (dO != null) {
326  
          if (debug)
327  
            printStructure("setOptAllDyn", fields);
328  
          setOptAllDyn(dO, fields);
329  
        } else {
330  
          setOptAll_pcall(o, fields);
331  
          ifdef unstructure_debug
332  
            print("objRead now: " + struct(o));
333  
          endifdef
334  
        }
335  
      else for (S field : keys(fields))
336  
        dO.fieldValues.put(intern(field), fields.get(field));
337  
338  
      if (o != null)
339  
        pcallOpt_noArgs(o, "_doneLoading");
340  
    }
341  
    
342  
    // Reads a bracketed list and adds all of its elements to set,
    // then delivers set to out
    void parseSet(final Set set, final unstructure_Receiver out) {
      unstructure_Receiver whenListDone = new unstructure_Receiver {
        void set(O elements) {
          set.addAll((L) elements);
          out.set(set);
        }
      };
      this.parseList(new ArrayList, whenListDone);
    }
350  
    
351  
    void parseLisp(final unstructure_Receiver out) {
352  
      ifclass Lisp
353  
        consume("l");
354  
        consume("(");
355  
        final new ArrayList list;
356  
        stack.add(r {
357  
          if (eq(t(), ")")) {
358  
            consume(")");
359  
            out.set(Lisp((S) list.get(0), subList(list, 1)));
360  
          } else {
361  
            stack.add(this);
362  
            parse(new unstructure_Receiver {
363  
              void set(O o) {
364  
                list.add(o);
365  
                if (eq(t(), ",")) consume();
366  
              }
367  
            });
368  
          }
369  
        });
370  
        if (false) // skip fail line
371  
      endif
372  
      
373  
      fail("class Lisp not included");
374  
    }
375  
    
376  
    // Reads "bitset{i, j, ...}": each parsed element is cast to Integer and
    // set as a bit index; the BitSet is delivered once "}" is reached
    void parseBitSet(final unstructure_Receiver out) {
      consume("bitset");
      consume("{");
      final new BitSet bits;
      stack.add(r {
        if (!eq(t(), "}")) {
          stack.add(this);
          parse(new unstructure_Receiver {
            void set(O index) {
              bits.set((Integer) index);
              if (eq(t(), ",")) consume();
            }
          });
        } else {
          consume("}");
          out.set(bits);
        }
      });
    }
395  
    
396  
    // Reads "[a, b, ...]" into the given list and delivers the list to out
    // once the closing "]" is reached
    void parseList(final L list, final unstructure_Receiver out) {
      consume("[");
      stack.add(r {
        if (!eq(t(), "]")) {
          // more to read: re-queue this step, then parse one element
          stack.add(this);
          parse(new unstructure_Receiver {
            void set(O element) {
              list.add(element);
              if (eq(t(), ",")) consume();
            }
          });
        } else {
          consume("]");
          out.set(list);
        }
      });
    }
414  
    
415  
    // Reads "array{...}" or "intarray{...}". The leading token selects the
    // result type: "intarray" is converted with toIntArray, anything else
    // becomes an Object[] via toArray()
    void parseArray(final unstructure_Receiver out) {
      final S type = tpp();
      consume("{");
      final List elements = new ArrayList;
      
      stack.add(r {
        if (!eq(t(), "}")) {
          stack.add(this);
          parse(new unstructure_Receiver {
            void set(O element) {
              elements.add(element);
              if (eq(t(), ",")) consume();
            }
          });
        } else {
          consume("}");
          if (type.equals("intarray"))
            out.set(toIntArray(elements));
          else
            out.set(elements.toArray());
        }
      });
    }
435  
    
436  
    // Reads class("name"). Returns the resolved Class if one can be found
    // (never when allDynamic is set); otherwise a DynamicObject with
    // className "java.lang.Class" carrying the name in its "name" field.
    // NOTE(review): parse_inner calls classFinder with "main$" + name, whereas
    // the prefix is stripped here before the lookup - confirm which form
    // classFinder expects.
    Object parseClass() {
      consume("class");
      consume("(");
      S quoted = tpp();
      S name = unquote(quoted);
      consume(")");
      name = dropPrefix("main$", name);

      Class c = null;
      if (!allDynamic) {
        if (classFinder != null) {
          c = (Class) callF(classFinder, name);
        } else {
          c = findClass(name);
        }
      }
      if (c != null) ret c;

      new DynamicObject standIn;
      standIn.className = "java.lang.Class";
      standIn.fieldValues.put("name", name);
      ret standIn;
    }
449  
    
450  
    // Reads bigint(digits) or bigint(-digits) into a BigInteger
    Object parseBigInt() {
      consume("bigint");
      consume("(");
      S first = tpp();
      // a minus sign arrives as its own token and is glued back on
      S digits = eq(first, "-") ? "-" + tpp() : first;
      consume(")");
      ret new BigInteger(digits);
    }
459  
    
460  
    Object parseDouble() {
461  
      consume("d");
462  
      consume("(");
463  
      S val = unquote(tpp());
464  
      consume(")");
465  
      ret Double.parseDouble(val);
466  
    }
467  
    
468  
    // Reads a float written either as fl("...") or as fl "..." (no brackets)
    Object parseFloat() {
      consume("fl");
      bool bracketed = eq(t(), "(");
      if (bracketed) consume("(");
      S text = unquote(tpp());
      if (bracketed) consume(")");
      ret Float.parseFloat(text);
    }
480  
    
481  
    // Reads "hashset [...]" into a new HashSet
    void parseHashSet(unstructure_Receiver out) {
      consume("hashset");
      Set target = new HashSet;
      parseSet(target, out);
    }
485  
    
486  
    // Reads "lhs [...]" into a new LinkedHashSet
    void parseLinkedHashSet(unstructure_Receiver out) {
      consume("lhs");
      Set target = new LinkedHashSet;
      parseSet(target, out);
    }
490  
    
491  
    // Reads "treeset [...]" into a new TreeSet
    void parseTreeSet(unstructure_Receiver out) {
      consume("treeset");
      Set target = new TreeSet;
      parseSet(target, out);
    }
495  
    
496  
    // Reads a map literal without a type prefix into a new TreeMap
    void parseMap(unstructure_Receiver out) {
      Map target = new TreeMap;
      parseMap(target, out);
    }
499  
    
500  
    O parseJava() {
501  
      S j = unquote(tpp());
502  
      new Matches m;
503  
      if (jmatch("java.awt.Color[r=*,g=*,b=*]", j, m))
504  
        ret nuObject("java.awt.Color", parseInt($1), parseInt($2), parseInt($3));
505  
      else {
506  
        warn("Unknown Java object: " + j);
507  
        null;
508  
      }
509  
    }
510  
    
511  
    // Reads "{key=value, ...}" into map and delivers map to out once the
    // closing "}" is reached. Keys and values are both parsed via parse(),
    // so either may be an arbitrary value. The queued Runnable alternates
    // between two states: reading a key (v == false) and reading the value
    // that belongs to the most recently read key (v == true).
    void parseMap(final Map map, final unstructure_Receiver out) {
      consume("{");
      stack.add(new Runnable {
        bool v; // true while the value for 'key' is still to be read
        O key;
        
        public void run() { 
          if (v) {
            v = false;
            stack.add(this);
            // Keep the consumed token so that the error message reports the
            // offending token itself, not the one following it
            S sep = tpp();
            if (!eq(sep, "="))
              fail("= expected, got " + sep + " in map of size " + l(map));

            parse(new unstructure_Receiver {
              void set(O value) {
                map.put(key, value);
                if (debug)
                  print("parseMap: Got value " + getClassName(value) + ", next token: " + quote(t()));
                if (eq(t(), ",")) consume();
              }
            });
          } else {
            if (eq(t(), "}")) {
              consume("}");
              out.set(map);
            } else {
              v = true;
              stack.add(this);
              parse(new unstructure_Receiver {
                void set(O o) {
                  key = o;
                }
              });
            }
          } // if v else
        } // run()
      });
    }
549  
    
550  
    /*void parseSub(unstructure_Receiver out) {
551  
      int n = l(stack);
552  
      parse(out);
553  
      while (l(stack) > n)
554  
        stack
555  
    }*/
556  
    
557  
    // Fetches the next token from the producer and advances the token index
    void consume() {
      curT = tok.next();
      i++;
    }
558  
    
559  
    void consume(S s) {
560  
      if (!eq(t(), s)) {
561  
        /*S prevToken = i-1 >= 0 ? tok.get(i-1) : "";
562  
        S nextTokens = join(tok.subList(i, Math.min(i+2, tok.size())));
563  
        fail(quote(s) + " expected: " + prevToken + " " + nextTokens + " (" + i + "/" + tok.size() + ")");*/
564  
        fail(quote(s) + " expected, got " + quote(t()));
565  
      }
566  
      consume();
567  
    }
568  
    
569  
    void parse_x(unstructure_Receiver out) {
570  
      consume(); // get first token
571  
      parse(out);
572  
      while (nempty(stack))
573  
        popLast(stack).run();
574  
    }
575  
  }
576  
  
577  
  Bool b = DynamicObject_loading!;
578  
  DynamicObject_loading.set(true);
579  
  try {
580  
    final new Var v;
581  
    new X x;
582  
    x.parse_x(new unstructure_Receiver {
583  
      void set(O o) { v.set(o); }
584  
    });
585  
    unstructure_tokrefs = x.tokrefs.size();
586  
    ret v.get();
587  
  } finally {
588  
    DynamicObject_loading.set(b);
589  
  }
590  
}
591  
592  
// When true, unstructure_tok prints diagnostic output while parsing (the flag is read once at the start of each call)
static boolean unstructure_debug;

Author comment

Began life as a copy of #1005975

download  show line numbers  debug dex  old transpilations   

Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, irmadwmeruwu, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1005980
Snippet name: unstructure (v11, with token iterator/on reader)
Eternal ID of this version: #1005980/42
Text MD5: bb7c8bd1277b727c14fb0187507d1f89
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2019-01-01 19:39:26
Source code size: 17167 bytes / 592 lines
Pitched / IR pitched: No / No
Views / Downloads: 730 / 722
Version history: 41 change(s)
Referenced in: [show references]