Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

712
LINES

< > BotCompany Repo | #1030718 // unstructure (v17, more customizability, dev.)

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (5173L/30K).

// TODO: cyclic structures involving certain lists & sets

// Convenience entry point: deserializes text with allDynamic switched off
// and without an explicit class finder.
sO unstructure(S text) {
  ret unstructure(text, false, null);
}

// Deserializes text using a caller-supplied unstructure_Data (e.g. a subclass
// overriding postProcessValue). allDynamic is off and no class finder is passed.
// Returns null for null text, like the unstructure(S, bool, O) overload does.
sO unstructure(S text, unstructure_Data data) {
  if (text == null) ret null;
  ret unstructure_tok(javaTokC_noMLS_iterator(text), false, null, data);
}

// Deserializes text with the given allDynamic setting and no class finder.
sO unstructure(String text, final boolean allDynamic) {
  O result = unstructure(text, allDynamic, null);
  ret result;
}

// Threshold handed to internIfLongerThan() for every decoded string token.
static int structure_internStringsLongerThan = 50;
// Length of the char[] scratch buffer each unstructure_Data allocates for unquoting.
static int unstructure_unquoteBufSize = 100;

// Overwritten by unstructure_tok with the size of the tokrefs map after a run.
static int unstructure_tokrefs; // stats

// Callback through which the parser hands a deserialized value to whoever
// requested it (see unstructure_Data.out_set).
abstract sclass unstructure_Receiver {
  // Receives the parsed value; the parser also delivers null (for the "null" token).
  abstract void set(O o);
}

// classFinder: func(name) -> class (optional)
// Yields null for null text; otherwise tokenizes the text and deserializes it.
static Object unstructure(S text, bool allDynamic,
  O classFinder) {
  ret text == null ? null
    : unstructure_tok(javaTokC_noMLS_iterator(text), allDynamic, classFinder);
}

// Deserializes a structure read from a BufferedReader
// (allDynamic off, no explicit class finder).
static O unstructure_reader(BufferedReader reader) {
  Producer<S> tokens = javaTokC_noMLS_onReader(reader);
  ret unstructure_tok(tokens, false, null);
}

sclass unstructure_Data {
  // Token source; consume() pulls one token from it per call.
  Producer<S> tok;
  // When true, class lookups are skipped (findAClass is not called for objects,
  // "cu" entries or "class(...)" values).
  bool allDynamic;
  // Optional func(name) -> class; when null, findAClass uses findClass_fullName.
  O classFinder;
  // Index of the current token; -1 before the first consume().
  int i = -1;
  // Values registered under the ID of their marker token; read by "r<N>" back references.
  new HashMap<Integer, O> refs;
  // Values registered under their token index; read by "t<N>" token references.
  new HashMap<Integer, O> tokrefs;
  // Names that appeared after a "c" prefix token.
  new HashSet<S> concepts;
  // Cache: class name as written in the input -> resolved Class.
  new HashMap<S, Class> classesMap;
  // Pending continuations; parse_initial pops and runs them until the list is empty.
  new L<Runnable> stack;
  // The current token (what t() returns).
  S curT;
  // Scratch buffer passed to unquoteUsingCharArray.
  char[] unquoteBuf = new char[unstructure_unquoteBufSize];
  
  // Delivers a value to a receiver after running it through postProcessValue.
  void out_set(unstructure_Receiver out, O value) {
    O processed = postProcessValue(value);
    out.set(processed);
  }
  
  // Hook for subclasses: may substitute a value before out_set passes it on.
  // The default implementation hands the value back untouched.
  O postProcessValue(O value) {
    ret value;
  }
  
  // Resolves a full class name, preferring the configured classFinder
  // and falling back to findClass_fullName when none is set.
  Class findAClass(S fullClassName) {
    if (classFinder == null)
      ret findClass_fullName(fullClassName);
    ret (Class) callF(classFinder, fullClassName);
  }
  
  // Unquotes a token, reusing this instance's scratch buffer.
  S unquote(S s) {
    char[] scratch = unquoteBuf;
    ret unquoteUsingCharArray(s, scratch);
  }

  // Peek: returns the current token without advancing.
  S t() {
    ret this.curT;
  }
  
  // Returns the current token, then advances to the next one.
  S tpp() {
    S current = curT;
    consume();
    ret current;
  }
  
  // Parses one value starting at the current token. If that token is a marker
  // (per structure_isMarker), its number becomes the value's reference ID and
  // the marker is consumed. The finished value is recorded in refs/tokrefs
  // and then handed to out.
  void parse(final unstructure_Receiver out) {
    S first = t();
    
    // assigned exactly once so the receiver below can capture it
    final int refID;
    if (!structure_isMarker(first, 0, l(first)))
      refID = -1;
    else {
      refID = parseInt(first.substring(1));
      consume();
    }
    
    // if (debug) print("parse: " + quote(first));
    
    final int tokIndex = i;
    parse_inner(refID, tokIndex, new unstructure_Receiver {
      void set(O value) {
        if (refID >= 0) refs.put(refID, value);
        if (value != null) tokrefs.put(tokIndex, value);
        out_set(out, value);
      }
    });
  }
  
  // Parses the value at the current token; any marker has already been
  // consumed by parse().
  //
  // refID:    ID from the marker token, or -1 if there was none
  // tokIndex: token index under which an object is registered in tokrefs
  // out:      receives the finished value
  //
  // Dispatches on the token: literals, r<N>/t<N> references, collections and
  // arrays, special forms ("class", "l", "cu", "j"), and finally class names,
  // which produce objects. Container and object contents are not parsed
  // recursively; instead a continuation is pushed on `stack` and the value
  // is delivered to `out` once its closing token has been reached.
  void parse_inner(int refID, int tokIndex, final unstructure_Receiver out) {
    S t = t();
    
    // if (debug) print("parse_inner: " + quote(t));
    
    Class c = classesMap.get(t);
    if (c == null) {
      // --- strings and characters ---
      if (t.startsWith("\"")) {
        S s = internIfLongerThan(unquote(tpp()), structure_internStringsLongerThan);
        out_set(out, s); ret;
      }
      
      if (t.startsWith("'")) {
        out_set(out, unquoteCharacter(tpp())); ret;
      }
      
      // --- numbers ---
      if (t.equals("bigint")) {
        out_set(out, parseBigInt()); ret;
      }
      if (t.equals("d")) {
        out_set(out, parseDouble()); ret;
      }
      if (t.equals("fl")) {
        out_set(out, parseFloat()); ret;
      }
      if (t.equals("sh")) {
        consume();
        t = tpp();
        if (t.equals("-")) {
          t = tpp();
          out_set(out, (short) (-parseInt(t))); ret;
        }
        out_set(out, (short) parseInt(t)); ret;
      }
      if (t.equals("-")) {
        consume();
        t = tpp();
        out_set(out, isLongConstant(t) ? (O) (-parseLong(t)) : (O) (-parseInt(t))); ret;
      }
      if (isInteger(t) || isLongConstant(t)) {
        consume();
        //if (debug) print("isLongConstant " + quote(t) + " => " + isLongConstant(t));
        if (isLongConstant(t)) {
          out_set(out, parseLong(t)); ret;
        }
        // plain digits: produce an Integer if the value fits, otherwise a Long
        long l = parseLong(t);
        bool isInt = l == (int) l;
        ifdef unstructure_debug
          print("l=" + l + ", isInt: " + isInt);
        endifdef
        out_set(out, isInt ? (O) Integer.valueOf((int) l) : (O) Long.valueOf(l)); ret;
      }
      
      // --- booleans ---
      if (t.equals("false") || t.equals("f")) {
        consume(); out_set(out, false); ret;
      }
      if (t.equals("true") || t.equals("t")) {
        consume(); out_set(out, true); ret;
      }
      
      if (t.equals("File")) {
        consume();
        File f = new File(unquote(tpp()));
        out_set(out, f); ret;
      }
      
      // --- references to values seen earlier ---
      if (t.startsWith("r") && isInteger(t.substring(1))) {
        consume();
        int ref = Integer.parseInt(t.substring(1));
        O o = refs.get(ref);
        if (o == null)
          fail("unsatisfied back reference " + ref);
        out_set(out, o); ret;
      }
    
      if (t.startsWith("t") && isInteger(t.substring(1))) {
        consume();
        int ref = Integer.parseInt(t.substring(1));
        O o = tokrefs.get(ref);
        if (o == null)
          fail("unsatisfied token reference " + ref + " at " + tokIndex);
        out_set(out, o); ret;
      }
      
      // --- sets ---
      if (t.equals("hashset")) ret with parseHashSet(out);
      if (t.equals("lhs")) ret with parseLinkedHashSet(out);
      if (t.equals("treeset")) ret with parseTreeSet(out);
      if (t.equals("ciset")) ret with parseCISet(out);
      
      // --- maps with an explicit type prefix ---
      if (eqOneOf(t, "hashmap", "hm")) {
        consume();
        parseMap(new HashMap, out);
        ret;
      }
      if (t.equals("lhm")) {
        consume();
        parseMap(new LinkedHashMap, out);
        ret;
      }
      if (t.equals("tm")) {
        consume();
        parseMap(new TreeMap, out);
        ret;
      }
      if (t.equals("cimap")) {
        consume();
        parseMap(ciMap(), out);
        ret;
      }
      
      // --- lists with an explicit type prefix ---
      if (t.equals("ll")) {
        consume();
        new LinkedList l;
        // register before parsing the elements so they can refer back to the list
        if (refID >= 0) refs.put(refID, l);
        ret with parseList(l, out);
      }

      if (t.equals("syncLL")) { // legacy
        consume();
        ret with parseList(synchroLinkedList(), out);
      }

      // "sync" prefix: parse the following map/list, then wrap it in the
      // matching Collections.synchronized* view
      if (t.equals("sync")) {
        consume();
        ret with parse(new unstructure_Receiver {
          void set(O value) {
            if (value instanceof Map) {
              ifndef Android // Java 7
              if (value instanceof NavigableMap)
                ret with out_set(out, Collections.synchronizedNavigableMap((NavigableMap) value));
              endifndef
              if (value instanceof SortedMap)
                ret with out_set(out, Collections.synchronizedSortedMap((SortedMap) value));
              ret with out_set(out, Collections.synchronizedMap((Map) value));
            } else
              ret with out_set(out, Collections.synchronizedList((L) value));
          }
        });
      }
      
      // --- unprefixed map / list, bit sets, arrays ---
      if (t.equals("{")) {
        parseMap(out); ret;
      }
      if (t.equals("[")) {
        new ArrayList l;
        if (refID >= 0) refs.put(refID, l);
        this.parseList(l, out); ret;
      }
      if (t.equals("bitset")) {
        parseBitSet(out); ret;
      }
      if (t.equals("array") || t.equals("intarray") || t.equals("dblarray")) {
        parseArray(out); ret;
      }
      if (t.equals("ba")) {
        consume();
        S hex = unquote(tpp());
        out_set(out, hexToBytes(hex)); ret;
      }
      if (t.equals("boolarray")) {
        consume();
        int n = parseInt(tpp());
        S hex = unquote(tpp());
        out_set(out, boolArrayFromBytes(hexToBytes(hex), n)); ret;
      }
      if (t.equals("class")) {
        out_set(out, parseClass()); ret;
      }
      if (t.equals("l")) {
        parseLisp(out); ret;
      }
      if (t.equals("null")) {
        consume(); out_set(out, null); ret;
      }
      
      // "c" prefix: remember the following identifier as a concept name,
      // then continue with it as the class name
      if (eq(t, "c")) {
        consume();
        t = t();
        assertTrue(isJavaIdentifier(t));
        concepts.add(t);
      }
      
      // custom deserialization (the class's _deserialize method is called
      // with the parsed value)
      if (eq(t, "cu")) {
        consume();
        t = tpp();
        assertTrue(isJavaIdentifier(t));
        S fullClassName = "main$" + t;
        Class _c = allDynamic ? null : findAClass(fullClassName);
        if (_c == null) fail("Class not found: " + fullClassName);
        parse(new unstructure_Receiver {
          void set(O value) {
            ifdef unstructure_debug
              print("Consumed custom object, next token: " + t());
            endifdef
            out_set(out, call(_c, "_deserialize", value));
          }
        });
        ret;
      }
    }
    
    if (eq(t, "j")) {
      consume("j");
      out_set(out, parseJava()); ret;
    }

    if (c == null && !isJavaIdentifier(t))
      throw new RuntimeException("Unknown token " + (i+1) + ": " + quote(t));
      
    // any other class name (or package name)
    consume();
    S className, fullClassName;
    
    // Is it a package name?
    if (eq(t(), ".")) {
      consume();
      className = fullClassName = t + "." + assertIdentifier(tpp());
    } else {
      className = t;
      fullClassName = "main$" + t;
    }
    
    if (c == null) {
      // First, find class
      ifdef unstructure_debug
        print("Finding class " + fullClassName + " (allDynamic=" + allDynamic + ")");
      endifdef
      if (allDynamic) c = null;
      else c = findAClass(fullClassName);
      if (c != null)
        classesMap.put(className, c);
    }
        
    // Check if it has an outer reference
    bool hasBracket = eq(t(), "(");
    if (hasBracket) consume();
    bool hasOuter = hasBracket && eq(t(), "this$1");
    
    DynamicObject dO = null;
    O o = null;
    fS thingName = t;
    if (c != null) {
      o = hasOuter ? nuStubInnerObject(c, classFinder) : nuEmptyObject(c);
      if (o instanceof DynamicObject) dO = (DynamicObject) o;
    } else {
      // class not available: fall back to a Concept stub (for names seen
      // after "c") or a plain DynamicObject carrying the class name
      if (concepts.contains(t) && (c = findAClass("main$Concept")) != null)
        o = dO = (DynamicObject) nuEmptyObject(c);
      else
        dO = new DynamicObject;
      dO.className = className;
      ifdef unstructure_debug
        print("Made dynamic object " + t + " " + shortClassName(dO));
      endifdef
    }
    
    // Save in references list early because contents of object
    // might link back to main object
    
    if (refID >= 0)
      refs.put(refID, o != null ? o : dO);
    tokrefs.put(tokIndex, o != null ? o : dO);
    
    // NOW parse the fields!
    
    new /*Linked*/HashMap<S, O> fields; // no longer preserving order (why did we do this?)
    O _o = o;
    DynamicObject _dO = dO;
    if (hasBracket) {
      // continuation: runs once per field and once more for the closing ")"
      stack.add(r {
        ifdef unstructure_debug
          print("in object values, token: " + t());
        endifdef
        if (eq(t(), ",")) consume();
        if (eq(t(), ")")) {
          consume(")");
          objRead(_o, _dO, fields, hasOuter);
          out_set(out, _o != null ? _o : _dO);
        } else {
          final S key = unquote(tpp());
          S t = tpp();
          if (!eq(t, "="))
            fail("= expected, got " + t + " after " + quote(key) + " in object " + thingName /*+ " " + sfu(fields)*/);
          stack.add(this);
          parse(new unstructure_Receiver {
            void set(O value) {
              fields.put(key, value);
              /*ifdef unstructure_debug
                print("Got field value " + value + ", next token: " + t());
              endifdef*/
              //if (eq(t(), ",")) consume();
            }
          });
        }
      });
    } else {
      objRead(o, dO, fields, hasOuter);
      out_set(out, o != null ? o : dO);
    }
  }
  
  // Transfers the collected field values into the freshly created object.
  // o:        the real object, or null if only a DynamicObject exists
  // dO:       the DynamicObject view (may be the same instance as o), or null
  // fields:   field name -> parsed value
  // hasOuter: whether fixOuterRefs should be applied to o afterwards
  void objRead(O o, DynamicObject dO, Map<S, O> fields, bool hasOuter) {
    ifdef unstructure_debug
    print("objRead " + className(o) + " " + className(dO) + " " + struct(fields));
    endifdef
    
    // Without a real object, every field goes onto the DynamicObject and we are done.
    if (o == null) {
      for (Map.Entry<S, O> entry : fields.entrySet())
        setDynObjectValue(dO, intern(entry.getKey()), entry.getValue());
      ret;
    }
    
    if (dO == null) {
      setOptAll_pcall(o, fields);
      ifdef unstructure_debug
        print("objRead now: " + struct(o));
      endifdef
    } else {
      ifdef unstructure_debug
        printStructure("setOptAllDyn", fields);
      endifdef
      setOptAllDyn_pcall(dO, fields);
    }
    
    if (hasOuter)
      fixOuterRefs(o);
    
    // give the object a chance to react to having been loaded
    pcallOpt_noArgs(o, "_doneLoading");
  }
  
  // Parses a bracketed element list into a temporary list, then adds all
  // elements to the given set and delivers the set.
  void parseSet(final Set set, final unstructure_Receiver out) {
    L buffer = new ArrayList;
    this.parseList(buffer, new unstructure_Receiver {
      void set(O elements) {
        set.addAll((L) elements);
        out_set(out, set);
      }
    });
  }
  
  // Parses l(head, arg, ...) into a Lisp object built from the first element
  // (cast to a string) and the remaining elements.
  // NOTE(review): the ifclass/endif region presumably is only compiled in when
  // class Lisp is part of the program - confirm against the JavaX transpiler.
  void parseLisp(final unstructure_Receiver out) {
    ifclass Lisp
      consume("l");
      consume("(");
      final new ArrayList list;
      // continuation: runs once per element and once more for the closing ")"
      stack.add(r {
        if (eq(t(), ")")) {
          consume(")");
          out_set(out, Lisp((S) list.get(0), subList(list, 1)));
        } else {
          stack.add(this);
          parse(new unstructure_Receiver {
            void set(O o) {
              list.add(o);
              if (eq(t(), ",")) consume();
            }
          });
        }
      });
      // the dangling if (false) attaches to the fail() statement below,
      // turning it into a no-op when this region is compiled in
      if (false) // skip fail line
    endif
    
    fail("class Lisp not included");
  }
  
  // Parses bitset{i, j, ...}: every element is the index of a bit to set.
  void parseBitSet(final unstructure_Receiver out) {
    consume("bitset");
    consume("{");
    final BitSet bits = new BitSet;
    // continuation: runs once per element and once more for the closing "}"
    stack.add(r {
      if (!eq(t(), "}")) {
        stack.add(this);
        parse(new unstructure_Receiver {
          void set(O index) {
            bits.set((Integer) index);
            if (eq(t(), ",")) consume();
          }
        });
      } else {
        consume("}");
        out_set(out, bits);
      }
    });
  }
  
  // Parses [a, b, ...] into the given list. The list is registered in tokrefs
  // under the index of the "[" token before any element is parsed.
  void parseList(final L list, final unstructure_Receiver out) {
    tokrefs.put(i, list);
    consume("[");
    // continuation: runs once per element and once more for the closing "]"
    stack.add(r {
      if (!eq(t(), "]")) {
        stack.add(this);
        parse(new unstructure_Receiver {
          void set(O element) {
            //if (debug) print("List element type: " + getClassName(element));
            list.add(element);
            if (eq(t(), ",")) consume();
          }
        });
      } else {
        consume();
        ifdef unstructure_debug
          print("Consumed list, next token: " + t());
        endifdef
        out_set(out, list);
      }
    });
  }
  
  // Parses array{...}, intarray{...} or dblarray{...}. Elements are collected
  // in a list and converted to the matching array type at the closing "}".
  void parseArray(final unstructure_Receiver out) {
    final S type = tpp();
    consume("{");
    final List elements = new ArrayList;
    
    stack.add(r {
      if (!eq(t(), "}")) {
        stack.add(this);
        parse(new unstructure_Receiver {
          void set(O element) {
            elements.add(element);
            if (eq(t(), ",")) consume();
          }
        });
      } else {
        consume("}");
        O array;
        if (type.equals("intarray"))
          array = toIntArray(elements);
        else if (type.equals("dblarray"))
          array = toDoubleArray(elements);
        else
          array = elements.toArray();
        out_set(out, array);
      }
    });
  }
  
  // Parses class("name"). Returns the resolved Class when lookups are enabled
  // and succeed; otherwise a DynamicObject stand-in with className
  // "java.lang.Class" and a "name" field (with any "main$" prefix dropped).
  Object parseClass() {
    consume("class");
    consume("(");
    S name = unquote(tpp());
    consume(")");
    
    if (!allDynamic) {
      Class found = findAClass(name);
      if (found != null) ret found;
    }
    
    DynamicObject stub = new DynamicObject;
    stub.className = "java.lang.Class";
    stub.fieldValues.put("name", dropPrefix("main$", name));
    ret stub;
  }
  
  // Parses bigint(N) or bigint(-N) into a BigInteger.
  Object parseBigInt() {
    consume("bigint");
    consume("(");
    S first = tpp();
    // a minus sign arrives as its own token; glue it back onto the digits
    S digits = eq(first, "-") ? "-" + tpp() : first;
    consume(")");
    ret new BigInteger(digits);
  }
  
  // Parses d("...") into a Double; the number is stored as a quoted string.
  Object parseDouble() {
    consume("d");
    consume("(");
    S literal = unquote(tpp());
    consume(")");
    double parsed = Double.parseDouble(literal);
    ret parsed;
  }
  
  // Parses fl("...") or fl "..." (the brackets are optional) into a Float.
  Object parseFloat() {
    consume("fl");
    bool bracketed = eq(t(), "(");
    if (bracketed) consume("(");
    S literal = unquote(tpp());
    if (bracketed) consume(")");
    float parsed = Float.parseFloat(literal);
    ret parsed;
  }
  
  // Parses hashset[...] into a HashSet.
  void parseHashSet(unstructure_Receiver out) {
    consume("hashset");
    Set target = new HashSet;
    parseSet(target, out);
  }
  
  // Parses lhs[...] into a LinkedHashSet.
  void parseLinkedHashSet(unstructure_Receiver out) {
    consume("lhs");
    Set target = new LinkedHashSet;
    parseSet(target, out);
  }
  
  // Parses treeset[...] into a TreeSet.
  void parseTreeSet(unstructure_Receiver out) {
    consume("treeset");
    Set target = new TreeSet;
    parseSet(target, out);
  }
  
  // Parses ciset[...] into the set created by ciSet().
  void parseCISet(unstructure_Receiver out) {
    consume("ciset");
    Set target = ciSet();
    parseSet(target, out);
  }
  
  // Parses an unprefixed {k=v, ...} map; such maps are read into a TreeMap.
  void parseMap(unstructure_Receiver out) {
    Map target = new TreeMap;
    parseMap(target, out);
  }
  
  // Parses the quoted string that follows a "j" token (the "j" itself has
  // already been consumed by the caller). Only the textual form
  // java.awt.Color[r=*,g=*,b=*] is recognized; anything else triggers a warning.
  O parseJava() {
    S j = unquote(tpp());
    new Matches m;
    if (jmatch("java.awt.Color[r=*,g=*,b=*]", j, m))
      // NOTE(review): $1..$3 presumably expand to the wildcard matches held in m - confirm
      ret nuObject("java.awt.Color", parseInt($1), parseInt($2), parseInt($3));
    else {
      warn("Unknown Java object: " + j);
      // NOTE(review): bare "null;" is presumably JavaX shorthand for returning null - confirm
      null;
    }
  }
  
  // Parses {key=value, ...} into the given map and delivers the map to out
  // once the closing "}" has been consumed.
  //
  // A single continuation alternates between two states: with v == false it
  // expects either "}" or a key; with v == true it expects "=" followed by
  // the value belonging to the key read just before.
  void parseMap(final Map map, final unstructure_Receiver out) {
    consume("{");
    stack.add(new Runnable {
      bool v; // true while the value for `key` is still outstanding
      O key;
      
      public void run() { 
        if (v) {
          v = false;
          stack.add(this);
          // keep the consumed token so the error message names the offending
          // token rather than the one following it
          S sep = tpp();
          if (!eq(sep, "="))
            fail("= expected, got " + sep + " in map of size " + l(map));

          parse(new unstructure_Receiver {
            void set(O value) {
              map.put(key, value);
              ifdef unstructure_debug
                print("parseMap: Got value " + getClassName(value) + ", next token: " + quote(t()));
              endifdef
              if (eq(t(), ",")) consume();
            }
          });
        } else {
          if (eq(t(), "}")) {
            consume("}");
            out_set(out, map);
          } else {
            v = true;
            stack.add(this);
            parse(new unstructure_Receiver {
              void set(O o) {
                key = o;
              }
            });
          }
        } // if v else
      } // run()
    });
  }
  
  /*void parseSub(unstructure_Receiver out) {
    int n = l(stack);
    parse(out);
    while (l(stack) > n)
      stack
  }*/
  
  // Fetches the next token from the producer and bumps the token index.
  void consume() {
    curT = tok.next();
    i++;
  }
  
  // Checks that the current token equals s (failing with a message naming
  // both the expected and the actual token if not), then advances.
  void consume(S s) {
    bool matches = eq(t(), s);
    if (!matches)
      fail(quote(s) + " expected, got " + quote(t()));
    consume();
  }
  
  // Top-level driver: loads the first token, starts parsing the root value,
  // then keeps running the most recently pushed continuation until none are left.
  void parse_initial(unstructure_Receiver out) {
    consume(); // get first token
    parse(out);
    while (nempty(stack)) {
      Runnable continuation = popLast(stack);
      continuation.run();
    }
  }
}


// Core entry point: deserializes one value from a token producer.
//
// tok:          token source
// allDynamic:   stored in data.allDynamic (class lookups are skipped when true)
// _classFinder: used only if data has no classFinder of its own; when this is
//               null as well, _defaultClassFinder() is used
// data:         parser state; a fresh unstructure_Data if omitted
//
// While parsing, the thread-local from dynamicObjectIsLoading_threadLocal()
// is set to true; its previous value is restored afterwards, also on failure.
// As a side effect, unstructure_tokrefs is updated with the number of
// token references recorded during this run.
sO unstructure_tok(Producer<S> tok, bool allDynamic, O _classFinder, unstructure_Data data default new unstructure_Data) {
  ThreadLocal<Bool> tlLoading = dynamicObjectIsLoading_threadLocal();
  Bool b = tlLoading!;
  tlLoading.set(true);
  try {
    new Var v;
    data.tok = tok;
    data.allDynamic = allDynamic;
    if (data.classFinder == null)
      data.classFinder = _classFinder != null ? _classFinder : _defaultClassFinder();
    data.parse_initial(new unstructure_Receiver {
      void set(O o) { v.set(o); }
    });
    unstructure_tokrefs = data.tokrefs.size();
    ret v.get();
  } finally {
    tlLoading.set(b);
  }
}

// Runtime debug flag.
// NOTE(review): nothing in this file uses the flag to gate output; the debug
// prints are guarded by the compile-time "ifdef unstructure_debug" instead -
// confirm whether other code still relies on this field.
static boolean unstructure_debug;

Author comment

Began life as a copy of #1025231

download  show line numbers  debug dex  old transpilations   

Travelled to 4 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, vouqrxazstgt

No comments. add comment

Snippet ID: #1030718
Snippet name: unstructure (v17, more customizability, dev.)
Eternal ID of this version: #1030718/11
Text MD5: a9cd2f909bbe27bf30658ddb1ec88425
Transpilation MD5: cb29ec1839b1cc17da36ee49091d6dcc
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2021-04-14 15:10:11
Source code size: 20119 bytes / 712 lines
Pitched / IR pitched: No / No
Views / Downloads: 178 / 314
Version history: 10 change(s)
Referenced in: #1030709 - BEA Thought Bot 1 [dev.]