Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

864
LINES

< > BotCompany Repo | #1035798 // unstructure (v20, introducing the Unstructurer class, LIVE)

JavaX fragment (include) [tags: use-pretranspiled]

Transpiled version (11875L) is out of date.

scope unstructure

// TODO: cyclic structures involving certain lists & sets

ifdef UseEclipseCollections
!include once #1027304 // Eclipse Collections
endifdef

// Convenience entry point: parses text with allDynamic switched off.
static Object unstructure(String text) {
  return unstructure(text, false);
}

// Parses text with an explicit allDynamic flag; no class finder is passed (null).
static Object unstructure(String text, boolean allDynamic) {
  return unstructure(text, allDynamic, null);
}

// Parses text with a typed class finder; allDynamic is switched off.
static Object unstructure(String text, IF1<String, Class> classFinder) {
  return unstructure(text, false, classFinder);
}

// Threshold handed to internIfLongerThan for every string literal read by parse_inner
static int structure_internStringsLongerThan = 50;
// Length of the char buffer each Unstructurer allocates for unquote()
static int unstructure_unquoteBufSize = 100;

// Set by unstructure_tok to the size of the tokrefs map after a parse
static int unstructure_tokrefs; // stats

// Callback that receives a parsed value once it is available.
abstract static class unstructure_Receiver {
  abstract void set(Object o);
}

// Main text entry point. Returns null for null text; otherwise tokenizes the
// text and hands the tokens to unstructure_tok.
// classFinder: func(name) -> class (optional)
static Object unstructure(String text, boolean allDynamic, Object classFinder) {
  return text == null
    ? null
    : unstructure_tok(javaTokC_noMLS_iterator(text), allDynamic, classFinder);
}

// Parses from a reader instead of a string (allDynamic off, no class finder).
static Object unstructure_reader(BufferedReader reader) {
  Producer<String> tokens = javaTokC_noMLS_onReader(reader);
  return unstructure_tok(tokens, false, null);
}

// Parser for the value that starts at one specific token.
// refID: reference id the value was marked with, or -1 if it had no marker
// tokIndex: index of the token the value starts at
// out: receives the parsed value
interface #Handler {
  void parse(int refID, int tokIndex, unstructure_Receiver out);
}
  
sclass Unstructurer {
  // token source; consume() pulls the next token from it
  settable Producer<S> tok;
  // when true, parse_inner and parseClass do not look up classes
  settable bool allDynamic;
  // index of the current token; incremented by consume(), so -1 before the first token
  int i = -1;
  // func(name) -> class; assigned through classFinder(...)
  O classFinder;
  // prefix put in front of simple class names to form full class names
  S mcDollar = actualMCDollar();
  
  // Sets the class finder, falling back to _defaultClassFinder() when null is given,
  // and returns this for chaining.
  // Also derives mcDollar from the class the finder returns for "<main>".
  // This has to happen here: the constructor runs before the finder is assigned
  // (see unstructure_tok), so a lookup made only in the constructor never sees it.
  // The lookup is wrapped in pcall, as in the constructor; mcDollar keeps its
  // previous value when the finder returns no main class.
  selfType classFinder(O _classFinder) {
    classFinder = _classFinder != null ? _classFinder : _defaultClassFinder();
    pcall {
      Class mc = cast callF(classFinder, "<main>");
      if (mc != null) mcDollar = mc.getName() + "$";
    }
    this;
  }

  // use Eclipse primitive collection if possible (smaller & hopefully faster?)
  // refs: reference id -> object (looked up for "r<N>" tokens)
  // tokrefs: token index -> object (looked up for "t<N>" tokens)
  ifdef UseEclipseCollections
  new IntObjectHashMap<O> refs;
  new IntObjectHashMap<O> tokrefs;
  endifdef
  ifndef UseEclipseCollections
  new HashMap<Integer, O> refs;
  new HashMap<Integer, O> tokrefs;
  endifndef
  
  // class names that were introduced with the "c" token
  new HashSet<S> concepts;
  // pending continuations; parse_initial pops and runs them until the stack is empty
  new L<Runnable> stack;
  // class name -> base class name, filled by "bc" tokens
  new SS baseClassMap;
  // cache of constructors used for objects that carry an outer reference
  new HashMap<Class, Constructor> innerClassConstructors;
  // current token
  S curT;
  // scratch buffer passed to unquoteUsingCharArray
  char[] unquoteBuf = new char[unstructure_unquoteBufSize];
  
  // value is a class or a Handler
  final new HashMap<S, O> handlers;
  
  // Constructor: tries to derive mcDollar from the class the class finder returns
  // for "<main>", then registers the built-in token handlers.
  // NOTE(review): unstructure_tok assigns the class finder through classFinder(...)
  // only after construction, so the classFinder field is still unassigned when this
  // lookup runs - presumably callF then yields null and mcDollar keeps its
  // initial value; confirm.
  *() {
    pcall {
      Class mc = cast callF(classFinder, "<main>");
      if (mc != null) mcDollar = mc.getName() + "$";
    }
    
    makeHandlers();
  }
  
  // Shorthand for the typed parameter list of a Handler lambda
  replace HandlerArgs with (Handler) (refID, tokIndex, out).
  
  // Fills the handlers map with the built-in handlers.
  // Each handler is registered under the token that starts the value it parses;
  // parse_inner looks the current token up in this map first.
  void makeHandlers {
    Handler h;
    
    // bigint / d / fl: delegate to the respective parse method
    handlers.put("bigint", HandlerArgs
      -> out.set(parseBigInt()));
      
    handlers.put("d", HandlerArgs
      -> out.set(parseDouble()));
      
    handlers.put("fl", HandlerArgs
      -> out.set(parseFloat()));
      
    // sh: a short, optionally preceded by a separate "-" token
    handlers.put("sh", HandlerArgs -> {
      consume();
      S t = tpp();
      if (t.equals("-")) {
        t = tpp();
        // fixed: the closing parenthesis of out.set(...) was missing
        out.set((short) (-parseInt(t))); ret;
      }
      out.set((short) parseInt(t));
    });
    
    // enum: class name (resolved relative to mcDollar) followed by the ordinal
    handlers.put("enum", HandlerArgs -> {
      consume();
      S t = tpp();
      assertTrue(isJavaIdentifier(t));
      S fullClassName = mcDollar + t;
      Class _c = findAClass(fullClassName);
      if (_c == null) fail("Enum class not found: " + fullClassName);
      int ordinal = parseInt(tpp());
      out.set(_c.getEnumConstants()[ordinal]);
    });

    // booleans, long and short spelling share one handler each
    handlers.put("false", h = HandlerArgs -> {
      consume(); out.set(false);
    });
    handlers.put("f", h);
    
    handlers.put("true", h = HandlerArgs -> {
      consume(); out.set(true);
    });
    handlers.put("t", h);
    
    // "{" starts a map without a type prefix
    handlers.put("{", HandlerArgs -> parseMap(out));
    
    // "[" starts an ArrayList; it is registered under its reference id
    // before the elements are parsed
    handlers.put("[", HandlerArgs -> {
      new ArrayList l;
      if (refID >= 0) refs.put(refID, l);
      this.parseList(l, out);
    });
    
    handlers.put("bitset", HandlerArgs -> parseBitSet(out));
    
    // all of these array kinds are handled by parseArray, which reads the
    // type token itself
    handlers.put("array", h = HandlerArgs -> parseArray(out));
    handlers.put("intarray", h);
    handlers.put("dblarray", h);
    handlers.put("floatarray", h);
    
    // short and long arrays are stored as a hex string
    handlers.put("shortarray", HandlerArgs -> {
      consume();
      S hex = trivialUnquote(tpp());
      out.set(shortArrayFromBytes(hexToBytes(hex)));
    });
    
    handlers.put("longarray", HandlerArgs -> {
      consume();
      S hex = trivialUnquote(tpp());
      out.set(longArrayFromBytes(hexToBytes(hex)));
    });
  } // end of makeHandlers - add more handlers here

  // Looks a class up by its full name: through the class finder when one is set,
  // otherwise through findClass_fullName. Yields null on exception.
  Class findAClass(S fullClassName) null on exception {
    if (classFinder == null)
      ret findClass_fullName(fullClassName);
    ret (Class) callF(classFinder, fullClassName);
  }
  
  // Unquotes s via unquoteUsingCharArray, passing this instance's unquoteBuf.
  String unquote(String s) {
    return unquoteUsingCharArray(s, unquoteBuf);
  }

  // peek: current token, without advancing
  String t() {
    return curT;
  }
  
  // returns the current token and advances to the next one
  String tpp() {
    final String current = curT;
    consume();
    return current;
  }
  
  // Parses one value starting at the current token.
  // An optional reference marker in front of the value is read first; the value
  // is then recorded under that reference id (if any) and, when non-null, under
  // the index of its first token, before being passed on to out.
  void parse(final unstructure_Receiver out) {
    S token = t();
    
    bool hasMarker = structure_isMarker(token, 0, l(token));
    final int refID = hasMarker ? parseInt(token.substring(1)) : -1;
    if (hasMarker) consume();
    
    // index of the value's first token (after the marker, if there was one)
    final int tokIndex = i;
    
    unstructure_Receiver recorder = new unstructure_Receiver {
      void set(O value) {
        if (refID >= 0)
          refs.put(refID, value);
        if (value != null)
          tokrefs.put(tokIndex, value);
        out.set(value);
      }
    };
    parse_inner(refID, tokIndex, recorder);
  }
  
  // Parses the value that starts at the current token and delivers it to out.
  // refID: reference id of the value, or -1; tokIndex: index of its first token.
  //
  // Order of attempts:
  //  1. a Handler registered for the token in the handlers map
  //  2. built-in literals, references, collections and special tokens
  //  3. otherwise the token is taken as a class name (optionally with a package),
  //     an instance is created and its fields are parsed
  // Collections and objects are parsed incrementally via continuations pushed
  // onto the stack, so out may be called after this method has returned.
  void parse_inner(int refID, int tokIndex, unstructure_Receiver out) {
    S t = t();
    
    // if (debug) print("parse_inner: " + quote(t));
    
    O handler = handlers.get(t);
    if (handler cast Handler)
      ret with handler.parse(refID, tokIndex, out);
      
    // a non-Handler entry is a class cached by an earlier lookup (or null)
    Class c = cast handler;
    if (c == null) {
      // string literal
      if (t.startsWith("\"")) {
        S s = internIfLongerThan(unquote(tpp()), structure_internStringsLongerThan);
        out.set(s); ret;
      }
      
      // character literal
      if (t.startsWith("'")) {
        out.set(unquoteCharacter(tpp())); ret;
      }
      
      // negative number: "-" arrives as a separate token
      if (t.equals("-")) {
        consume();
        t = tpp();
        out.set(isLongConstant(t) ? (O) (-parseLong(t)) : (O) (-parseInt(t))); ret;
      }
      
      // non-negative number: becomes an Integer when the value fits, a Long otherwise
      if (isInteger(t) || isLongConstant(t)) {
        consume();
        //if (debug) print("isLongConstant " + quote(t) + " => " + isLongConstant(t));
        if (isLongConstant(t)) {
          out.set(parseLong(t)); ret;
        }
        long l = parseLong(t);
        bool isInt = l == (int) l;
        ifdef unstructure_debug
          print("l=" + l + ", isInt: " + isInt);
        endifdef
        out.set(isInt ? (O) Integer.valueOf((int) l) : (O) Long.valueOf(l)); ret;
      }
      
      if (t.equals("File")) {
        consume();
        File f = new File(unquote(tpp()));
        out.set(f); ret;
      }
      
      // back reference by reference id
      if (t.startsWith("r") && isInteger(t.substring(1))) {
        consume();
        int ref = Integer.parseInt(t.substring(1));
        O o = refs.get(ref);
        if (o == null)
          warn("unsatisfied back reference " + ref);
        out.set(o); ret;
      }
    
      // back reference by token index
      if (t.startsWith("t") && isInteger(t.substring(1))) {
        consume();
        int ref = Integer.parseInt(t.substring(1));
        O o = tokrefs.get(ref);
        if (o == null)
          warn("unsatisfied token reference " + ref + " at " + tokIndex);
        out.set(o); ret;
      }
      
      if (t.equals("hashset")) ret with parseHashSet(out);
      if (t.equals("lhs")) ret with parseLinkedHashSet(out);
      if (t.equals("treeset")) ret with parseTreeSet(out);
      if (t.equals("ciset")) ret with parseCISet(out);
      
      if (eqOneOf(t, "hashmap", "hm")) {
        consume();
        parseMap(new HashMap, out);
        ret;
      }
      if (t.equals("lhm")) {
        consume();
        parseMap(new LinkedHashMap, out);
        ret;
      }
      if (t.equals("tm")) {
        consume();
        parseMap(new TreeMap, out);
        ret;
      }
      if (t.equals("cimap")) {
        consume();
        parseMap(ciMap(), out);
        ret;
      }
      
      if (t.equals("ll")) {
        consume();
        new LinkedList l;
        if (refID >= 0) refs.put(refID, l);
        ret with parseList(l, out);
      }

      if (t.equals("syncLL")) { // legacy
        consume();
        ret with parseList(synchroLinkedList(), out);
      }

      // "sync" prefix: parse the following map or list, then wrap it
      if (t.equals("sync")) {
        consume();
        ret with parse(new unstructure_Receiver {
          void set(O value) {
            if (value instanceof Map) {
              ifndef Android // Java 7
              if (value cast NavigableMap)
                ret with out.set(synchroNavigableMap(value));
              endifndef
              if (value cast SortedMap)
                ret with out.set(synchroSortedMap(value));
              ret with out.set(synchroMap((Map) value));
            } else
              ret with out.set(synchroList((L) value));
          }
        });
      }
      
      if (t.equals("ba")) {
        consume();
        S hex = trivialUnquote(tpp());
        out.set(hexToBytes(hex)); ret;
      }
      if (t.equals("boolarray")) {
        consume();
        int n = parseInt(tpp());
        S hex = trivialUnquote(tpp());
        out.set(boolArrayFromBytes(hexToBytes(hex), n)); ret;
      }
      if (t.equals("class")) {
        out.set(parseClass()); ret;
      }
      if (t.equals("l")) {
        parseLisp(out); ret;
      }
      if (t.equals("null")) {
        consume(); out.set(null); ret;
      }
      
      // "c" prefix: remember the following class name as a concept, then
      // fall through so it is parsed as a class name below
      if (eq(t, "c")) {
        consume();
        t = t();
        assertTrue(isJavaIdentifier(t));
        concepts.add(t);
      }
      
      // custom deserialization (new static method)
      if (eq(t, "cu")) {
        consume();
        t = tpp();
        assertTrue(isJavaIdentifier(t));
        S fullClassName = mcDollar + t;
        Class _c = findAClass(fullClassName);
        if (_c == null) fail("Class not found: " + fullClassName);
        parse(new unstructure_Receiver {
          void set(O value) {
            ifdef unstructure_debug
              print("Consumed custom object, next token: " + t());
            endifdef
            out.set(call(_c, "_deserialize", value));
          }
        });
        ret;
      }
    }
    
    if (eq(t, "j")) {
      consume();
      out.set(parseJava()); ret;
    }
    
    // "bc" declares a base class for a class name, then parsing continues
    // with the value that follows
    if (eq(t, "bc")) {
      consume();
      S c1 = tpp();
      S c2 = tpp();
      baseClassMap.put(c1, c2);
      ret with parse_inner(refID, i, out);
    }
    
    // add more tokens here

    // Now we want to find our target class c
    // Have we failed to look up the class before?
    //bool seenBefore = handlers.containsKey(cname);

    // If we have seen the class before, we skip all of this
    // and simply leave c as null
    // TODO - how do we fill className?
    //if (!seenBefore) {
      if (c == null && !isJavaIdentifier(t))
        throw new RuntimeException("Unknown token " + (i+1) + ": " + quote(t));
        
      // any other class name (or package name)
      consume();
      S className, fullClassName;
      
      // Is it a package name?
      if (eq(t(), ".")) {
        className = t;
        do {
          consume();
          className += "." + assertIdentifier(tpp());
        } while (eq(t(), "."));
        fullClassName = className;
      } else {
        className = t;
        fullClassName = mcDollar + t;
      }
      
      if (c == null && !allDynamic) {
        // First, find class
        c = findAClass(fullClassName);
        // cache the result (even when null) under the class name
        handlers.put(className, c);
      }
      
      // check for existing base class
      if (c == null && !allDynamic) {
        new Set<S> seen;
        S parent = className;
        while true {
          S baseName = baseClassMap.get(parent);
          if (baseName == null)
            break;
          if (!seen.add(baseName))
            fail("Cyclic superclass info: " + baseName);
          c = findAClass(mcDollar + baseName);
          if (c == null)
            print("Base class " + baseName + " of " + parent +  " doesn't exist either");
          else if (isAbstract(c))
            print("Can't instantiate abstract base class: " + c);
          else {
            printVars_str("Reverting to base class", +className, +baseName, +c);
            handlers.put(className, c);
            break;
          }
          parent = baseName;
        }
      }
    //}
        
    // Check if it has an outer reference
    bool hasBracket = eq(t(), "(");
    if (hasBracket) consume();
    bool hasOuter = hasBracket && startsWith(t(), "this$");
    
    DynamicObject dO = null;
    O o = null;
    fS thingName = t;
    pcall {
      if (c != null) {
        if (hasOuter) try {
          Constructor ctor = innerClassConstructors.get(c);
          if (ctor == null)
            innerClassConstructors.put(c, ctor = nuStubInnerObject_findConstructor(c, classFinder));
          o = ctor.newInstance(new O[] {null});
        } catch Exception e {
          print("Error deserializing " + c + ": " + e);
          o = nuEmptyObject(c);
        } else
          o = nuEmptyObject(c);
        if (o instanceof DynamicObject) dO = (DynamicObject) o;
      } else {
        if (concepts.contains(t) && (c = findAClass(mcDollar + "Concept")) != null)
          o = dO = (DynamicObject) nuEmptyObject(c);
        else
          dO = new DynamicObject;
        dO.className = className;
        ifdef unstructure_debug
          print("Made dynamic object " + t + " " + shortClassName(dO));
        endifdef
      }
    } // end of pcall
    
    // Creating instance failed? Use DynamicObject
    if (o == null && dO == null)
      dO = new DynamicObject;
    
    // Save in references list early because contents of object
    // might link back to main object
    
    if (refID >= 0)
      refs.put(refID, o != null ? o : dO);
    tokrefs.put(tokIndex, o != null ? o : dO);
    
    // NOW parse the fields!
    
    new /*Linked*/HashMap<S, O> fields; // no longer preserving order (why did we do this?)
    O _o = o;
    DynamicObject _dO = dO;
    if (hasBracket) {
      // continuation: runs once per field and a final time at the closing bracket
      stack.add(r {
        ifdef unstructure_debug
          print("in object values, token: " + t());
        endifdef
        if (eq(t(), ",")) consume();
        if (eq(t(), ")")) {
          consume(")");
          objRead(_o, _dO, fields, hasOuter);
          out.set(_o != null ? _o : _dO);
        } else {
          final S key = unquote(tpp());
          S t = tpp();
          if (!eq(t, "="))
            fail("= expected, got " + t + " after " + quote(key) + " in object " + thingName /*+ " " + sfu(fields)*/);
          stack.add(this);
          parse(new unstructure_Receiver {
            void set(O value) {
              fields.put(key, value);
              /*ifdef unstructure_debug
                print("Got field value " + value + ", next token: " + t());
              endifdef*/
              //if (eq(t(), ",")) consume();
            }
          });
        }
      });
    } else {
      objRead(o, dO, fields, hasOuter);
      out.set(o != null ? o : dO);
    }
  }
  
  // Transfers the parsed field values into the freshly created object.
  // o: the instance (null if only a DynamicObject was created)
  // dO: the same object seen as DynamicObject, or the stand-in when o is null
  // fields: field name -> parsed value
  // hasOuter: whether the serialized object carried an outer reference
  void objRead(O o, DynamicObject dO, MapSO fields, bool hasOuter) {
    ifdef unstructure_debug
    print("objRead " + className(o) + " " + className(dO) + " " + struct(fields));
    endifdef
    
    // translate between different compilers (this$0 vs this$1):
    // whichever of the two is present gets mirrored under the other name
    O outer0 = fields.get("this$0");
    if (outer0 != null)
      fields.put("this$1", outer0);
    else {
      O outer1 = fields.get("this$1");
      if (outer1 != null) fields.put("this$0", outer1);
    }
    
    // no real instance: store every field on the dynamic object and stop
    if (o == null) {
      for (Map.Entry<S, O> entry : fields.entrySet())
        setDynObjectValue(dO, intern(entry.getKey()), entry.getValue());
      ret;
    }
    
    if (dO == null) {
      setOptAll_pcall(o, fields);
      ifdef unstructure_debug
        print("objRead now: " + struct(o));
      endifdef
    } else {
      ifdef unstructure_debug
        printStructure("setOptAllDyn", fields);
      endifdef
      setOptAllDyn_pcall(dO, fields);
    }
    
    if (hasOuter)
      fixOuterRefs(o);

    pcallOpt_noArgs(o, "_doneLoading");
  }
  
  // Parses a bracketed element list and adds all elements to the given set,
  // which is then delivered to out.
  void parseSet(final Set set, final unstructure_Receiver out) {
    unstructure_Receiver whenListDone = new unstructure_Receiver {
      void set(O elements) {
        set.addAll((L) elements);
        out.set(set);
      }
    };
    this.parseList(new ArrayList, whenListDone);
  }
  
  // Parses l(...) into a Lisp object: the first parsed element is used as the
  // head (cast to S), the remaining elements as its arguments.
  // The parsing code is only present when class Lisp is included (ifclass);
  // otherwise the method consists of the fail(...) statement alone.
  void parseLisp(final unstructure_Receiver out) {
    ifclass Lisp
      consume("l");
      consume("(");
      final new ArrayList list;
      // continuation: runs once per element and a final time at the closing bracket
      stack.add(r {
        if (eq(t(), ")")) {
          consume(")");
          out.set(Lisp((S) list.get(0), subList(list, 1)));
        } else {
          stack.add(this);
          parse(new unstructure_Receiver {
            void set(O o) {
              list.add(o);
              if (eq(t(), ",")) consume();
            }
          });
        }
      });
      // dangling "if (false)": attaches to the fail(...) statement after endif,
      // so that statement is never executed when Lisp is included
      if (false) // skip fail line
    endif
    
    fail("class Lisp not included");
  }
  
  // Parses bitset{...}: every parsed element is the index of a bit to set.
  void parseBitSet(final unstructure_Receiver out) {
    consume("bitset");
    consume("{");
    final BitSet bits = new BitSet();
    // continuation: runs once per element and a final time at the closing brace
    stack.add(r {
      if (!eq(t(), "}")) {
        stack.add(this);
        parse(new unstructure_Receiver {
          void set(O index) {
            bits.set((Integer) index);
            if (eq(t(), ",")) consume();
          }
        });
      } else {
        consume("}");
        out.set(bits);
      }
    });
  }
  
  // Parses [...] into the given list and delivers the list to out.
  // The list is recorded under the current token index before its elements are parsed.
  void parseList(final L list, final unstructure_Receiver out) {
    tokrefs.put(i, list);
    consume("[");
    // continuation: runs once per element and a final time at the closing bracket
    stack.add(r {
      if (!eq(t(), "]")) {
        stack.add(this);
        parse(new unstructure_Receiver {
          void set(O element) {
            list.add(element);
            if (eq(t(), ",")) consume();
          }
        });
      } else {
        consume();
        ifdef unstructure_debug
          print("Consumed list, next token: " + t());
        endifdef
        out.set(list);
      }
    });
  }
  
  // Parses an array: type token, optional "S" (string array), optional
  // "/" + dimension count, then the elements in braces.
  // The elements are collected in a list and converted when the closing brace
  // is reached.
  void parseArray(unstructure_Receiver out) {
    S keyword = tpp();

    // string array
    bool isStringArray = eq(t(), "S");
    if (isStringArray) consume();
    S type = isStringArray ? "S" : keyword;
    
    // multi-dimensional array
    bool multiDim = eq(t(), "/");
    if (multiDim) consume();
    int dims = multiDim ? parseInt(tpp()) : 1;
    
    consume("{");
    List list = new ArrayList;
    
    stack.add(r {
      if (!eq(t(), "}")) {
        stack.add(this);
        parse(new unstructure_Receiver {
          void set(O element) {
            list.add(element);
            if (eq(t(), ",")) consume();
          }
        });
      } else {
        consume("}");
        if (dims <= 1) {
          O result;
          if (type.equals("intarray")) result = toIntArray(list);
          else if (type.equals("dblarray")) result = toDoubleArray(list);
          else if (type.equals("floatarray")) result = toFloatArray(list);
          else if (type.equals("S")) result = toStringArray(list);
          else result = list.toArray();
          out.set(result);
        } else {
          Class atype;
          if (type.equals("intarray")) atype = int.class;
          else if (type.equals("S")) atype = S.class;
          else todo("multi-dimensional arrays of other types");
          
          out.set(list.toArray((O[]) newMultiDimensionalOuterArray(atype, dims, l(list))));
        }
      }
    });
  }
  
  // Parses class("name"). Yields the class when it can be found (and allDynamic
  // is off); otherwise a DynamicObject with class name "java.lang.Class" whose
  // "name" field holds the name without the mcDollar prefix.
  Object parseClass() {
    consume("class");
    consume("(");
    S name = unquote(tpp());
    consume(")");
    
    if (!allDynamic) {
      Class found = findAClass(name);
      if (found != null) ret found;
    }
    
    DynamicObject dO = new DynamicObject;
    dO.className = "java.lang.Class";
    name = dropPrefix(mcDollar, name);
    dynamicObject_setRawFieldValue(dO, +name);
    ret dO;
  }
  
  // Parses bigint(...); a leading "-" arrives as a separate token.
  Object parseBigInt() {
    consume("bigint");
    consume("(");
    S first = tpp();
    S digits = eq(first, "-") ? "-" + tpp() : first;
    consume(")");
    ret new BigInteger(digits);
  }
  
  // Parses d(...): the bracketed token is unquoted and read as a double.
  Object parseDouble() {
    consume("d");
    consume("(");
    S literal = unquote(tpp());
    consume(")");
    ret Double.parseDouble(literal);
  }
  
  // Parses a float after the "fl" token; the value may or may not be wrapped
  // in round brackets.
  Object parseFloat() {
    consume("fl");
    bool bracketed = eq(t(), "(");
    if (bracketed) consume("(");
    S literal = unquote(tpp());
    if (bracketed) consume(")");
    ret Float.parseFloat(literal);
  }
  
  // hashset[...] -> HashSet
  void parseHashSet(unstructure_Receiver out) {
    consume("hashset");
    parseSet(new HashSet(), out);
  }
  
  // lhs[...] -> LinkedHashSet
  void parseLinkedHashSet(unstructure_Receiver out) {
    consume("lhs");
    parseSet(new LinkedHashSet(), out);
  }
  
  // treeset[...] -> TreeSet
  void parseTreeSet(unstructure_Receiver out) {
    consume("treeset");
    parseSet(new TreeSet(), out);
  }
  
  // ciset[...] -> the set created by ciSet()
  void parseCISet(unstructure_Receiver out) {
    consume("ciset");
    Set target = ciSet();
    parseSet(target, out);
  }
  
  // A map without a type prefix is read into a TreeMap.
  void parseMap(unstructure_Receiver out) {
    parseMap(new TreeMap(), out);
  }
  
  // Parses the quoted description of a Java object. Only the pattern
  // java.awt.Color[r=*,g=*,b=*] is recognized; anything else produces a
  // warning and null.
  O parseJava() {
    S j = unquote(tpp());
    new Matches m;
    if (!jmatch("java.awt.Color[r=*,g=*,b=*]", j, m)) {
      warn("Unknown Java object: " + j);
      ret null;
    }
    ret nuObject("java.awt.Color", parseInt($1), parseInt($2), parseInt($3));
  }
  
  // Parses {key=value, ...} into the given map and delivers the map to out.
  // A single continuation alternates between two states: v == false means a key
  // (or the closing brace) comes next, v == true means "=" and a value come next.
  void parseMap(final Map map, final unstructure_Receiver out) {
    consume("{");
    stack.add(new Runnable {
      bool v;  // true while the value for the current key is still to be read
      O key;   // key parsed most recently
      
      public void run() { 
        if (v) {
          v = false;
          stack.add(this);
          // Keep the consumed token so the error message reports the token
          // that was actually found (t() would already be the one after it)
          S separator = tpp();
          if (!eq(separator, "="))
            fail("= expected, got " + separator + " in map of size " + l(map));

          parse(new unstructure_Receiver {
            void set(O value) {
              map.put(key, value);
              ifdef unstructure_debug
                print("parseMap: Got value " + getClassName(value) + ", next token: " + quote(t()));
              endifdef
              if (eq(t(), ",")) consume();
            }
          });
        } else {
          if (eq(t(), "}")) {
            consume("}");
            out.set(map);
          } else {
            v = true;
            stack.add(this);
            parse(new unstructure_Receiver {
              void set(O o) {
                key = o;
              }
            });
          }
        } // if v else
      } // run()
    });
  }
  
  /*void parseSub(unstructure_Receiver out) {
    int n = l(stack);
    parse(out);
    while (l(stack) > n)
      stack
  }*/
  
  // Advances to the next token and increments the token index.
  void consume() {
    curT = tok.next();
    ++i;
  }
  
  // Advances past the current token after checking that it equals s;
  // fails with a message naming both tokens otherwise.
  void consume(S s) {
    S actual = t();
    if (!eq(actual, s))
      fail(quote(s) + " expected, got " + quote(actual));
    consume();
  }
  
  // Entry point: fetches the first token, starts parsing and then keeps running
  // the most recently added continuation until none are left.
  void parse_initial(unstructure_Receiver out) {
    consume(); // get first token
    parse(out);
    while (nempty(stack)) {
      Runnable step = popLast(stack);
      step.run();
    }
  }
}

// Parses a token stream into an object.
// tok: token source; allDynamic and classFinder are passed on to the Unstructurer.
// The thread-local returned by dynamicObjectIsLoading_threadLocal() is set to
// true via tempSetTL for the duration of the call.
// After parsing, the number of recorded token references is stored in
// unstructure_tokrefs.
sO unstructure_tok(Producer<S> tok, bool allDynamic, O classFinder) {
  // (the unused local copy of unstructure_debug was removed)
  temp tempSetTL(dynamicObjectIsLoading_threadLocal(), true);
  new Var v;
  var unstructurer = new Unstructurer()
    .tok(tok)
    .allDynamic(allDynamic)
    .classFinder(classFinder);
  unstructurer.parse_initial(new unstructure_Receiver {
    void set(O o) { v.set(o); }
  });
  unstructure_tokrefs = unstructurer.tokrefs.size();
  ret v!;
}

// Runtime debug flag. Within this file it is only read in unstructure_tok.
// NOTE(review): the debug prints are guarded by "ifdef unstructure_debug",
// which presumably is a separate compile-time switch - confirm.
static boolean unstructure_debug;

Author comment

Began life as a copy of #1035484

download  show line numbers  debug dex  old transpilations   

Travelled to 1 computer(s): mqqgnosmbjvj

No comments. add comment

Snippet ID: #1035798
Snippet name: unstructure (v20, introducing the Unstructurer class, LIVE)
Eternal ID of this version: #1035798/12
Text MD5: c922d06d23970887ad522d3f99d34836
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2023-03-20 20:54:33
Source code size: 24347 bytes / 864 lines
Pitched / IR pitched: No / No
Views / Downloads: 231 / 305
Version history: 11 change(s)
Referenced in: #1006654 - Standard functions list 2 (LIVE, continuation of #761)