LineCompReader - read LINECOMP format [1028182]

sclass LineCompReader {
  new LS literals;
  IVF1<Long> onPair; // if not null, don't save pairs, but stream them to onPair instead
  int[] literalOffsets; // where they start in file
  new LongBuffer pairs;
  new LinkedHashMap<S, L<Int>> versions;
  bool byteMode;
  
  // internal, optional
  CountingInputStream countingInputStream;
  long fileSize;
  
  *() {}
  
  // takes text or gzipped input file
  *(File f) { load(f); }
  
  *(InputStream in) { load(rawByteReader(in, 128*1024)); }
  *(BufferedReader reader) { load(reader); }
  
  void load(File f) {
    fileSize = fileSize(f);
    countingInputStream = new CountingInputStream(bufferedFileInputStream(f));
    temp BufferedReader reader = isGZipFile(f)
      ? rawByteReader(gzipInputStream(countingInputStream))
      : rawByteReader(countingInputStream);
      //rawByteReader_possiblyGZipped(f);
    load(reader);
  }
  
  void load(BufferedReader reader) ctex {
    new StringBuilder lineBuf;
    S s = readLineIgnoreCR(reader, lineBuf);
    int ofs = l(s)+1;
    new Matches m;
    if (startsWith(s, "BYTECOMP ", m)) set byteMode;
    else if (!startsWith(s, "LINECOMP ", m))
      fail("Not a LINECOMP file");
    int nLiterals = parseInt(m.rest());
    new IntBuffer offsets;
    for i to nLiterals: {
      S line = readLineIgnoreCR(reader, lineBuf);
      assertNotNull(line);
      literals.add(byteMode ? str(charFromHex(line)) : line);
      offsets.add(ofs);
      ofs += l(line)+1;
    }
    offsets.add(ofs);
    literalOffsets = offsets.toArray();
    int n = 0;
    while licensed {
      s = readLineIgnoreCR(reader, lineBuf);
      if (s == null || contains(s, "=")) break;
      try {
        int iSpace = s.indexOf(' ');
        long pair = twoIntsToLong(
          Int.parseInt(s, 0, iSpace, 10),
          Int.parseInt(s, iSpace+1, l(s), 10));
        if (onPair != null) onPair.get(pair);
        else pairs.add(pair);
        if (((++n) % oneMillion()) == 0) {
          S percentage = "";
          if (fileSize != 0 && countingInputStream != null)
            percentage = " (" + intPercentRatio(countingInputStream.getFilePointer(), fileSize) + "%)";
          print(nPairs(n) + " read" + percentage);
        }
      } on fail {
        print("On line " + (nLiterals + l(pairs)));
      }
    }
    pairs.trimToSize();
    while (contains(s, "=")) {
      int i = indexOf(s, '=');
      versions.put(takeFirst(s, i), compactIntList(parseInts(splitAtSpace(substring(s, i+1)))));
      s = readLineIgnoreCR(reader, lineBuf);
    }
  }
  
  Set<S> versions() { ret keys(versions); }
  
  S getText(S version) { ret textForVersion(version); }
  S textForVersion(S version) {
    L<Int> encoded = versions.get(version);
    if (encoded == null) null;
    new LS buf;
    for (int idx : encoded)
      decode(idx, buf);
    ret myFromLines(buf);
  }
  
  // name of first (or only) file
  S firstFile() { ret first(versions()); }
  
  // text for first (or only) file
  S text() { ret getText(firstFile()); }
  
  L<Int> encoding() { ret versions.get(firstFile()); }
  
  S myFromLines(LS l) {
    ret byteMode
      ? join(l)
      : fromLines_rtrim(l);
  }
  
  void decode(int idx, LS buf) {
    if (idx < l(literals))
      buf.add(literals.get(idx));
    else {
      long p = pairs.get(idx-l(literals));
      decode(firstIntFromLong(p), buf);
      decode(secondIntFromLong(p), buf);
    }
  }
  
  // That was it! The rest of this file is just for calculating some stats.
  
  new Map<Int> lineCountsForPairs;
  new Map<Int, Long> byteCountsForPairs;

  int lineCountForPointer(int idx) {
    ret idx < l(literals) ? 1 : lineCountForPair(idx);
  }
  
  long byteCountForPointer(int idx) {
    ret idx < l(literals) ? l(literals.get(idx))+1 : byteCountForPair(idx);
  }
  
  int lineCountForPair(int idx) {
    Int c = lineCountsForPairs.get(idx);
    if (c == null) {
      long p = pairs.get(idx-l(literals));
      c = lineCountForPointer(firstIntFromLong(p)) + lineCountForPointer(secondIntFromLong(p));
      lineCountsForPairs.put(idx, c);
    }
    ret c;
  }
  
  long byteCountForPair(int idx) {
    Long c = byteCountsForPairs.get(idx);
    if (c == null) {
      long p = pairs.get(idx-l(literals));
      c = byteCountForPointer(firstIntFromLong(p)) + byteCountForPointer(secondIntFromLong(p));
      byteCountsForPairs.put(idx, c);
    }
    ret c;
  }
  
  int lineCountForVersion(S version) {
    L<Int> encoded = versions.get(version);
    if (encoded == null) ret 0;
    int n = 0;
    for (int i : encoded) n += lineCountForPointer(i);
    ret n;
  }
  
  long byteCountForVersion(S version) {
    L<Int> encoded = versions.get(version);
    if (encoded == null) ret 0;
    long n = 0;
    for (int i : encoded) n += byteCountForPointer(i);
    ret max(0, n-1);
  }
  
  long totalByteCount() {
    ret longSum(lambdaMap byteCountForVersion(versions()));
  }
  
  // now we can also save again
  
  void save(PrintWriter out) {
    out.println((byteMode ? "BYTECOMP " : "LINECOMP ") + l(literals));
    for (S s : literals)
      out.println(byteMode ? charToHex(first(s)) : s);
    for (long p : pairs)
      out.println(firstIntFromLong(p) + " " + secondIntFromLong(p));
    for (S id, L<Int> l : versions)
      out.println(id + "=" + joinWithSpace(l));
  }  
}

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

1	sclass LineCompReader {
2	new LS literals;
3	IVF1<Long> onPair; // if not null, don't save pairs, but stream them to onPair instead
4	int[] literalOffsets; // where they start in file
5	new LongBuffer pairs;
6	new LinkedHashMap<S, L<Int>> versions;
7	bool byteMode;
8
9	// internal, optional
10	CountingInputStream countingInputStream;
11	long fileSize;
12
13	*() {}
14
15	// takes text or gzipped input file
16	*(File f) { load(f); }
17
18	*(InputStream in) { load(rawByteReader(in, 128*1024)); }
19	*(BufferedReader reader) { load(reader); }
20
21	void load(File f) {
22	fileSize = fileSize(f);
23	countingInputStream = new CountingInputStream(bufferedFileInputStream(f));
24	temp BufferedReader reader = isGZipFile(f)
25	? rawByteReader(gzipInputStream(countingInputStream))
26	: rawByteReader(countingInputStream);
27	//rawByteReader_possiblyGZipped(f);
28	load(reader);
29	}
30
31	void load(BufferedReader reader) ctex {
32	new StringBuilder lineBuf;
33	S s = readLineIgnoreCR(reader, lineBuf);
34	int ofs = l(s)+1;
35	new Matches m;
36	if (startsWith(s, "BYTECOMP ", m)) set byteMode;
37	else if (!startsWith(s, "LINECOMP ", m))
38	fail("Not a LINECOMP file");
39	int nLiterals = parseInt(m.rest());
40	new IntBuffer offsets;
41	for i to nLiterals: {
42	S line = readLineIgnoreCR(reader, lineBuf);
43	assertNotNull(line);
44	literals.add(byteMode ? str(charFromHex(line)) : line);
45	offsets.add(ofs);
46	ofs += l(line)+1;
47	}
48	offsets.add(ofs);
49	literalOffsets = offsets.toArray();
50	int n = 0;
51	while licensed {
52	s = readLineIgnoreCR(reader, lineBuf);
53	if (s == null || contains(s, "=")) break;
54	try {
55	int iSpace = s.indexOf(' ');
56	long pair = twoIntsToLong(
57	Int.parseInt(s, 0, iSpace, 10),
58	Int.parseInt(s, iSpace+1, l(s), 10));
59	if (onPair != null) onPair.get(pair);
60	else pairs.add(pair);
61	if (((++n) % oneMillion()) == 0) {
62	S percentage = "";
63	if (fileSize != 0 && countingInputStream != null)
64	percentage = " (" + intPercentRatio(countingInputStream.getFilePointer(), fileSize) + "%)";
65	print(nPairs(n) + " read" + percentage);
66	}
67	} on fail {
68	print("On line " + (nLiterals + l(pairs)));
69	}
70	}
71	pairs.trimToSize();
72	while (contains(s, "=")) {
73	int i = indexOf(s, '=');
74	versions.put(takeFirst(s, i), compactIntList(parseInts(splitAtSpace(substring(s, i+1)))));
75	s = readLineIgnoreCR(reader, lineBuf);
76	}
77	}
78
79	Set<S> versions() { ret keys(versions); }
80
81	S getText(S version) { ret textForVersion(version); }
82	S textForVersion(S version) {
83	L<Int> encoded = versions.get(version);
84	if (encoded == null) null;
85	new LS buf;
86	for (int idx : encoded)
87	decode(idx, buf);
88	ret myFromLines(buf);
89	}
90
91	// name of first (or only) file
92	S firstFile() { ret first(versions()); }
93
94	// text for first (or only) file
95	S text() { ret getText(firstFile()); }
96
97	L<Int> encoding() { ret versions.get(firstFile()); }
98
99	S myFromLines(LS l) {
100	ret byteMode
101	? join(l)
102	: fromLines_rtrim(l);
103	}
104
105	void decode(int idx, LS buf) {
106	if (idx < l(literals))
107	buf.add(literals.get(idx));
108	else {
109	long p = pairs.get(idx-l(literals));
110	decode(firstIntFromLong(p), buf);
111	decode(secondIntFromLong(p), buf);
112	}
113	}
114
115	// That was it! The rest of this file is just for calculating some stats.
116
117	new Map<Int> lineCountsForPairs;
118	new Map<Int, Long> byteCountsForPairs;
119
120	int lineCountForPointer(int idx) {
121	ret idx < l(literals) ? 1 : lineCountForPair(idx);
122	}
123
124	long byteCountForPointer(int idx) {
125	ret idx < l(literals) ? l(literals.get(idx))+1 : byteCountForPair(idx);
126	}
127
128	int lineCountForPair(int idx) {
129	Int c = lineCountsForPairs.get(idx);
130	if (c == null) {
131	long p = pairs.get(idx-l(literals));
132	c = lineCountForPointer(firstIntFromLong(p)) + lineCountForPointer(secondIntFromLong(p));
133	lineCountsForPairs.put(idx, c);
134	}
135	ret c;
136	}
137
138	long byteCountForPair(int idx) {
139	Long c = byteCountsForPairs.get(idx);
140	if (c == null) {
141	long p = pairs.get(idx-l(literals));
142	c = byteCountForPointer(firstIntFromLong(p)) + byteCountForPointer(secondIntFromLong(p));
143	byteCountsForPairs.put(idx, c);
144	}
145	ret c;
146	}
147
148	int lineCountForVersion(S version) {
149	L<Int> encoded = versions.get(version);
150	if (encoded == null) ret 0;
151	int n = 0;
152	for (int i : encoded) n += lineCountForPointer(i);
153	ret n;
154	}
155
156	long byteCountForVersion(S version) {
157	L<Int> encoded = versions.get(version);
158	if (encoded == null) ret 0;
159	long n = 0;
160	for (int i : encoded) n += byteCountForPointer(i);
161	ret max(0, n-1);
162	}
163
164	long totalByteCount() {
165	ret longSum(lambdaMap byteCountForVersion(versions()));
166	}
167
168	// now we can also save again
169
170	void save(PrintWriter out) {
171	out.println((byteMode ? "BYTECOMP " : "LINECOMP ") + l(literals));
172	for (S s : literals)
173	out.println(byteMode ? charToHex(first(s)) : s);
174	for (long p : pairs)
175	out.println(firstIntFromLong(p) + " " + secondIntFromLong(p));
176	for (S id, L<Int> l : versions)
177	out.println(id + "=" + joinWithSpace(l));
178	}
179	}

Snippet ID:	#1028182
Snippet name:	LineCompReader - read LINECOMP format
Eternal ID of this version:	#1028182/59
Text MD5:	34176f987e66773e35d6946d5c139f00
Transpilation MD5:	f50c04c11824feba0e4ed0b675fa5957
Author:	stefan
Category:	javax
Type:	JavaX fragment (include)
Public (visible to everyone):	Yes
Archived (hidden from active list):	No
Created/modified:	2020-08-06 15:05:30
Source code size:	5454 bytes / 179 lines
Pitched / IR pitched:	No / No
Views / Downloads:	479 / 1042
Version history:	58 change(s)
Referenced in:	[show references]

< > BotCompany Repo | #1028182 // LineCompReader - read LINECOMP format

JavaX fragment (include) [tags: use-pretranspiled]