Uses 1059K of libraries. Click here for Pure Java version (3571L/18K).
1 | !7 |
2 | |
3 | sclass TreeEl { |
4 | new ByteBuffer a; // characters |
5 | new DoubleBuffer b; // frequencies |
6 | new IntBuffer c; // indices into the tree |
7 | } |
8 | |
9 | cmodule2 LetterPredictor > DynPrintLog { |
10 | start-thread { |
11 | ByteBuffer input2 = new(toUTF8("hello hello hello hello hello hello")); |
12 | int de1 = 3; |
13 | int de2 = 16; |
14 | S decode = "0."; |
15 | |
16 | byte[] alphabet = allBytes(); |
17 | double[] weights = { 0.916, 0.88, 0.78, 0.96, 0.9, 0.83, 0.82, 0.54, 0.49, 0.25, 0.25, 0.27, 0.33, 0.53, 0.5, 0.53, 0.72 }; |
18 | double[] weights2 = { 0.72, 0.86, 0.95, 0.93, 0.97 }; |
19 | |
20 | L<TreeEl> tree = ll(new TreeEl); |
21 | int window_start = 1; |
22 | int window_end = 15; |
23 | double low = 1; |
24 | double high = 1; |
25 | double middle = 1; |
26 | new StringBuilder compressed; |
27 | |
28 | for count2 to 50000: { |
29 | ByteBuffer window = new ByteBuffer(input2.subArray(window_start - 1, window_end)); |
30 | window_start++; |
31 | window_end++; |
32 | int char_location = 15; |
33 | int remove = 2; |
34 | int char_index = 1; |
35 | LPair<ByteBuffer, DoubleBuffer> predictions = ll( |
36 | pair(new ByteBuffer(alphabet), new DoubleBuffer(rep(0.00001, l(alphabet))))); |
37 | |
38 | int node = 0; |
39 | while (!(char_index == 0 || char_location == 0) && window_end != 16) { |
40 | if (char_location != 15) { |
41 | byte _char = window.get(char_location - 1); |
42 | char_index = tree.get(node).a.indexOf(_char) + 1; |
43 | } |
44 | char_location++; |
45 | if (char_index != 0 && char_location == 16) { |
46 | TreeEl el = tree.get(node); |
47 | predictions.add(1, pair(el.a, el.b)); |
48 | node = 0; |
49 | char_location -= remove; |
50 | remove++; |
51 | } |
52 | else if (char_index != 0) |
53 | node = tree.get(node).c.get(char_index - 1); |
54 | } |
55 | Pair<ByteBuffer, DoubleBuffer> predict = pair(new ByteBuffer, new DoubleBuffer); |
56 | int remaining = 1; |
57 | |
58 | for q over predictions: { |
59 | Pair<ByteBuffer, DoubleBuffer> j = predictions.get(q); |
60 | double sum2 = doubleSum(j.b); |
61 | double w = sum2 / (l(j.b) == 226 ? 226 |
62 | : l(j.b) < 6 ? 7 * l(j.b) * weights2[l(j.b) - 1] |
63 | : l(j.b) * 7); |
64 | w = w > 20 ? 0.96 : w > 10 ? 0.92 : w > 3 ? 0.89 |
65 | : w> 1.5 ? 0.75 : w > 1 ? 0.64 : w > 0.7 ? 0.57 |
66 | : w > 0.5 ? 0.5 : w > 0.4 ? 0.44 : w; |
67 | double _25ofRoof = (w * weights[l(predictions) - 1 - q]) * remaining; |
68 | remaining -= _25ofRoof; |
69 | for (int g = 0; g < l(j.a); g++) { |
70 | byte k = j.a.get(g); |
71 | double freq = j.b.get(g); |
72 | char_index = predict.a.indexOf(k) + 1; |
73 | if (char_index == 0) { |
74 | predict.a.add(k); |
75 | predict.b.add((freq / sum2) * _25ofRoof); |
76 | } else |
77 | predict.b.set(char_index - 1, predict.b.get(char_index - 1) + (freq / sum2) * _25ofRoof); |
78 | } |
79 | } |
80 | double summ = 1 - doubleSum(predict.b); |
81 | int nPredict = l(predict.b); |
82 | for n to nPredict: |
83 | predict.b.set(n, predict.b.get(n) + summ / l(predict.b)); |
84 | |
85 | low = high; |
86 | double decodepart = parseDouble("0." + substring(decode, de1 - 1, de2)); |
87 | for (int m = 0; m < l(predict.a); m++) { |
88 | byte x = predict.a.get(m); |
89 | high = low; |
90 | low -= predict.b.get(m) * middle; |
91 | if (eq(decode, "0.") && x == last(window)) |
92 | break; |
93 | else if (high > decodepart && decodepart > low) { |
94 | window.add(x); |
95 | input2.add(x); |
96 | break; |
97 | } |
98 | } |
99 | char_location = lCommonPrefix(str(low), str(high)); |
100 | compressed.append(substr(str(low), 2, char_location)); |
101 | char_location -= 2; |
102 | for count to char_location: { |
103 | high *= 10; |
104 | low *= 10; |
105 | de1++; |
106 | de2++; |
107 | } |
108 | middle = floor(low); |
109 | high -= middle; |
110 | low -= middle; |
111 | middle = high - low; |
112 | node = 0; |
113 | |
114 | for (byte i : window) { |
115 | TreeEl el = tree.get(node); |
116 | char_index = el.a.indexOf(i) + 1; |
117 | if (char_index == 0) { |
118 | el.a.add(i); |
119 | el.b.add(1); |
120 | el.c.add(node = l(tree)); |
121 | tree.add(new TreeEl); |
122 | } else { |
123 | el.b.set(char_index - 1, el.b.get(char_index - 1) + 1); |
124 | node = el.c.get(char_index - 1); |
125 | } |
126 | } |
127 | } |
128 | |
129 | print(+compressed); |
130 | } |
131 | } |
download show line numbers debug dex old transpilations
Travelled to 4 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, vouqrxazstgt
No comments. add comment
Snippet ID: | #1030687 |
Snippet name: | genai's letter_predictor [dev.] |
Eternal ID of this version: | #1030687/32 |
Text MD5: | 0c8d60217287450084a0bef04aac5cea |
Transpilation MD5: | 6418fa8739e5fea39adaf7acd50754ca |
Author: | stefan |
Category: | |
Type: | JavaX source code (Dynamic Module) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2021-03-08 12:31:35 |
Source code size: | 4245 bytes / 131 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 241 / 582 |
Version history: | 31 change(s) |
Referenced in: | [show references] |