Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

248
LINES

< > BotCompany Repo | #1029446 // BufferedDiskIntMemory64 [handles files > 16 GB, memory-mapped]

JavaX fragment (include) [tags: use-pretranspiled]

Uses 11335K of libraries. Click here for Pure Java version (8103L/51K).

1  
!include once #1027304 // Eclipse Collections
2  
3  
import java.nio.*;
4  
import java.nio.channels.*;
5  
6  
// read-only, so far. should be thread-safe
7  
// cache/cacheSize etc. are not used when useByteBuffers = true
8  
// fast version (uses memory-mapped IO)
9  
final sclass BufferedDiskIntMemory64 implements IIntMemory64, AutoCloseable {
10  
  File file;
11  
  long size; // file size in ints
12  
  RandomAccessFile raf;
13  
  bool bigEndian = true;
14  
  bool writable = true;
15  
  bool useByteBuffers = true;
16  
  bool debug, verboseEvictions;
17  
  long cacheSize = 128*1024*1024; // in bytes
18  
  LongLongHashMap l1cache; // experimental
19  
  
20  
  // pageSize is in ints
21  
  // page indices are ints interpreted as unsigned
22  
  int pageShift, pageSize, maxCachedPages;
23  
  new LongObjectHashMap<CacheEntry> cache;
24  
  CacheEntry newestCacheEntry, oldestCacheEntry;
25  
  
26  
  // byte buffers
27  
  int byteBufferShift = 28; // in ints, so each buffer is 1 GB
28  
  long byteBufferSize = 1 << byteBufferShift;
29  
  MappedByteBuffer[] byteBuffers;
30  
  
31  
  // stats
32  
  long pageLoads, evictions;
33  
  long pageLoadPrintInterval = 1000;
34  
  
35  
  static int defaultPageSize = 4096; // in bytes
36  
  
37  
  sclass CacheEntry {
38  
    long page;
39  
    bool dirty;
40  
    int[] data;
41  
    CacheEntry newer, older; // MRU list
42  
    
43  
    toString { ret "Page " + page; }
44  
  }
45  
  
46  
  *() {
47  
    setPageSize(4096);
48  
  }
49  
  
50  
  synchronized void setPageSize(int pageSizeInBytes) {
51  
    clearCache();
52  
    pageShift = 31-Int.numberOfLeadingZeros(pageSizeInBytes >> 2);
53  
    pageSize = 1 << pageShift;
54  
    updateMaxCachedPages();
55  
  }
56  
  
57  
  void setCacheSize(long bytes) { setMaxCachedBytes(bytes); }
58  
  synchronized void setMaxCachedBytes(long bytes) {
59  
    cacheSize = bytes;
60  
    updateMaxCachedPages();
61  
  }
62  
  
63  
  // internal
64  
  void updateMaxCachedPages {
65  
    maxCachedPages = (int) (cacheSize >> (pageShift+2));
66  
  }
67  
  
68  
  long maxCachedBytes() {
69  
    ret ((long) maxCachedPages) << (pageShift+2);
70  
  }
71  
  
72  
  long cachedBytes() {
73  
    ret ((long) cache.size()) << (pageShift+2);
74  
  }
75  
  
76  
  synchronized void clearCache {
77  
    flush();
78  
    cache = new LongObjectHashMap;
79  
    newestCacheEntry = oldestCacheEntry = null;
80  
  }
81  
  
82  
  
83  
  *(File *file) {
84  
    this(file, false);
85  
  }
86  
  
87  
  *(File *file, bool *writable) {
88  
    this();
89  
    load(file, writable);
90  
  }
91  
  
92  
  void load(File file, bool writable default false) {
93  
    this.file = file;
94  
    this.writable = writable;
95  
    size = fileSize(file)/4;
96  
    raf = newRandomAccessFile(file, writable ? "rw" : "r");
97  
    
98  
    if (useByteBuffers) ctex {
99  
      byteBuffers = new MappedByteBuffer[toInt(rightShift_ceil(size, byteBufferShift))];
100  
      FileChannel channel = raf.getChannel();
101  
      for i over byteBuffers: {
102  
        long pos = (long) i << (byteBufferShift+2);
103  
        int len = toInt(min(1 << (byteBufferShift+2), size*4-pos));
104  
        //print("Mapping bytes " + longToHex(pos) + "-" + longToHex(pos+len));
105  
        byteBuffers[i] = channel.map(FileChannel.MapMode.READ_ONLY, pos, len);
106  
        byteBuffers[i].order(bigEndian ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN);
107  
      }
108  
    }
109  
  }
110  
  
111  
  public synchronized void close {
112  
    flush();
113  
    dispose raf;
114  
    cache = null;
115  
  }
116  
  
117  
  synchronized void flush {
118  
    for (CacheEntry e : cache.values())
119  
      flushPage(e);
120  
  }
121  
  
122  
  ifndef BufferedDiskIntMemory64_unsynchronized synchronized endifndef
123  
  public int get(long idx) {
124  
    rangeCheck(idx, size);
125  
    if (l1cache == null)
126  
      ret get_raw(idx);
127  
    long val = l1cache.getIfAbsent(idx, Long.MAX_VALUE);
128  
    if (val == Long.MAX_VALUE)
129  
      l1cache.put(idx, val = get_raw(idx));
130  
    ret (int) val;
131  
  }
132  
    
133  
  int get_raw(long idx) {
134  
    if (useByteBuffers)
135  
      ret byteBuffers[(int) (idx >> byteBufferShift)].getInt(((int) (idx & (byteBufferSize-1)))*4);
136  
    ret loadCell(idx).data[(int) idx & (pageSize-1)];
137  
  }
138  
  
139  
  public synchronized void set(long idx, int val) {
140  
    checkWritable();
141  
    rangeCheck(idx, size);
142  
    CacheEntry e = loadCell(idx);
143  
    e.data[(int) idx & (pageSize-1)] = val;
144  
    e.dirty = true;
145  
  }
146  
  
147  
  CacheEntry loadCell(long idx) {
148  
    long page = idx >> pageShift;
149  
    CacheEntry e = cache.get(page);
150  
    if (e == null) {
151  
      if (debug) print("Accessing unloaded cell " + longToHex(idx));
152  
      e = loadPage(page);
153  
    } else
154  
      touchPage(e);
155  
    ret e;
156  
  }
157  
  
158  
  void touchPage(CacheEntry e) {
159  
    if (e == newestCacheEntry) ret;
160  
    if (debug) print("Touching page " + e.page + ". Older=" + e.older + ", newer=" + e.newer + ". Oldest=" + oldestCacheEntry + ", newest=" + newestCacheEntry);
161  
    
162  
    // Take out of list
163  
    removeFromLinkedList(e);
164  
    
165  
    // re-insert at top
166  
    e.newer = null; // nobody newer than us
167  
    e.older = newestCacheEntry; // point to previous newest
168  
    newestCacheEntry.newer = e; // point previous newest to us
169  
    newestCacheEntry = e; // make us the newest
170  
    
171  
    if (debug) print("Touched page " + e.page + ". Older=" + e.older + ", newer=" + e.newer + ". Oldest=" + oldestCacheEntry + ", newest=" + newestCacheEntry);
172  
  }
173  
  
174  
  void removeFromLinkedList(CacheEntry e) {
175  
    if (e.older != null)
176  
      e.older.newer = e.newer; // There is an older page, point it to who was on top of us
177  
    else
178  
      oldestCacheEntry = e.newer; // We were the oldest, point it to who was on top uf os
179  
    if (e.newer != null)
180  
      e.newer.older = e.older; // Point who was on top of us to who was behind us (or no one)
181  
    else
182  
      newestCacheEntry = e.older;
183  
  }
184  
  
185  
  bool cacheFull() { ret cache.size() >= maxCachedPages; }
186  
  
187  
  void evictAPage {
188  
    ++evictions;
189  
    CacheEntry e = oldestCacheEntry;
190  
    flushPage(e);
191  
    if (debug || verboseEvictions) print("Evicting page " + e.page);
192  
    cache.remove(e.page);
193  
    removeFromLinkedList(e);
194  
  }
195  
  
196  
  void flushPage(CacheEntry e) ctex {
197  
    if (!e.dirty) ret;
198  
    if (debug) print("Saving page " + e.page);
199  
    raf.seek(e.page << (pageShift+2));
200  
    byte[] buf = bigEndian ? intArrayToBytes(e.data) : intArrayToBytes_littleEndian(e.data);
201  
    raf.write(buf);
202  
    e.dirty = false;
203  
  }
204  
  
205  
  CacheEntry loadPage(long page) ctex {
206  
    if (((++pageLoads) % pageLoadPrintInterval) == 0)
207  
      print(fileName(file) + ": " + n2(pageLoads, "page load")
208  
        + ", cache size: " + toM(usedCacheSize()) + "/" + toM(maxCacheSize()) + "M");
209  
    if (cacheFull()) evictAPage();
210  
    if (debug) print("Loading page " + page);
211  
    raf.seek(page << (pageShift+2));
212  
    byte[] buf = new[pageSize*4];
213  
    raf.read(buf);
214  
    new CacheEntry e;
215  
    e.page = page;
216  
    e.data = bigEndian ? intArrayFromBytes(buf) : intArrayFromBytes_littleEndian(buf);
217  
    cache.put(page, e);
218  
    
219  
    // put e in front of MRU list
220  
    e.older = newestCacheEntry; // point to previous top
221  
    if (newestCacheEntry != null) // list is not empty, update "newer" pointer of current top
222  
      newestCacheEntry.newer = e;
223  
    else oldestCacheEntry = e; // otherwise, we are also the oldest entry
224  
    newestCacheEntry = e; // we are new top
225  
    
226  
    if (debug) print("Loaded page " + e.page + ". Older=" + e.older + ", newer=" + e.newer + ". Oldest=" + oldestCacheEntry + ", newest=" + newestCacheEntry);
227  
    ret e;
228  
  }
229  
  
230  
  void checkWritable {
231  
    if (!writable) fail("read-only");
232  
  }
233  
  
234  
  public long size() {
235  
    ret size;
236  
  }
237  
  
238  
  double cacheFullPercentage() {
239  
    ret percentRatio(cache.size(), maxCachedPages);
240  
  }
241  
  
242  
  long usedCacheSize() { ret cache.size()*(long) pageSize; }
243  
  long maxCacheSize() { ret cacheSize; }
244  
  
245  
  public synchronized void ensureSize(int size) {
246  
    this.size = max(this.size, size);
247  
  }
248  
}

Author comment

Began life as a copy of #1029369

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029446
Snippet name: BufferedDiskIntMemory64 [handles files > 16 GB, memory-mapped]
Eternal ID of this version: #1029446/38
Text MD5: 6fc90a3821630cbf818f05893e5093cf
Transpilation MD5: 79d89e771302c7d9d442147bbb07ec42
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2021-06-23 23:44:01
Source code size: 7652 bytes / 248 lines
Pitched / IR pitched: No / No
Views / Downloads: 345 / 756
Version history: 37 change(s)
Referenced in: [show references]