Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

248
LINES

< > BotCompany Repo | #1029446 // BufferedDiskIntMemory64 [handles files > 16 GB, memory-mapped]

JavaX fragment (include) [tags: use-pretranspiled]

Uses 11335K of libraries. Click here for Pure Java version (8103L/51K).

!include once #1027304 // Eclipse Collections

import java.nio.*;
import java.nio.channels.*;

// An int array backed by a disk file, addressed with 64-bit (long) indices.
//
// Two access paths exist, selected by useByteBuffers:
// -useByteBuffers = true (default, fast): get() reads through memory-mapped
//  buffers. The buffers are mapped READ_ONLY, so this mode is effectively
//  read-only so far: set() still writes into the page cache, and those
//  writes are NOT visible to get_raw() while useByteBuffers is true.
//  cache/cacheSize etc. are not used for reading in this mode.
// -useByteBuffers = false: get() and set() go through a cache of fixed-size
//  pages loaded from/flushed to the RandomAccessFile. The least recently
//  touched page is evicted when the cache is full.
//
// Should be thread-safe: the mutating entry points are synchronized, and
// get() is synchronized unless BufferedDiskIntMemory64_unsynchronized is defined.
final sclass BufferedDiskIntMemory64 implements IIntMemory64, AutoCloseable {
  File file;
  long size; // file size in ints
  RandomAccessFile raf;
  bool bigEndian = true;
  bool writable = true;
  bool useByteBuffers = true;
  bool debug, verboseEvictions;
  long cacheSize = 128*1024*1024; // in bytes
  LongLongHashMap l1cache; // experimental; optional per-cell cache consulted by get()
  
  // pageSize is in ints (always a power of two, pageSize == 1 << pageShift)
  // page index = cell index >> pageShift (a long)
  int pageShift, pageSize, maxCachedPages;
  new LongObjectHashMap<CacheEntry> cache;
  CacheEntry newestCacheEntry, oldestCacheEntry;
  
  // byte buffers
  int byteBufferShift = 28; // in ints, so each buffer is 1 GB
  long byteBufferSize = 1L << byteBufferShift; // in ints; computed once at construction
  MappedByteBuffer[] byteBuffers;
  
  // stats
  long pageLoads, evictions;
  long pageLoadPrintInterval = 1000; // print a stats line every N page loads (0 = never)
  
  static int defaultPageSize = 4096; // in bytes
  
  // One cached page. Entries form a doubly linked list ordered by last use.
  sclass CacheEntry {
    long page;   // page index
    bool dirty;  // true if data was modified and not yet written to disk
    int[] data;  // page contents
    CacheEntry newer, older; // MRU list
    
    toString { ret "Page " + page; }
  }
  
  // Creates an instance with 4096-byte pages and no file loaded yet.
  *() {
    setPageSize(4096);
  }
  
  // Sets the page size (given in bytes). The value is rounded down to a
  // power of two number of ints. Flushes and drops all cached pages first.
  synchronized void setPageSize(int pageSizeInBytes) {
    // Anything below one int would yield pageShift = -1 and a negative pageSize
    if (pageSizeInBytes < 4) fail("Page size must be at least 4 bytes: " + pageSizeInBytes);
    clearCache();
    pageShift = 31-Int.numberOfLeadingZeros(pageSizeInBytes >> 2);
    pageSize = 1 << pageShift;
    updateMaxCachedPages();
  }
  
  // Alias for setMaxCachedBytes
  void setCacheSize(long bytes) { setMaxCachedBytes(bytes); }
  
  // Sets the page cache budget in bytes and recomputes maxCachedPages.
  // Does not evict pages that are already cached.
  synchronized void setMaxCachedBytes(long bytes) {
    cacheSize = bytes;
    updateMaxCachedPages();
  }
  
  // internal
  void updateMaxCachedPages {
    // pageShift+2 = log2 of the page size in bytes
    maxCachedPages = (int) (cacheSize >> (pageShift+2));
  }
  
  // Cache budget in bytes, rounded down to whole pages
  long maxCachedBytes() {
    ret ((long) maxCachedPages) << (pageShift+2);
  }
  
  // Bytes currently held in cached pages
  long cachedBytes() {
    ret ((long) cache.size()) << (pageShift+2);
  }
  
  // Writes back dirty pages, then drops all cached pages.
  synchronized void clearCache {
    flush();
    cache = new LongObjectHashMap;
    newestCacheEntry = oldestCacheEntry = null;
  }
  
  
  // Opens the file read-only
  *(File *file) {
    this(file, false);
  }
  
  *(File *file, bool *writable) {
    this();
    load(file, writable);
  }
  
  // Opens the file ("rw" if writable, otherwise "r") and, if useByteBuffers
  // is set, maps it in chunks of 1 << (byteBufferShift+2) bytes.
  // size is the file length in bytes divided by 4.
  void load(File file, bool writable default false) {
    this.file = file;
    this.writable = writable;
    size = fileSize(file)/4;
    raf = newRandomAccessFile(file, writable ? "rw" : "r");
    
    if (useByteBuffers) ctex {
      byteBuffers = new MappedByteBuffer[toInt(rightShift_ceil(size, byteBufferShift))];
      FileChannel channel = raf.getChannel();
      for i over byteBuffers: {
        long pos = (long) i << (byteBufferShift+2); // byte offset of this chunk
        // the last chunk may be shorter than a full buffer
        int len = toInt(min(1 << (byteBufferShift+2), size*4-pos));
        //print("Mapping bytes " + longToHex(pos) + "-" + longToHex(pos+len));
        // NOTE: mapped READ_ONLY even when writable is true
        byteBuffers[i] = channel.map(FileChannel.MapMode.READ_ONLY, pos, len);
        byteBuffers[i].order(bigEndian ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN);
      }
    }
  }
  
  // Writes back dirty pages and releases the file. Safe to call more than once.
  public synchronized void close {
    flush();
    dispose raf;
    cache = null;
  }
  
  // Writes all dirty cached pages to the file.
  synchronized void flush {
    if (cache == null) ret; // already closed, nothing left to write
    for (CacheEntry e : cache.values())
      flushPage(e);
  }
  
  // Returns the int at cell index idx (range-checked against size).
  // If l1cache is set, values are looked up there first and stored on a miss.
  ifndef BufferedDiskIntMemory64_unsynchronized synchronized endifndef
  public int get(long idx) {
    rangeCheck(idx, size);
    if (l1cache == null)
      ret get_raw(idx);
    // Long.MAX_VALUE can never be a stored value (values are ints), so it marks "absent"
    long val = l1cache.getIfAbsent(idx, Long.MAX_VALUE);
    if (val == Long.MAX_VALUE)
      l1cache.put(idx, val = get_raw(idx));
    ret (int) val;
  }
    
  // Reads a cell without range check and without consulting l1cache.
  int get_raw(long idx) {
    if (useByteBuffers)
      // high bits select the buffer, low bits (times 4) the byte offset inside it
      ret byteBuffers[(int) (idx >> byteBufferShift)].getInt(((int) (idx & (byteBufferSize-1)))*4);
    ret loadCell(idx).data[(int) idx & (pageSize-1)];
  }
  
  // Writes val to cell idx in the page cache and marks the page dirty.
  // Fails with "read-only" unless writable is set.
  // NOTE: with useByteBuffers = true, get_raw() reads the read-only mapping
  // and does not see values written here.
  public synchronized void set(long idx, int val) {
    checkWritable();
    rangeCheck(idx, size);
    CacheEntry e = loadCell(idx);
    e.data[(int) idx & (pageSize-1)] = val;
    e.dirty = true;
    // keep the per-cell cache coherent so get() does not return a stale value
    if (l1cache != null) l1cache.put(idx, val);
  }
  
  // Returns the cache entry for the page containing idx, loading the page
  // if necessary, and marks it as most recently used.
  CacheEntry loadCell(long idx) {
    long page = idx >> pageShift;
    CacheEntry e = cache.get(page);
    if (e == null) {
      if (debug) print("Accessing unloaded cell " + longToHex(idx));
      e = loadPage(page);
    } else
      touchPage(e);
    ret e;
  }
  
  // Moves e to the "newest" end of the usage list.
  void touchPage(CacheEntry e) {
    if (e == newestCacheEntry) ret;
    if (debug) print("Touching page " + e.page + ". Older=" + e.older + ", newer=" + e.newer + ". Oldest=" + oldestCacheEntry + ", newest=" + newestCacheEntry);
    
    // Take out of list
    removeFromLinkedList(e);
    
    // re-insert at top
    // (newestCacheEntry is non-null here: e was not the newest, so at least
    // one other entry remains after unlinking e)
    e.newer = null; // nobody newer than us
    e.older = newestCacheEntry; // point to previous newest
    newestCacheEntry.newer = e; // point previous newest to us
    newestCacheEntry = e; // make us the newest
    
    if (debug) print("Touched page " + e.page + ". Older=" + e.older + ", newer=" + e.newer + ". Oldest=" + oldestCacheEntry + ", newest=" + newestCacheEntry);
  }
  
  // Unlinks e from the usage list, fixing up the oldest/newest pointers.
  // Does not reset e.older/e.newer.
  void removeFromLinkedList(CacheEntry e) {
    if (e.older != null)
      e.older.newer = e.newer; // There is an older page, point it to who was on top of us
    else
      oldestCacheEntry = e.newer; // We were the oldest, point it to who was on top of us
    if (e.newer != null)
      e.newer.older = e.older; // Point who was on top of us to who was behind us (or no one)
    else
      newestCacheEntry = e.older; // We were the newest
  }
  
  bool cacheFull() { ret cache.size() >= maxCachedPages; }
  
  // Writes back (if dirty) and drops the least recently used page.
  // Does nothing if no page is cached.
  void evictAPage {
    CacheEntry e = oldestCacheEntry;
    if (e == null) ret; // nothing cached (e.g. maxCachedPages is 0)
    ++evictions;
    flushPage(e);
    if (debug || verboseEvictions) print("Evicting page " + e.page);
    cache.remove(e.page);
    removeFromLinkedList(e);
  }
  
  // Writes the page to the file if it is dirty, then clears the dirty flag.
  void flushPage(CacheEntry e) ctex {
    if (!e.dirty) ret;
    if (debug) print("Saving page " + e.page);
    raf.seek(e.page << (pageShift+2)); // byte offset of the page
    byte[] buf = bigEndian ? intArrayToBytes(e.data) : intArrayToBytes_littleEndian(e.data);
    raf.write(buf);
    e.dirty = false;
  }
  
  // Reads a page from the file into a new cache entry (evicting one page
  // first if the cache is full) and makes it the most recently used entry.
  CacheEntry loadPage(long page) ctex {
    ++pageLoads;
    if (pageLoadPrintInterval != 0 && (pageLoads % pageLoadPrintInterval) == 0)
      print(fileName(file) + ": " + n2(pageLoads, "page load")
        + ", cache size: " + toM(usedCacheSize()) + "/" + toM(maxCacheSize()) + "M");
    if (cacheFull()) evictAPage();
    if (debug) print("Loading page " + page);
    raf.seek(page << (pageShift+2));
    byte[] buf = new[pageSize*4];
    // RandomAccessFile.read may deliver fewer bytes than requested, so keep
    // reading until the page is full or EOF is reached (the rest stays zero).
    int ofs = 0;
    while (ofs < buf.length) {
      int n = raf.read(buf, ofs, buf.length-ofs);
      if (n < 0) break;
      ofs += n;
    }
    new CacheEntry e;
    e.page = page;
    e.data = bigEndian ? intArrayFromBytes(buf) : intArrayFromBytes_littleEndian(buf);
    cache.put(page, e);
    
    // put e in front of MRU list
    e.older = newestCacheEntry; // point to previous top
    if (newestCacheEntry != null) // list is not empty, update "newer" pointer of current top
      newestCacheEntry.newer = e;
    else oldestCacheEntry = e; // otherwise, we are also the oldest entry
    newestCacheEntry = e; // we are new top
    
    if (debug) print("Loaded page " + e.page + ". Older=" + e.older + ", newer=" + e.newer + ". Oldest=" + oldestCacheEntry + ", newest=" + newestCacheEntry);
    ret e;
  }
  
  void checkWritable {
    if (!writable) fail("read-only");
  }
  
  // Number of ints addressable through this memory
  public long size() {
    ret size;
  }
  
  double cacheFullPercentage() {
    ret percentRatio(cache.size(), maxCachedPages);
  }
  
  // Both in bytes, so they can be compared/printed side by side
  long usedCacheSize() { ret cachedBytes(); }
  long maxCacheSize() { ret cacheSize; }
  
  // Raises the logical size (in ints) to at least the given value.
  // Only updates the size field; does not touch the file or the mapped buffers.
  public synchronized void ensureSize(int size) {
    this.size = max(this.size, size);
  }
}

Author comment

Began life as a copy of #1029369

download  show line numbers  debug dex  old transpilations   

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

No comments. add comment

Snippet ID: #1029446
Snippet name: BufferedDiskIntMemory64 [handles files > 16 GB, memory-mapped]
Eternal ID of this version: #1029446/38
Text MD5: 6fc90a3821630cbf818f05893e5093cf
Transpilation MD5: 79d89e771302c7d9d442147bbb07ec42
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2021-06-23 23:44:01
Source code size: 7652 bytes / 248 lines
Pitched / IR pitched: No / No
Views / Downloads: 343 / 752
Version history: 37 change(s)
Referenced in: [show references]