Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

70
LINES

< > BotCompany Repo | #1030417 // ExtendedCSVParser - parse CSV with multiple lines in a value, inner quotes escaped as ""

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (2625L/17K).

// Incremental CSV parser. Each call to step() consumes one character of csv.
// Supports quoted values that span multiple lines and inner quotes escaped as "".
// Empty values are reported as "" (e.g. a,,c yields three fields; a, yields two).
// Lines without any content (blank lines, the gap between \r and \n) are skipped
// and produce neither a value nor a record.
sclass ExtendedCSVParser implements Steppable {
  S csv; // the text to parse
  int numFields; // number of values emitted so far in the current record
  long numRecords; // number of records completed so far
  long maxRecordsToLoad = -1; // set to >= 0 to limit
  int i, startOfValue; // current scan position; index where the current value starts
  bool inQuotedValue; // true while between an opening and a closing quote
  
  *() {}
  *(S *csv) {}
  
  run {
    stepAll(this);
  }
  
  // Processes the character at position i.
  // Returns false when parsing is finished (end of input reached,
  // or maxRecordsToLoad records have been completed), true otherwise.
  public bool step() {
    if (i >= l(csv)) {
      endOfLine(substring(csv, startOfValue));
      // mark the tail as consumed so another step() call does not re-emit it
      startOfValue = l(csv);
      false;
    }
    
    char c = csv.charAt(i);
    if (c == ',') {
      // commas inside a quoted value are part of the value
      if (!inQuotedValue) {
        foundValue(substring(csv, startOfValue, i));
        startOfValue = i+1;
      }
    } else if (c == '\r' || c == '\n') {
      // line breaks inside a quoted value are part of the value
      if (!inQuotedValue) {
        S value = substring(csv, startOfValue, i);
        startOfValue = i+1;
        if (endOfLine(value)) false;
      }
    } else if (c == '"') {
      if (inQuotedValue) {
        if (charAt(csv, i+1) == '"') i++; // double quotes inside of a value
        else
          inQuotedValue = false;
      } else
        set inQuotedValue;
    }
    i++;
    true;
  }
  
  // Converts the raw text of a value into its content:
  // trims surrounding whitespace, removes the enclosing quotes
  // and turns each escaped "" into a single ".
  S unquoteValue(S value) {
    S s = trim(value);
    // Properly quoted value: strip the outer quotes BEFORE unescaping,
    // otherwise an escaped quote at the start or end of the content
    // is mistaken for the enclosing quote (e.g. """" must yield ").
    if (l(s) >= 2 && s.startsWith("\"") && s.endsWith("\""))
      ret replace(substring(s, 1, l(s)-1), "\"\"", "\"");
    // Unquoted or malformed value: keep the previous lenient handling
    ret dropPrefix("\"", dropSuffix("\"", replace(s, "\"\"", "\"")));
  }
  
  // Reports one value of the current record (empty values included).
  void foundValue(S value) {
    ++numFields;
    onValueFound(unquoteValue(value));
  }
  
  // Handles the end of a line (or of the input) whose last raw value is lastValue.
  // A line with no content at all is skipped entirely.
  // return true if should exit
  bool endOfLine(S lastValue) {
    if (numFields == 0 && !nempty(lastValue)) false;
    foundValue(lastValue);
    ret endOfRecord();
  }
  
  // return true if should exit
  bool endOfRecord() {
    if (numFields == 0) false;
    onEndOfRecord();
    numFields = 0;
    ret ++numRecords >= maxRecordsToLoad && maxRecordsToLoad >= 0;
  }
  
  // Hook: called for every value, already unquoted
  swappable void onValueFound(S value) {}
  // Hook: called after the last value of every record
  swappable void onEndOfRecord() {}
}

Author comment

Began life as a copy of #1008333

download  show line numbers  debug dex  old transpilations   

Travelled to 4 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, vouqrxazstgt

No comments. add comment

Snippet ID: #1030417
Snippet name: ExtendedCSVParser - parse CSV with multiple lines in a value, inner quotes escaped as ""
Eternal ID of this version: #1030417/11
Text MD5: a30109b0d22a814deab4f4cf8632026f
Transpilation MD5: e12f6a057c8b94cb5ad5a2b4b8005432
Author: stefan
Category: javax / parsing
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-12-14 16:35:55
Source code size: 1659 bytes / 70 lines
Pitched / IR pitched: No / No
Views / Downloads: 231 / 471
Version history: 10 change(s)
Referenced in: [show references]