Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

70
LINES

< > BotCompany Repo | #1030417 // ExtendedCSVParser - parse CSV with multiple lines in a value, inner quotes escaped as ""

JavaX fragment (include) [tags: use-pretranspiled]

Libraryless. Click here for Pure Java version (2625L/17K).

1  
// Incremental CSV parser.
// Handles quoted values that span several lines, inner quotes escaped by
// doubling them, and empty values (e.g. a,,c gives three fields).
// Blank lines, and the LF of a CR LF pair, do not produce a record.
// Results are delivered through onValueFound and onEndOfRecord.
sclass ExtendedCSVParser implements Steppable {
  S csv;                      // text to parse
  int numFields;              // values reported so far in the current record
  long numRecords;            // records completed so far
  long maxRecordsToLoad = -1; // set to >= 0 to limit
  int i, startOfValue;        // scan position / index where the current value begins
  bool inQuotedValue;         // true between an opening and a closing quote
  
  *() {}
  *(S *csv) {}
  
  // Parses the input until it is exhausted or the record limit is reached.
  run {
    stepAll(this);
  }
  
  // Consumes one character of csv.
  // Returns false when the input is exhausted or the record limit is reached,
  // true if there is more to do.
  public bool step() {
    if (i >= l(csv)) {
      // flush whatever follows the last separator
      endOfLine(substring(csv, startOfValue));
      // further calls find nothing pending instead of reporting the last value again
      startOfValue = l(csv);
      false;
    }
    
    char c = csv.charAt(i);
    if (c == '"') {
      if (!inQuotedValue)
        set inQuotedValue;
      else if (charAt(csv, i+1) == '"')
        i++; // doubled quote inside of a value: skip its second half
      else
        inQuotedValue = false;
    } else if (!inQuotedValue) {
      // separators only count outside of quotes
      if (c == ',') {
        foundValue(substring(csv, startOfValue, i));
        startOfValue = i+1;
      } else if (c == '\r' || c == '\n') {
        S lastValue = substring(csv, startOfValue, i);
        // move past the terminator first so the state stays consistent
        // even when the record limit stops us here
        startOfValue = ++i;
        ret !endOfLine(lastValue);
      }
    }
    i++;
    true;
  }
  
  // Turns the raw text of a field into its value: trims it, removes one
  // enclosing quote on each side, then collapses doubled quotes.
  // The enclosing quotes must go first; otherwise a field consisting only
  // of an escaped quote would lose its content.
  S unquoteValue(S value) {
    S inner = dropPrefix("\"", dropSuffix("\"", trim(value)));
    ret replace(inner, "\"\"", "\"");
  }
  
  // Reports one field of the current record (empty fields included).
  void foundValue(S value) {
    ++numFields;
    onValueFound(unquoteValue(value));
  }
  
  // Ends the current line: reports its last value and closes the record.
  // An empty value on a line without any earlier value means a blank line
  // (or the LF following a CR) and is skipped.
  // return true if should exit
  bool endOfLine(S lastValue) {
    if (numFields != 0 || nempty(lastValue))
      foundValue(lastValue);
    ret endOfRecord();
  }
  
  // Closes the current record if it has at least one field.
  // return true if should exit
  bool endOfRecord() {
    if (numFields == 0) false;
    onEndOfRecord();
    numFields = 0;
    ret ++numRecords >= maxRecordsToLoad && maxRecordsToLoad >= 0;
  }
  
  // Called for every field, with the unquoted value.
  swappable void onValueFound(S value) {}
  // Called after the last field of every non-empty record.
  swappable void onEndOfRecord() {}
}

Author comment

Began life as a copy of #1008333

download  show line numbers  debug dex  old transpilations   

Travelled to 4 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, vouqrxazstgt

No comments. add comment

Snippet ID: #1030417
Snippet name: ExtendedCSVParser - parse CSV with multiple lines in a value, inner quotes escaped as ""
Eternal ID of this version: #1030417/11
Text MD5: a30109b0d22a814deab4f4cf8632026f
Transpilation MD5: e12f6a057c8b94cb5ad5a2b4b8005432
Author: stefan
Category: javax / parsing
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2020-12-14 16:35:55
Source code size: 1659 bytes / 70 lines
Pitched / IR pitched: No / No
Views / Downloads: 157 / 386
Version history: 10 change(s)
Referenced in: [show references]