Libraryless. Click here for Pure Java version (2625L/17K).
1 | // TODO: empty values might not be handled correctly |
2 | // (e.g. a,,c) |
3 | sclass ExtendedCSVParser implements Steppable { |
4 | S csv; |
5 | int numFields; |
6 | long numRecords; |
7 | long maxRecordsToLoad = -1; // set to >= 0 to limit |
8 | int i, startOfValue; |
9 | bool inQuotedValue; |
10 | |
11 | *() {} |
12 | *(S *csv) {} |
13 | |
14 | run { |
15 | stepAll(this); |
16 | } |
17 | |
18 | public bool step() { |
19 | if (i >= l(csv)) { |
20 | foundValue(substring(csv, startOfValue)); |
21 | endOfRecord(); |
22 | false; |
23 | } |
24 | |
25 | char c = csv.charAt(i); |
26 | if (c == ',') { |
27 | if (!inQuotedValue) { |
28 | foundValue(substring(csv, startOfValue, i)); |
29 | startOfValue = i+1; |
30 | } |
31 | } else if (c == '\r' || c == '\n') { |
32 | if (!inQuotedValue) { |
33 | foundValue(substring(csv, startOfValue, i)); |
34 | startOfValue = i+1; |
35 | if (endOfRecord()) false; |
36 | } |
37 | } else if (c == '"') { |
38 | if (inQuotedValue) { |
39 | if (charAt(csv, i+1) == '"') i++; // double quotes inside of a value |
40 | else |
41 | inQuotedValue = false; |
42 | } else |
43 | set inQuotedValue; |
44 | } |
45 | i++; |
46 | true; |
47 | } |
48 | |
49 | S unquoteValue(S value) { |
50 | ret dropPrefix("\"", dropSuffix("\"", replace(trim(value), "\"\"", "\""))); |
51 | } |
52 | |
53 | void foundValue(S value) { |
54 | if (nempty(value)) { |
55 | ++numFields; |
56 | onValueFound(unquoteValue(value)); |
57 | } |
58 | } |
59 | |
60 | // return true if should exit |
61 | bool endOfRecord() { |
62 | if (numFields == 0) false; |
63 | onEndOfRecord(); |
64 | numFields = 0; |
65 | ret ++numRecords >= maxRecordsToLoad && maxRecordsToLoad >= 0; |
66 | } |
67 | |
68 | swappable void onValueFound(S value) {} |
69 | swappable void onEndOfRecord() {} |
70 | } |
Began life as a copy of #1008333
download show line numbers debug dex old transpilations
Travelled to 4 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, vouqrxazstgt
No comments. add comment
Snippet ID: | #1030417 |
Snippet name: | ExtendedCSVParser - parse CSV with multiple lines in a value, inner quotes escaped as "" |
Eternal ID of this version: | #1030417/11 |
Text MD5: | a30109b0d22a814deab4f4cf8632026f |
Transpilation MD5: | e12f6a057c8b94cb5ad5a2b4b8005432 |
Author: | stefan |
Category: | javax / parsing |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-12-14 16:35:55 |
Source code size: | 1659 bytes / 70 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 233 / 474 |
Version history: | 10 change(s) |
Referenced in: | [show references] |