Qore CsvUtil Module Reference  1.6.2
 All Classes Namespaces Functions Variables Groups Pages
CsvUtil.qm.dox.h
1 // -*- mode: c++; indent-tabs-mode: nil -*-
2 // @file CsvUtil.qm Qore user module for working with CSV files
3 
4 /* CsvUtil.qm Copyright 2012 - 2018 Qore Technologies, s.r.o.
5 
6  Permission is hereby granted, free of charge, to any person obtaining a
7  copy of this software and associated documentation files (the "Software"),
8  to deal in the Software without restriction, including without limitation
9  the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  and/or sell copies of the Software, and to permit persons to whom the
11  Software is furnished to do so, subject to the following conditions:
12 
13  The above copyright notice and this permission notice shall be included in
14  all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  DEALINGS IN THE SOFTWARE.
23 */
24 
25 // minimum required Qore version
26 
27 
28 // assume local var scope, do not use "$" for vars, members, and method calls
29 
30 
31 /* see release notes below for version history
32 */
33 
305 class CsvHelper { // helper base class; AbstractCsvIterator and AbstractCsvWriter both inherit from it privately (declarations only in this listing)
306 
307 public:
308  private :
309  const C_OPT1 = 0x1; // bit flag OR-ed into the Options hashes of the derived classes; presumably tags options accepted by the older "fields"-based API - TODO confirm
310  const C_OPT2 = 0x2; // bit flag OR-ed into the Options hashes of the derived classes; presumably tags options accepted by the newer spec-based API - TODO confirm
312  const Types = ( // field type names used as lookup keys (every key maps to True); the "*" prefix presumably marks the optional variant of a type - TODO confirm
313  "int": True,
314  "*int": True,
315  "float": True,
316  "*float": True,
317  "number": True,
318  "*number": True,
319  "string": True,
320  "*string": True,
321  "date": True,
322  "*date": True,
323  );
324 
326  const FieldAttrs = ("type", "format", "timezone", "code", "header"); // list of attribute names; presumably the keys allowed in a field description - TODO confirm
327 
329  bool tolwr = False; // off by default; presumably backs the "tolwr" option listed in AbstractCsvIterator::Options - TODO confirm
330 
332  string date_format; // presumably set from the "date_format" / "date-format" option - TODO confirm
333 
335  string number_format; // presumably set from the "number_format" option - TODO confirm
336 
338  hash m_specs; // NOTE(review): looks like the stored record specification(s) - confirm
339 
341  string errname; // NOTE(review): presumably initialized from the constructor's n_errname argument - confirm
342 
343  // reorder data according to the headers set by options.headers or read from the CSV header
344  bool headerReorder = True;
345 
346 public:
347 
349  constructor (string n_errname); // takes the value presumably stored in errname - TODO confirm
350 
351 
353 
354 private:
355  bool isMultiType(); // NOTE(review): name suggests it reports whether more than one record type is configured - confirm
356 public:
357 
358 
360 
361 private:
362  checkType(string fld_errs, string key, string value); // NOTE(review): presumably validates a type name against Types - confirm
363 public:
364 
365 
366  // get spec from options.fields for old Csv. Check spec param for new Csv
367 
368 private:
369  hash getSpec(*hash fields, string fld_errs, int C_OPTx); // C_OPTx is presumably one of C_OPT1 / C_OPT2 - TODO confirm
370 public:
371 
372 
373 
374 private:
375  hash getSpec1(*hash fields); // NOTE(review): presumably the C_OPT1 variant of getSpec() - confirm
376 public:
377 
378 
379 
380 private:
381  hash getSpec2(hash spec); // NOTE(review): presumably the C_OPT2 variant of getSpec() - confirm
382 public:
383 
384 
390 private:
391  list adjustFieldsFromHeaders(string type, *list headers, bool check = False); // check defaults to False; behavior is not visible in this listing
392 public:
393 
394 
395 }; // class CsvHelper
396 
398 namespace CsvUtil {
400  const EOL_UNIX = "\n"; // Unix end of line character sequence (for new OS X too)
402  const EOL_WIN = "\r\n"; // MS DOS/Windows end of line character sequence
404  const EOL_MACINTOSH = "\r"; // old (pre-OSX) Macintosh end of line character sequence
405 
406  // helper list of end of line values
407  const EOLS = (EOL_UNIX, EOL_WIN, EOL_MACINTOSH, );
408 
410  const CSV_TYPE_UNKNOWN = "<unknown>"; // record type when not matching any type
412  const CSV_TYPE_SINGLE = "<single>"; // record type when multi-type is disabled
413 
416 
419 
420 
422 
665 class AbstractCsvIterator : public Qore::AbstractIterator, private CsvHelper { // abstract base class for iterating CSV data record by record
666 
667 public:
668  private :
670  const Options = ( // valid options for the object (a hash for quick lookups of valid keys); each value is a C_OPT1 / C_OPT2 bitmask
671  "compat_force_empty_string": C_OPT1|C_OPT2,
672  "date_format": C_OPT1|C_OPT2,
673  "date-format": C_OPT1|C_OPT2,
674  "encoding": C_OPT1|C_OPT2,
675  "eol": C_OPT1|C_OPT2,
676  "extended_record": C_OPT2,
677  "fields": C_OPT1,
678  "header-lines": C_OPT1|C_OPT2,
679  "header_lines": C_OPT1|C_OPT2,
680  "header-names": C_OPT1|C_OPT2,
681  "header_names": C_OPT1|C_OPT2,
682  "header_reorder": C_OPT1|C_OPT2,
683  "headers": C_OPT1,
684  "ignore-empty": C_OPT1|C_OPT2,
685  "ignore_empty": C_OPT1|C_OPT2,
686  "ignore-whitespace": C_OPT1|C_OPT2,
687  "ignore_whitespace": C_OPT1|C_OPT2,
688  "number_format": C_OPT1|C_OPT2,
689  "quote": C_OPT1|C_OPT2,
690  "separator": C_OPT1|C_OPT2,
691  "timezone": C_OPT1|C_OPT2,
692  "tolwr": C_OPT1|C_OPT2,
693  "verify-columns": C_OPT1|C_OPT2,
694  "verify_columns": C_OPT1|C_OPT2,
695  );
696 
697  // field separator
698  string separator = ",";
699 
700  // field content delimiter
701  string quote = "\"";
702 
703  // number of header lines
704  softint headerLines = 0;
705 
706  // flag to use string names from the first header row if possible
707  bool headerNames = False;
708 
709  // True if empty lines should be ignored
710  bool ignoreEmptyLines = True;
711 
712  // Flag to trim the field content (trim leading and trailing whitespace) from unquoted fields
713  bool ignoreWhitespace = True;
714 
715  // the @ref Qore::TimeZone to use when parsing dates (default: current time zone)
716  *TimeZone timezone;
717 
718  // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
719  bool checkElementCounts = False;
720 
721  // getRecord/getValue returns extended hash
722  bool extendedRecord = False;
723 
724  // force "*string" fields with no value to return an empty string rather than @ref nothing for backwards compatibility with very early versions of CsvUtil
725  bool compat_force_empty_string = False;
726 
727  // column count for verifying column counts
728  int cc;
729 
730  // current record count for the index() method
731  int rc = 0;
732 
733  // to resolve record type by rules
734  hash m_resolve_by_rule;
735 
736  // to resolve record type by number of fields
737  hash m_resolve_by_count;
738 
739  // list of idx to field transformations, in order of spec
740  hash m_resolve_by_idx;
741 
742  // fake specs based on the first non-header row
743  bool fakeHeaderNames;
744 
745  // data source iterator
746  AbstractLineIterator lineIterator;
747 
748 public:
749 
751 
757  constructor(AbstractLineIterator li, *hash opts); // creates the AbstractCsvIterator with an option hash in single-type mode
758 
759 
761 
766  // NOTE: when declared as *hash then always calls this constructor
767  constructor(AbstractLineIterator li, hash spec, hash opts); // variant taking an explicit spec hash; presumably the multi-type form - TODO confirm
768 
769 
771 
772 private:
773  processCommonOptions(*hash opts, int C_OPTx); // processes common options and assigns internal fields
774 public:
775 
776 
778 
779 private:
780  processSpec(hash spec); // processes the specification and assigns internal data for resolving
781 public:
782 
783 
785 
786 private:
787  prepareFieldsFromHeaders(*list headers); // matches headers provided in the CSV header or in options
788 public:
789 
790 
791  bool valid(); // NOTE(review): presumably reports whether the iterator points at a valid record - confirm
792 
793 
795 
800  bool next(); // moves the current line / record position to the next line / record
801 
802 
804 
811  auto memberGate(string name); // returns the given column value for the current row
812 
813 
815 
826  hash getValue(); // returns the current record as a hash
827 
828 
830 
843  hash getRecord(bool extended); // returns the current record as a hash; "extended" presumably selects the extended hash form (cf. extendedRecord) - TODO confirm
844 
845 
847 
858  hash getRecord(); // returns the current record as a hash
859 
860 
862 
874  auto getRecordList(); // returns the current record as a list
875 
876 
878 
885  string getSeparator(); // returns the current separator string
886 
887 
889 
896  string getQuote(); // returns the current quote string
897 
898 
900 
907  *list getHeaders(); // returns the current record headers or NOTHING if no headers have been detected or saved yet
908 
909 
911 
916  *list getHeaders(string type); // NOTE(review): presumably the headers for the given record type - confirm
917 
918 
920 
931  int index(); // returns the row index being iterated
932 
933 
935 
948  int lineNumber(); // returns the current iterator line number in the file (the first line is line 1)
949 
950 
951 
952 private:
953  auto handleType(hash fh, *string val); // NOTE(review): presumably converts a raw field value according to the field description fh - confirm
954 public:
955 
956 
958 
959 private:
961 public:
962 
963 
965 
972  string identifyType(list rec); // identifies the record type using identifyTypeImpl(); may be overridden if necessary
973 
974 
976 
984 private:
985  *string identifyTypeImpl(list rec); // identifies the type of an input record; the declared return type allows no value
986 public:
987 
988 
990 
991 private:
992  hash parseLine(); // parses a line in the file and returns the processed fields
993 public:
994 
995  };
996 
998 
1004 
1005 public: // NOTE(review): the class header is missing from this listing; presumably the body of the CsvIterator class - confirm
1007 
1013 
1014 
1016 
1023 
1024 
1026 
1032  constructor(Qore::InputStream input, string encoding = "UTF-8", *hash opts) ; // takes an input stream, an encoding defaulting to "UTF-8" and an optional option hash
1033 
1034 
1036 
1043  constructor(Qore::InputStream input, string encoding = "UTF-8", hash spec, hash opts) ; // same as above with an explicit spec hash and a mandatory option hash
1044 
1045 
1046  auto memberGate(string name); // NOTE(review): presumably forwards to AbstractCsvIterator::memberGate() - confirm
1047 
1048  };
1049 
1051 
1059 
1060 public: // body of the CsvFileIterator class (class header is missing from this listing); allows CSV files to be iterated on a record basis
1061  private :
1063  string m_file_path; // the path of the file being iterated
1064 
1065 public:
1066 
1068 
1073  constructor(string path, *hash opts) ; // creates the CsvFileIterator in single-type mode with the path of the file to read and an option hash
1074 
1075 
1077 
1081  constructor(string path, hash spec, hash opts) ; // variant taking an explicit spec hash; presumably the multi-type form - TODO confirm
1082 
1083 
1085  auto memberGate(string name); // NOTE(review): presumably forwards to AbstractCsvIterator::memberGate() - confirm
1086 
1087 
1089  string getEncoding(); // returns the character encoding for the file
1090 
1091 
1093  string getFileName(); // returns the file path/name used to open the file
1094 
1095 
1097  hash<Qore::StatInfo> hstat(); // returns a StatInfo hash of the underlying file
1098 
1099 
1101  list stat(); // returns a stat list of the underlying file
1102 
1103  }; // CsvFileIterator class
1104 
1106 
1114 
1115 public: // NOTE(review): the class header is missing from this listing; presumably the body of the CsvDataIterator class - confirm
1116 
1118 
1123  constructor(string data, *hash opts) ; // creates the iterator with the input data and optionally an option hash
1124 
1125 
1127 
1131  constructor(string data, hash spec, hash opts) ; // variant taking an explicit spec hash; presumably the multi-type form - TODO confirm
1132 
1133 
1134  auto memberGate(string name); // NOTE(review): presumably forwards to AbstractCsvIterator::memberGate() - confirm
1135 
1136 
1137  };
1138 
1140 
1249 class AbstractCsvWriter : private CsvHelper { // provides a parent for all CSV writers
1250 
1251 public:
1252  private :
1254  const Options = ( // valid options for the object (a hash for quick lookups of valid keys); each value is a C_OPT1 / C_OPT2 bitmask
1255  "block": C_OPT1|C_OPT2,
1256  "datamap": C_OPT1,
1257  "date_format": C_OPT1|C_OPT2,
1258  "date-format": C_OPT1|C_OPT2,
1259  "encoding": C_OPT1|C_OPT2,
1260  "eol": C_OPT1|C_OPT2,
1261  "fields": C_OPT1,
1262  "headers": C_OPT1,
1263  "header_reorder": C_OPT1,
1264  "info_log": C_OPT1|C_OPT2,
1265  "number_format": C_OPT1|C_OPT2,
1266  "optimal_quotes": C_OPT1|C_OPT2,
1267  "optimal-quotes": C_OPT1|C_OPT2,
1268  "quote": C_OPT1|C_OPT2,
1269  "quote_escape": C_OPT1|C_OPT2,
1270  "separator": C_OPT1|C_OPT2,
1271  "verify_columns": C_OPT1|C_OPT2,
1272  "verify-columns": C_OPT1|C_OPT2,
1273  "write_headers": C_OPT1|C_OPT2,
1274  "write-headers": C_OPT1|C_OPT2,
1275  );
1276 
1278  string encoding; // output file character encoding
1279 
1281  string separator = ","; // field separator
1282 
1284  string quote = "\""; // field content delimiter
1285 
1287  string m_quoteEscapeChar = "\\"; // quote escape character
1288 
1290  string eol = EOL_UNIX; // end of line sequence
1291 
1294 
1296  int lineNo = 0; // the latest line number
1297 
1299  int block = 1000; // block size for bulk DML
1300 
1303 
1306 
1309 
1311  *code info_log; // a closure/call reference for informational logging when using write(SQLStatement)
1312 
1315 
1318 
1319 public:
1320 
1322 
1328  constructor(string n_errname, *hash n_opts); // creates the AbstractCsvWriter in single-type mode
1329 
1330 
1332 
1340  constructor(string n_errname, hash spec, hash n_opts); // variant taking an explicit spec hash; presumably the multi-type form - TODO confirm
1341 
1342 
1344 
1345 private:
1346  processCommonOptions(*hash n_opts, int C_OPTx); // processes options and sets internal variables
1347 public:
1348 
1349 
1351 
1352 private:
1353  processSpec(); // processes the specification and sets the internal variables for mapping
1354 public:
1355 
1356 
1358 
1359 private:
1360  writeHeaders(); // writes the CSV headers
1361 public:
1362 
1363 
1365 
1370  writeLine(list values); // writes a line with a list of values; data are checked against column rules
1371 
1372 
1374 
1379  writeLine(hash values); // NOTE(review): presumably the same as writeLine(list) with values given by field name - confirm
1380 
1381 
1383 
1389  writeLine(string type, list values); // NOTE(review): presumably writes a line for the given record type - confirm
1390 
1391 
1393 
1399  writeLine(string type, hash values); // NOTE(review): presumably writes a line for the given record type - confirm
1400 
1401 
1403 
1410  write(Qore::AbstractIterator iterator); // streams an iterator into the output
1411 
1412 
1414 
1421  write(Qore::SQL::SQLStatement iterator); // NOTE(review): presumably streams the statement's rows into the output; info_log is documented as used with this overload - confirm
1422 
1423 
1425 
1432  write(list l); // NOTE(review): presumably streams the list's elements into the output - confirm
1433 
1434 
1436 
1437 private:
1438  abstract writeRawLine(list values); // must be overridden in child classes to provide the output implementation
1439 public:
1440 
1442 
1448 private:
1449  string prepareRawLine(list values); // prepares a string (one output line) with formatting and escaping
1450 public:
1451 
1452 
1453 
1454 private:
1455  string prepareRawLineIntern(list values); // NOTE(review): presumably the internal worker behind prepareRawLine() - confirm
1456 public:
1457 
1458 
1459  }; // AbstractCsvWriter class
1460 
1463 
1464 public: // NOTE(review): the class header is missing from this listing; presumably the body of the CsvWriter class - confirm
1465  private :
1467  StreamWriter output; // the output stream for the CSV data
1468 
1469 public:
1470 
1472 
1479 
1480 
1482 
1490 
1491 
1493 
1494 private:
1495  writeRawLine(list values); // renders the line and writes it to the output stream
1496 public:
1497 
1498  };
1499 
1501 
1506 
1507 public: // body of the CsvFileWriter class (class header is missing from this listing)
1508  private :
1511 
1512 public:
1513 
1515 
1523  constructor(string path, *hash opts) ; // creates the CsvFileWriter in single-type mode with the path of the file to create and an optional option hash
1524 
1525 
1527 
1536  constructor(string path, hash spec, hash opts) ; // variant taking an explicit spec hash; presumably the multi-type form - TODO confirm
1537 
1538 
1539 
1540 private:
1541  openFile(string path); // NOTE(review): presumably opens the output file at the given path - confirm
1542 public:
1543 
1544 
1545 
1546 private:
1547  writeRawLine(list values); // NOTE(review): presumably the file-backed implementation of the abstract AbstractCsvWriter::writeRawLine() - confirm
1548 public:
1549 
1550  }; // CsvFileWriter
1551 
1553 
1558 
1559 public: // body of the CsvStringWriter class (class header is missing from this listing); in-memory string CSV creation
1560  private :
1561  // a csv content
1562  string content;
1563 
1564 public:
1565 
1567 
1572  constructor(*hash opts) ; // creates the CsvStringWriter in single-type mode with the content held in memory
1573 
1574 
1576 
1582  constructor(hash spec, hash opts) ; // variant taking an explicit spec hash; presumably the multi-type form - TODO confirm
1583 
1584 
1585 
1586 private:
1587  initContent(); // NOTE(review): presumably resets or initializes the in-memory content - confirm
1588 public:
1589 
1590 
1591 
1592 private:
1593  writeRawLine(list values); // NOTE(review): presumably appends the rendered line to content - confirm
1594 public:
1595 
1596 
1598 
1607  string write(Qore::AbstractIterator iterator); // streams the iterator and returns a CSV-formatted output string
1608 
1609 
1611 
1620  string write(list l); // NOTE(review): presumably the list counterpart of write(AbstractIterator), also returning the CSV string - confirm
1621 
1622 
1624  string getContent(); // gets the current in-memory content as a string
1625 
1626  }; // CsvStringWriter
1627 }; // CsvUtil namespace
bool write_headers
this flag determines if any stored headers are output
Definition: CsvUtil.qm.dox.h:1305
constructor(string data, *hash opts)
Creates the CsvDataIterator with the input data and optionally an option hash.
hash m_out_by_name
mapping output field by name
Definition: CsvUtil.qm.dox.h:1314
constructor(*hash opts)
creates the CsvStringWriter single-type mode with content in the memory
int index()
Returns the row index being iterated, which does not necessarily correspond to the line number when t...
string write(Qore::AbstractIterator iterator)
Stream iterator and return a CSV-formatted output string.
hash parseLine()
Parses a line in the file and returns a processed list of the fields.
the AbstractCsvIterator class is an abstract base class that allows abstract CSV data to be iterated ...
Definition: CsvUtil.qm.dox.h:665
processCommonOptions(*hash n_opts, int C_OPTx)
Process options and set internal variables.
list getLineAndSplit()
Read line split by separator/quote into list.
writeHeaders()
Write csv headers.
Qore::File file
the file to write
Definition: CsvUtil.qm.dox.h:1510
const True
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: CsvUtil.qm.dox.h:670
hash m_out_by_idx
mapping output field by index
Definition: CsvUtil.qm.dox.h:1317
string getFileName()
Returns the file path/name used to open the file.
string getQuote()
Returns the current quote string.
bool checkElementCounts
verify the column count for every row; if a row does not match, then throw a CSVFILEITERATOR-DATA-ERR...
Definition: CsvUtil.qm.dox.h:1293
constructor(string path, *hash opts)
Creates the CsvFileIterator in single-type mode with the path of the file to read and an option hash...
const False
string m_quoteEscapeChar
quote escape character
Definition: CsvUtil.qm.dox.h:1287
*code info_log
a closure/call reference for informational logging when using write(SQLStatement) ...
Definition: CsvUtil.qm.dox.h:1311
list list(...)
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: CsvUtil.qm.dox.h:1254
write(Qore::AbstractIterator iterator)
Stream an iterator into the output.
The CsvFileIterator class allows CSV files to be iterated on a record basis.
Definition: CsvUtil.qm.dox.h:1058
string getContent()
Get the current in-memory content as a string.
string eol
end of line sequence
Definition: CsvUtil.qm.dox.h:1290
string getSeparator()
Returns the current separator string.
The CsvStringWriter class for in-memory string CSV creation.
Definition: CsvUtil.qm.dox.h:1557
string separator
field separator
Definition: CsvUtil.qm.dox.h:1281
string quote
field content delimiter
Definition: CsvUtil.qm.dox.h:1284
csvutil_set_global_compat_force_empty_string(softbool val)
sets the global_compat_force_empty_string variable to force "*string" fields with no value to ret...
list stat()
Returns a stat list of the underlying file.
*string identifyTypeImpl(list rec)
Identify an input record, given the raw line string. This method performs a lookup to a precalculated ...
processSpec()
Process specification and set internal variable for mapping.
The AbstractCsvWriter class provides a parent for all CSV writers.
Definition: CsvUtil.qm.dox.h:1249
string baseTemplate
base template for value format
Definition: CsvUtil.qm.dox.h:1302
const EOL_MACINTOSH
Old (pre-OSX) Macintosh end of line character sequence.
Definition: CsvUtil.qm.dox.h:404
const CSV_TYPE_UNKNOWN
Record type when not matching any type.
Definition: CsvUtil.qm.dox.h:410
const EOL_UNIX
Unix end of line character sequence (for new OS X too)
Definition: CsvUtil.qm.dox.h:400
StreamWriter output
the output stream for the CSV data
Definition: CsvUtil.qm.dox.h:1467
The CsvWriter class for safe CSV data creation.
Definition: CsvUtil.qm.dox.h:1462
*list getHeaders()
Returns the current record headers or NOTHING if no headers have been detected or saved yet...
constructor(string n_errname, *hash n_opts)
Creates the AbstractCsvWriter in single-type mode.
The CsvIterator class allows CSV sources to be iterated on a record basis. The source of the input da...
Definition: CsvUtil.qm.dox.h:1003
int lineNo
the latest line number
Definition: CsvUtil.qm.dox.h:1296
string type(auto arg)
writeLine(list values)
Write a line with a list of values; data are checked against column rules.
hash getValue()
Returns the current record as a hash.
const EOL_WIN
MS DOS/Windows end of line character sequence.
Definition: CsvUtil.qm.dox.h:402
string prepareRawLine(list values)
Prepare a string (line with EOL) with formatting and escaping.
The CsvDataIterator class allows arbitrary CSV string data to be iterated on a record basis...
Definition: CsvUtil.qm.dox.h:1113
string m_file_path
the path of the file being iterated
Definition: CsvUtil.qm.dox.h:1063
writeRawLine(list values)
This method must be overridden in child classes to provide the output implementation.
constructor(string path, *hash opts)
creates the CsvFileWriter in single-type mode with the path of the file to create and an optional opt...
int block
block size for bulk DML
Definition: CsvUtil.qm.dox.h:1299
string identifyType(list rec)
Identify a fixed-length line type using identifyTypeImpl(); may be overridden if necessary.
bool optimal_quotes
stores the optimal quotes option
Definition: CsvUtil.qm.dox.h:1308
string encoding
output file character encoding
Definition: CsvUtil.qm.dox.h:1278
auto memberGate(string name)
Returns the given column value for the current row.
bool global_compat_force_empty_string
global option to force "*string" fields with no value to return an empty string when parsing rath...
constructor(Qore::AbstractLineIterator li, *hash opts)
Creates the CsvIterator in single-type mode with general line iterator to read and an option hash...
The CsvFileWriter class for safe CSV file creation.
Definition: CsvUtil.qm.dox.h:1505
processCommonOptions(*hash opts, int C_OPTx)
process common options and assign internal fields
hash hash(object obj)
auto getRecordList()
Returns the current record as a list.
prepareFieldsFromHeaders(*list headers)
match headers provided at csv header or in options, never called for multi-type because header_names ...
abstract writeRawLine(list values)
This method must be overridden in child classes to provide the output implementation.
auto memberGate(string name)
calls AbstractCsvIterator::memberGate()
hash< Qore::StatInfo > hstat()
Returns a StatInfo hash of the underlying file.
constructor(Qore::OutputStream output, *hash opts)
creates the CsvWriter in single-type mode with the OutputStream and an optional option hash ...
processSpec(hash spec)
process specification and assign internal data for resolving
constructor(AbstractLineIterator li, *hash opts)
creates the AbstractCsvIterator with an option hash in single-type mode
writeRawLine(list values)
renders the line and writes it to the output stream
bool next()
Moves the current line / record position to the next line / record; returns False if there are no mor...
const CSV_TYPE_SINGLE
Record type when multi-type is disabled.
Definition: CsvUtil.qm.dox.h:412
string getEncoding()
Returns the character encoding for the file.
hash getRecord()
Returns the current record as a hash.
int lineNumber()
Returns the current iterator line number in the file (the first line is line 1) or 0 if not pointing ...
writeRawLine(list values)
This method must be overridden in child classes to provide the output implementation.