Qore CsvUtil Module Reference  1.3
 All Classes Namespaces Functions Variables Groups Pages
CsvUtil.qm.dox.h
1 // -*- mode: c++; indent-tabs-mode: nil -*-
2 // @file CsvUtil.qm Qore user module for working with CSV files
3 
4 /* CsvUtil.qm Copyright 2012 - 2014 Qore Technologies, sro
5 
6  Permission is hereby granted, free of charge, to any person obtaining a
7  copy of this software and associated documentation files (the "Software"),
8  to deal in the Software without restriction, including without limitation
9  the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  and/or sell copies of the Software, and to permit persons to whom the
11  Software is furnished to do so, subject to the following conditions:
12 
13  The above copyright notice and this permission notice shall be included in
14  all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  DEALINGS IN THE SOFTWARE.
23 */
24 
25 // minimum required Qore version
26 
27 
28 /* see release notes below for version history
29 */
30 
132 // private class used to iterate a list and skip elements without any value; declared as a subclass of ListIterator
133 class ListValueIterator : public ListIterator {
134 
135 public:
136  constructor(*list l); // takes the (optional) list to iterate
137 
138 
139  bool next(); // NOTE(review): presumably advances to the next element that has a value (per the class comment) - confirm against CsvUtil.qm
140 
141 };
142 
143 class CsvHelper { // shared helper class; the CSV iterator and writer classes in this listing inherit it privately
144 
145 public:
146  private :
148  const Types = ( // hash keyed by field type name; every listed name maps to True
149  "int": True,
150  "*int": True,
151  "float": True,
152  "*float": True,
153  "number": True,
154  "*number": True,
155  "string": True,
156  "*string": True,
157  "date": True,
158  "*date": True,
159  );
160 
162  const FieldAttrs = ("type", "format", "timezone", "code"); // NOTE(review): presumably the attribute names accepted in a field description - confirm
163 
164 public:
165 
167  private setFields(); // NOTE(review): undocumented in this listing; presumably prepares the field descriptions - confirm
168 
169 
170  checkType(string key, string value); // NOTE(review): presumably validates the type name in 'value' against Types - confirm
171 
172 }; // class CsvHelper
173 
175 namespace CsvUtil {
177  const EOL_UNIX = "\n"; // Unix end of line character sequence (for new OS X too)
179  const EOL_WIN = "\r\n"; // MS DOS/Windows end of line character sequence
181  const EOL_MACINTOSH = "\r"; // old (pre-OSX) Macintosh end of line character sequence
182 
183  // helper list of end of line values
184  const EOLS = (EOL_UNIX, EOL_WIN, EOL_MACINTOSH, );
185 
187 
339 class CsvAbstractIterator : public Qore::AbstractIterator, private CsvHelper { // abstract base class that allows abstract CSV data to be iterated
340 
341 public:
342  private :
344  const Options = ( // valid options for the object (a hash for quick lookups of valid keys)
345  "encoding": True,
346  "separator": True,
347  "quote": True,
348  "eol": True,
349  "ignore-empty": True,
350  "ignore-whitespace": True,
351  "header-lines": True,
352  "header-names": True,
353  "headers": True,
354  "verify-columns": True,
355  "fields": True,
356  "timezone": True,
357  );
358 
359 public:
360 
361  private :
362  // field separator
363  string separator = ",";
364 
365  // field content delimiter
366  string quote = "\"";
367 
368  // number of header lines
369  softint headerLines = 0;
370 
371  // flag to use string names from the first header row if possible
372  bool headerNames = False;
373 
374  // True if empty lines should be ignored
375  bool ignoreEmptyLines = True;
376 
377  // Flag to trim the field content (trim leading and trailing whitespace) from unquoted fields
378  bool ignoreWhitespace = True;
379 
380  // headers / column names for lines iterated
381  *softlist headers;
382 
383  // hash of field information (types, formats, and possible code), hash key = column name or number (starting with 0)
384  *hash fields;
385 
386  // list of field descriptions (from fields, ordered when headers are set)
387  *list fdesc;
388 
389  // the @ref Qore::TimeZone to use when parsing dates (default: current time zone)
390  *TimeZone tz;
391 
392  // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
393  bool checkElementCounts = False;
394 
395  // column count for verifying column counts
396  int cc;
397 
398  // current record count for the index() method
399  int rc = 0;
400 
401 public:
402 
404 
408  constructor(*hash opts); // creates the CsvAbstractIterator with an option hash
409 
410 
412  private *string getDataName(); // returns the name of the input data
413 
414 
416  private abstract int lineNumberImpl(); // returns the current line number; implemented by the concrete iterator classes
417 
419  private abstract string getLineValueImpl(); // returns the current line; implemented by the concrete iterator classes
420 
422  private abstract bool nextLineImpl(); // moves the current line / record position to the next line / record
423 
425 
430  bool next(); // moves the current line / record position to the next line / record; returns False when there are no more
431 
432 
434 
441  any memberGate(string name); // returns the given column value for the current row
442 
443 
445 
454  hash getValue(); // returns the current record as a hash
455 
456 
458 
467  hash getRecord(); // returns the current record as a hash
468 
469 
471 
481  // NOTE(review): the tooltip index of this page lists "list getRecordList()" (returns the current record as a list), but no such declaration appears in this listing; it presumably belongs here - confirm against CsvUtil.qm
482 
484 
491  string getSeparator(); // returns the current separator string
492 
493 
495 
502  string getQuote(); // returns the current quote string
503 
504 
506 
512  *list getHeaders(); // returns the current column headers or NOTHING if no headers have been detected or saved yet
513 
514 
516 
527  int index(); // returns the row index being iterated, which does not necessarily correspond to the line number
528 
529 
531 
546  int lineNumber(); // returns the current iterator line number in the file (the first line is line 1) or 0 if not pointing at any data
547 
548 
549  private any handleType(hash fh, *string val); // NOTE(review): undocumented in this listing; presumably converts 'val' according to the field description 'fh' - confirm
550 
551 
553  private list parseLine(); // parses a line in the file and returns a processed list of the fields
554 
555  };
556 
558 
563 class CsvFileIterator : public CsvUtil::CsvAbstractIterator,public Qore::FileLineIterator, private CsvHelper { // allows CSV files to be iterated on a record basis
564 
565 public:
567 
572  constructor(string path, *hash opts); // creates the CsvFileIterator with the path of the file to read and optionally an option hash
573 
574 
576  private *string getDataName(); // returns the name of the input data
577 
578 
580  private int lineNumberImpl(); // returns the current line number
581 
582 
584  private string getLineValueImpl(); // returns the current line trimmed of the EOL character(s)
585 
586 
588  private bool nextLineImpl(); // moves the current line / record position to the next line / record; returns False when there are no more
589 
590  }; // CsvFileIterator class
591 
593 
598 class CsvDataIterator : public CsvUtil::CsvAbstractIterator, private CsvHelper { // allows arbitrary CSV string data to be iterated on a record basis
599 
600 public:
601  private :
603  string data; // input data
605  *string eol; // EOL marker
607  *string line; // current line
609  int pos = 0; // current byte pos
611  int lineno = 0; // current line number
613  bool valid = False; // NOTE(review): presumably the state reported by valid() - confirm
614 
615 public:
616 
618 
623  constructor(string data, *hash opts); // creates the CsvDataIterator with the input data and optionally an option hash
624 
625 
627 
635  bool valid(); // returns True if the iterator is currently pointing at a valid element, False if not
636 
637 
639  private int lineNumberImpl(); // returns the current line number
640 
641 
643  private string getLineValueImpl(); // returns the current line trimmed of the EOL character(s)
644 
645 
647  private bool nextLineImpl(); // moves the current line / record position to the next line / record; returns False when there are no more
648 
649  };
650 
652 
718 class AbstractCsvWriter : private CsvHelper { // provides a parent for all CSV writers
719 
720 public:
721  private :
723  const Options = ( // valid options for the object (a hash for quick lookups of valid keys)
724  "encoding": True,
725  "separator": True,
726  "quote": True,
727  "eol": True,
728  "verify-columns": True,
729  "fields": True,
730  "headers": True,
731  "date-format": True,
732  "write-headers": True,
733  "optimal-quotes": True,
734  );
735 
736 public:
737 
738  private :
739  // TODO/FIXME
740  string encoding = get_default_encoding();
741 
742  // field separator
743  string separator = ",";
744 
745  // field content delimiter
746  string quote = "\"";
747 
748  // end of line sequence
749  string eol = EOL_UNIX;
750 
751  // default date->string format
752  string dateFormat = 'DD/MM/YYYY hh:mm:SS';
753 
754  // headers / column names for lines iterated
755  *softlist headers;
756 
757  // hash of field information (types, formats, and possible code), hash key = column name or number (starting with 0)
758  *hash fields;
759 
760  // list of field descriptions (from fields, ordered when headers are set)
761  *list fdesc;
762 
763  // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
764  bool checkElementCounts = False;
765 
766  // the latest line number
767  int lineNo = 0;
768 
769  // base template for value format
770  string baseTemplate;
771 
772  string errname; // NOTE(review): presumably stores the constructor's errname argument - confirm
773 
774  // this flag determines if any stored headers are output
775  bool write_headers = True;
776 
777  // stores the optimal quotes option
778  bool optimal_quotes = True;
779 
780 public:
781 
783 
789  constructor(string errname, *hash opts); // creates the AbstractCsvWriter
790 
791 
793 
798  writeLine(list values); // writes a line with a list of values; data are checked against column rules
799 
800 
802 
807  writeLine(hash values); // NOTE(review): hash variant of writeLine(); undocumented in this listing - confirm how keys map to columns
808 
809 
811 
818  write(AbstractIterator iterator); // streams the iterator into the output
819 
820 
822  abstract private writeRawLine(list values); // real write implementation, without any checking; implemented by the concrete writer classes
823 
825 
829  private string prepareRawLine(list values); // prepares the line string with formatting and escaping
830 
831 
833  private string dateFormat(int ix); // NOTE(review): shares its name with the dateFormat member above; presumably returns the date format for column 'ix' - confirm
834 
835 
836  }; // AbstractCsvWriter class
837 
840 
841 public:
842 
843  private :
844  // a file to write
845  File file;
846 
847 public:
848 
850 
858  constructor(string path, *hash opts);
859 
860 
861  private writeRawLine(list values);
862 
863 
864  }; // CsvFileWriter
865 
868 
869 public:
870 
871  private :
872  // a csv content
873  string content;
874 
875 public:
876 
878 
883  constructor(*hash opts);
884 
885 
886  private writeRawLine(list values);
887 
888 
890  string getContent();
891 
892 
893  }; // CsvStringWriter
894 
895 }; // CsvUtil namespace
896 
private writeRawLine(list values)
The real write implementation; performs no checking.
string getQuote()
returns the current quote string
string get_default_encoding()
list getRecordList()
returns the current record as a list
constructor(string data, *hash opts)
creates the CsvDataIterator with the input data and optionally an option hash
constructor(*hash opts)
creates the CsvStringWriter, which keeps its content in memory
private string prepareRawLine(list values)
Prepares a string (a line including the EOL sequence) with formatting and escaping.
private list parseLine()
parses a line in the file and returns a processed list of the fields
the CsvAbstractIterator class is an abstract base class that allows abstract CSV data to be iterated ...
Definition: CsvUtil.qm.dox.h:339
*list getHeaders()
returns the current column headers or NOTHING if no headers have been detected or saved yet ...
write(AbstractIterator iterator)
stream iterator into the file.
private *string getDataName()
Returns the name of the input data.
*string eol
EOL marker.
Definition: CsvUtil.qm.dox.h:605
int lineNumber()
returns the current iterator line number in the file (the first line is line 1) or 0 if not pointing ...
hash getValue()
returns the current record as a hash
constructor(string errname, *hash opts)
creates the AbstractCsvWriter
hash getRecord()
returns the current record as a hash
bool next()
Moves the current line / record position to the next line / record; returns False if there are no mor...
private bool nextLineImpl()
Moves the current line / record position to the next line / record; returns False if there are no mor...
const True
private writeRawLine(list values)
The real write implementation; performs no checking.
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: CsvUtil.qm.dox.h:344
int lineno
current line number
Definition: CsvUtil.qm.dox.h:611
int pos
current byte pos
Definition: CsvUtil.qm.dox.h:609
private int lineNumberImpl()
Returns the current line number.
constructor(string path, *hash opts)
creates the CsvFileIterator with the path of the file to read and optionally an option hash ...
const False
private *string getDataName()
Returns the name of the input data.
list list(...)
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: CsvUtil.qm.dox.h:723
abstract private string getLineValueImpl()
Returns the current line.
the CsvFileIterator class allows CSV files to be iterated on a record basis
Definition: CsvUtil.qm.dox.h:563
string getContent()
Get the current in-memory content as a string.
constructor(*hash opts)
creates the CsvAbstractIterator with an option hash
the CsvStringWriter class for in-memory string CSV creation
Definition: CsvUtil.qm.dox.h:867
the AbstractCsvWriter class provides a parent for all CSV writers
Definition: CsvUtil.qm.dox.h:718
const EOL_MACINTOSH
Old (pre-OSX) Macintosh end of line character sequence.
Definition: CsvUtil.qm.dox.h:181
any memberGate(string name)
returns the given column value for the current row
private bool nextLineImpl()
Moves the current line / record position to the next line / record; returns False if there are no mor...
const EOL_UNIX
Unix end of line character sequence (for new OS X too)
Definition: CsvUtil.qm.dox.h:177
*string line
current line
Definition: CsvUtil.qm.dox.h:607
private int lineNumberImpl()
Returns the current line number; returns 0 if not pointing at any data.
writeLine(list values)
writes a line with a list of values. The data are checked against the column rules.
int index()
returns the row index being iterated, which does not necessarily correspond to the line number when t...
string getSeparator()
returns the current separator string
private string getLineValueImpl()
Returns the current line trimmed of the EOL character(s)
const EOL_WIN
MS DOS/Windows end of line character sequence.
Definition: CsvUtil.qm.dox.h:179
the CsvDataIterator class allows arbitrary CSV string data to be iterated on a record basis ...
Definition: CsvUtil.qm.dox.h:598
bool valid()
returns True if the iterator is currently pointing at a valid element, False if not ...
constructor(string path, *hash opts)
creates the CsvFileWriter with the path of the file to write and an optional option hash
abstract private bool nextLineImpl()
Moves the current line / record position to the next line / record; returns False if there are no mor...
abstract private int lineNumberImpl()
Returns the current line number.
string data
input data
Definition: CsvUtil.qm.dox.h:603
the CsvFileWriter class for easy and safe CSV file creation
Definition: CsvUtil.qm.dox.h:839
hash hash(object obj)
private string getLineValueImpl()
Returns the current line trimmed of the EOL character(s)
abstract private writeRawLine(list values)
The real write implementation; performs no checking.