Qore CsvUtil Module Reference  1.7
AbstractCsvIterator.qc.dox.h
1 // -*- mode: c++; indent-tabs-mode: nil -*-
2 // Qore AbstractCsvIterator class definition
3 
4 /* AbstractCsvIterator.qc Copyright 2012 - 2020 Qore Technologies, s.r.o.
5 
6  Permission is hereby granted, free of charge, to any person obtaining a
7  copy of this software and associated documentation files (the "Software"),
8  to deal in the Software without restriction, including without limitation
9  the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  and/or sell copies of the Software, and to permit persons to whom the
11  Software is furnished to do so, subject to the following conditions:
12 
13  The above copyright notice and this permission notice shall be included in
14  all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  DEALINGS IN THE SOFTWARE.
23 */
24 
25 // assume local var scope, do not use "$" for vars, members, and method calls
26 
28 namespace CsvUtil {
30  // the AbstractCsvIterator class is an abstract base class that allows abstract CSV data to be iterated
273 class AbstractCsvIterator : public Qore::AbstractIterator, protected CsvHelper {
274 
275 public:
276 protected:
278  const Options = { // valid options for the object (a hash for quick lookups of valid keys)
279  "compat_force_empty_string": C_OPT1|C_OPT2,
280  "date_format": C_OPT1|C_OPT2,
281  "date-format": C_OPT1|C_OPT2,
282  "encoding": C_OPT1|C_OPT2,
283  "eol": C_OPT1|C_OPT2,
284  "extended_record": C_OPT2,
285  "fields": C_OPT1,
286  "header-lines": C_OPT1|C_OPT2,
287  "header_lines": C_OPT1|C_OPT2,
288  "header-names": C_OPT1|C_OPT2,
289  "header_names": C_OPT1|C_OPT2,
290  "header_reorder": C_OPT1|C_OPT2,
291  "headers": C_OPT1,
292  "ignore-empty": C_OPT1|C_OPT2,
293  "ignore_empty": C_OPT1|C_OPT2,
294  "ignore-whitespace": C_OPT1|C_OPT2,
295  "ignore_whitespace": C_OPT1|C_OPT2,
296  "number_format": C_OPT1|C_OPT2,
297  "quote": C_OPT1|C_OPT2,
298  "separator": C_OPT1|C_OPT2,
299  "timezone": C_OPT1|C_OPT2,
300  "tolwr": C_OPT1|C_OPT2,
301  "verify-columns": C_OPT1|C_OPT2,
302  "verify_columns": C_OPT1|C_OPT2,
303  };
304 
305  // field separator
306  string separator = ",";
307 
308  // field content delimiter
309  string quote = "\"";
310 
311  // number of header lines
312  softint headerLines = 0;
313 
314  // flag to use string names from the first header row if possible
315  bool headerNames = False;
316 
317  // True if empty lines should be ignored
318  bool ignoreEmptyLines = True;
319 
320  // Flag to trim the field content (trim leading and trailing whitespace) from unquoted fields
321  bool ignoreWhitespace = True;
322 
323  // the @ref Qore::TimeZone to use when parsing dates (default: current time zone)
324  *TimeZone timezone;
325 
326  // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
327  bool checkElementCounts = False;
328 
329  // getRecord/getValue returns extended hash
330  bool extendedRecord = False;
331 
332  // force "*string" fields with no value to return an empty string rather than @ref nothing for backwards compatibility with very early versions of CsvUtil
333  bool compat_force_empty_string = False;
334 
335  // read ahead flag
336  bool read_ahead;
337 
338  // column count for verifying column counts
339  int cc;
340 
341  // current record count for the index() method
342  int rc = 0;
343 
344  // to resolve record type by rules
345  hash m_resolve_by_rule;
346 
347  // to resolve record type by number of fields
348  hash m_resolve_by_count;
349 
350  // list of idx to field transformations, in order of spec
351  hash m_resolve_by_idx;
352 
353  // fake specs based on the first non-header row
354  bool fakeHeaderNames;
355 
356  // data source iterator
357  AbstractLineIterator lineIterator;
358 
359 public:
360 
362  // creates the AbstractCsvIterator with an option hash in single-type mode
368  constructor(AbstractLineIterator li, *hash opts);
369 
370 
372 
377  // NOTE: when declared as *hash then always calls this constructor
378  constructor(AbstractLineIterator li, hash spec, hash opts);
379 
380  // process common options and assign internal fields
382 protected:
383  processCommonOptions(*hash opts, int C_OPTx);
384 public:
385 
386  // process specification and assign internal data for resolving
388 protected:
389  processSpec(hash spec);
390 public:
391 
392  // match headers provided at csv header or in options
394 protected:
395  prepareFieldsFromHeaders(*list headers);
396 public:
397 
398 
399  bool valid();
400 
401 
403  // moves the current line / record position to the next line / record
408  bool next();
409 
410 
412  // reads a single row without moving the index position
414  peek();
415 
416 
418  // returns the given column value for the current row
425  auto memberGate(string name);
426 
427 
429  // returns the current record as a hash
440  hash<auto> getValue();
441 
442 
444 
457  hash<auto> getRecord(bool extended);
458 
459 
461  // returns the current record as a hash
472  hash<auto> getRecord();
473 
474 
476  // returns the current record as a list
488  auto getRecordList();
489 
490 
492  // returns the current separator string
499  string getSeparator();
500 
501 
503  // returns the current quote string
510  string getQuote();
511 
512  // returns the description of the record type, if any
514  *hash<string, AbstractDataField> getRecordType();
515 
516 
518  // returns the current record headers or NOTHING if no headers have been detected or saved yet
525  *list<string> getHeaders();
526 
527 
529 
534  *list<string> getHeaders(string type);
535 
536 
538  // returns the row index being iterated, which does not necessarily correspond to the line number
549  int index();
550 
551 
553  // returns the current iterator line number in the file (the first line is line 1)
566  int lineNumber();
567 
568 
570  // returns the current line 'as it is', i.e. the original string
579  string getRawLine();
580 
581 
583  // returns the list of raw string values of the current line
593  list<*string> getRawLineValues();
594 
595 
596 protected:
597  auto handleType(hash<auto> fh, *string val);
598 public:
599 
600  // read line split by separator/quote into list
602 protected:
603  list<*string> getLineAndSplit();
604 public:
605 
606 
608  // identifies the type of the given record using identifyTypeImpl(); may be overridden if necessary
615  string identifyType(list<auto> rec);
616 
617 
619  // identifies an input record, given the raw line string
626 protected:
627  *string identifyTypeImpl(list<auto> rec);
628 public:
629 
630  // parses a line in the file and returns the processed fields
632 protected:
633  hash<auto> parseLine();
634 public:
635 
636  }; // AbstractCsvIterator class
637 }; // CsvUtil namespace
CsvUtil::AbstractCsvIterator::processSpec
processSpec(hash spec)
process specification and assign internal data for resolving
CsvUtil::AbstractCsvIterator::getRecordList
auto getRecordList()
Returns the current record as a list.
CsvUtil::AbstractCsvIterator::lineNumber
int lineNumber()
Returns the current iterator line number in the file (the first line is line 1) or 0 if not pointing ...
CsvUtil::AbstractCsvIterator::next
bool next()
Moves the current line / record position to the next line / record; returns False if there are no mor...
CsvUtil::AbstractCsvIterator::Options
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: AbstractCsvIterator.qc.dox.h:278
type
string type(auto arg)
CsvUtil::AbstractCsvIterator::getQuote
string getQuote()
Returns the current quote string.
CsvUtil::AbstractCsvIterator
the AbstractCsvIterator class is an abstract base class that allows abstract CSV data to be iterated
Definition: AbstractCsvIterator.qc.dox.h:273
CsvUtil::AbstractCsvIterator::index
int index()
Returns the row index being iterated, which does not necessarily correspond to the line number when t...
CsvUtil::AbstractCsvIterator::getHeaders
*list< string > getHeaders()
Returns the current record headers or NOTHING if no headers have been detected or saved yet.
CsvUtil::AbstractCsvIterator::getRawLineValues
list< *string > getRawLineValues()
Returns the list of raw string values of the current line.
CsvUtil::AbstractCsvIterator::getRecord
hash< auto > getRecord()
Returns the current record as a hash.
True
const True
CsvUtil
the CsvUtil namespace. All classes used in the CsvUtil module should be inside this namespace
Definition: AbstractCsvIterator.qc.dox.h:28
CsvUtil::AbstractCsvIterator::identifyTypeImpl
*string identifyTypeImpl(list< auto > rec)
Identify an input record, given the raw line string. This method performs a lookup to a precalculated ...
CsvUtil::AbstractCsvIterator::getValue
hash< auto > getValue()
Returns the current record as a hash.
CsvUtil::AbstractCsvIterator::getSeparator
string getSeparator()
Returns the current separator string.
CsvUtil::AbstractCsvIterator::identifyType
string identifyType(list< auto > rec)
Identify a fixed-length line type using identifyTypeImpl(); may be overridden if necessary.
list
list< auto > list(...)
hash
hash< auto > hash(object obj)
CsvUtil::AbstractCsvIterator::getRawLine
string getRawLine()
Returns the current line 'as it is', i.e. the original string.
CsvUtil::AbstractCsvIterator::getRecordType
*hash< string, AbstractDataField > getRecordType()
Returns the description of the record type, if any.
False
const False
CsvUtil::AbstractCsvIterator::prepareFieldsFromHeaders
prepareFieldsFromHeaders(*list headers)
match headers provided at csv header or in options, never called for multi-type because header_names ...
CsvUtil::AbstractCsvIterator::peek
peek()
Reads a single row without moving the index position.
CsvUtil::AbstractCsvIterator::constructor
constructor(AbstractLineIterator li, *hash opts)
creates the AbstractCsvIterator with an option hash in single-type mode
CsvUtil::AbstractCsvIterator::parseLine
hash< auto > parseLine()
Parses a line in the file and returns a processed list of the fields.
Qore::AbstractIterator
CsvUtil::AbstractCsvIterator::memberGate
auto memberGate(string name)
Returns the given column value for the current row.
CsvUtil::AbstractCsvIterator::getLineAndSplit
list< *string > getLineAndSplit()
Read line split by separator/quote into list.
CsvUtil::AbstractCsvIterator::processCommonOptions
processCommonOptions(*hash opts, int C_OPTx)
process common options and assign internal fields