You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
376 lines
12 KiB
C++
376 lines
12 KiB
C++
/*=========================================================================

  Program:   Visualization Toolkit
  Module:    vtkDelimitedTextReader.h

  Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
  All rights reserved.
  See Copyright.txt or http://www.kitware.com/Copyright.htm for details.

     This software is distributed WITHOUT ANY WARRANTY; without even
     the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
     PURPOSE.  See the above copyright notice for more information.

=========================================================================*/
/*-------------------------------------------------------------------------
  Copyright 2008 Sandia Corporation.
  Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
  the U.S. Government retains certain rights in this software.
-------------------------------------------------------------------------*/

/**
 * @class   vtkDelimitedTextReader
 * @brief   reads in delimited ascii or unicode text files
 * and outputs a vtkTable data structure.
 *
 *
 * vtkDelimitedTextReader is an interface for pulling in data from a
 * flat, delimited ascii or unicode text file (delimiter can be any character).
 *
 * The behavior of the reader with respect to ascii or unicode input
 * is controlled by the SetUnicodeCharacterSet() method.  By default
 * (without calling SetUnicodeCharacterSet()), the reader will expect
 * to read ascii text and will output vtkStdString columns.  Use the
 * Set and Get methods to set delimiters that do not contain UTF8 in
 * the name when operating the reader in default ascii mode.  If the
 * SetUnicodeCharacterSet() method is called, the reader will output
 * vtkUnicodeString columns in the output table.  In addition, it is
 * necessary to use the Set and Get methods that contain UTF8 in the
 * name to specify delimiters when operating in unicode mode.
 *
 * There is also a special character set US-ASCII-WITH-FALLBACK that
 * will treat the input text as ASCII no matter what.  If and when it
 * encounters a character with its 8th bit set it will replace that
 * character with the code point ReplacementCharacter.  You may use
 * this if you have text that belongs to a code page like LATIN9 or
 * ISO-8859-1 or friends: mostly ASCII but not entirely.  Eventually
 * this class will acquire the ability to read gracefully text from
 * any code page, making this option obsolete.
 *
 * This class emits ProgressEvent for every 100 lines it reads.
 *
 * @par Thanks:
 * Thanks to Andy Wilson, Brian Wylie, Tim Shead, and Thomas Otahal
 * from Sandia National Laboratories for implementing this class.
 *
 *
 * @warning
 * This reader assumes that the first line in the file (whether that's
 * headers or the first document) contains at least as many fields as
 * any other line in the file.
 */

#ifndef vtkDelimitedTextReader_h
#define vtkDelimitedTextReader_h

#include "vtkIOInfovisModule.h" // For export macro
#include "vtkStdString.h"       // Needed for vtkStdString
#include "vtkTableAlgorithm.h"
#include "vtkUnicodeString.h" // Needed for vtkUnicodeString

class VTKIOINFOVIS_EXPORT vtkDelimitedTextReader : public vtkTableAlgorithm
{
public:
  static vtkDelimitedTextReader* New();
  vtkTypeMacro(vtkDelimitedTextReader, vtkTableAlgorithm);
  void PrintSelf(ostream& os, vtkIndent indent) override;

  //@{
  /**
   * Specifies the delimited text file to be loaded.
   */
  vtkGetStringMacro(FileName);
  vtkSetStringMacro(FileName);
  //@}

  //@{
  /**
   * Specify the InputString for use when reading from a character array.
   * Optionally include the length for binary strings. Note that a copy
   * of the string is made and stored. If this causes exceedingly large
   * memory consumption, consider using InputArray instead.
   *
   * The vtkStdString overload forwards to the (pointer, length) overload.
   * Because the stored length is an int, the string length is narrowed with
   * a static_cast<int>; inputs longer than an int can represent are not
   * supported through this entry point.
   */
  void SetInputString(const char* in);
  vtkGetStringMacro(InputString);
  void SetInputString(const char* in, int len);
  vtkGetMacro(InputStringLength, int);
  void SetInputString(const vtkStdString& input)
  {
    this->SetInputString(input.c_str(), static_cast<int>(input.length()));
  }
  //@}

  //@{
  /**
   * Enable reading from an InputString or InputArray instead of the default,
   * a file.
   */
  vtkSetMacro(ReadFromInputString, vtkTypeBool);
  vtkGetMacro(ReadFromInputString, vtkTypeBool);
  vtkBooleanMacro(ReadFromInputString, vtkTypeBool);
  //@}

  //@{
  /**
   * Specifies the character set used in the input file.  Valid character set
   * names will be drawn from the list maintained by the Internet Assigned
   * Numbers Authority (IANA) at
   *
   * http://www.iana.org/assignments/character-sets
   *
   * Where multiple aliases are provided for a character set, the preferred MIME name
   * will be used.  vtkDelimitedTextReader currently supports "US-ASCII", "UTF-8",
   * "UTF-16", "UTF-16BE", and "UTF-16LE" character sets.  The class description
   * additionally mentions the special "US-ASCII-WITH-FALLBACK" character set
   * (see ReplacementCharacter).
   */
  vtkGetStringMacro(UnicodeCharacterSet);
  vtkSetStringMacro(UnicodeCharacterSet);
  //@}

  //@{
  /**
   * Specify the character(s) that will be used to separate records.
   * The order of characters in the string does not matter.  Defaults
   * to "\r\n".
   */
  void SetUTF8RecordDelimiters(const char* delimiters);
  const char* GetUTF8RecordDelimiters();
  void SetUnicodeRecordDelimiters(const vtkUnicodeString& delimiters);
  vtkUnicodeString GetUnicodeRecordDelimiters();
  //@}

  //@{
  /**
   * Specify the character(s) that will be used to separate fields.  For
   * example, set this to "," for a comma-separated value file.  Set
   * it to ".:;" for a file where columns can be separated by a
   * period, colon or semicolon.  The order of the characters in the
   * string does not matter.  Defaults to a comma.
   */
  vtkSetStringMacro(FieldDelimiterCharacters);
  vtkGetStringMacro(FieldDelimiterCharacters);
  //@}

  //@{
  /**
   * Field delimiter accessors for unicode mode.  As stated in the class
   * description, the methods with UTF8 in their name must be used to specify
   * delimiters when the reader operates in unicode mode (i.e. after
   * SetUnicodeCharacterSet() has been called).
   *
   * NOTE(review): how these values relate to FieldDelimiterCharacters is
   * defined in the implementation file, not in this header -- confirm there.
   */
  void SetUTF8FieldDelimiters(const char* delimiters);
  const char* GetUTF8FieldDelimiters();
  void SetUnicodeFieldDelimiters(const vtkUnicodeString& delimiters);
  vtkUnicodeString GetUnicodeFieldDelimiters();
  //@}

  //@{
  /**
   * Get/set the character that will begin and end strings.  Microsoft
   * Excel, for example, will export the following format:
   *
   * "First Field","Second Field","Field, With, Commas","Fourth Field"
   *
   * The third field has a comma in it.  By using a string delimiter,
   * this will be correctly read.  The delimiter defaults to '"'.
   */
  vtkGetMacro(StringDelimiter, char);
  vtkSetMacro(StringDelimiter, char);
  //@}

  //@{
  /**
   * String delimiter accessors for unicode mode.  As stated in the class
   * description, the methods with UTF8 in their name must be used to specify
   * delimiters when the reader operates in unicode mode (i.e. after
   * SetUnicodeCharacterSet() has been called).
   *
   * NOTE(review): how these values relate to StringDelimiter is defined in
   * the implementation file, not in this header -- confirm there.
   */
  void SetUTF8StringDelimiters(const char* delimiters);
  const char* GetUTF8StringDelimiters();
  void SetUnicodeStringDelimiters(const vtkUnicodeString& delimiters);
  vtkUnicodeString GetUnicodeStringDelimiters();
  //@}

  //@{
  /**
   * Set/get whether to use the string delimiter.  Defaults to on.
   */
  vtkSetMacro(UseStringDelimiter, bool);
  vtkGetMacro(UseStringDelimiter, bool);
  vtkBooleanMacro(UseStringDelimiter, bool);
  //@}

  //@{
  /**
   * Set/get whether to treat the first line of the file as headers.
   * The default is false (no headers).
   */
  vtkGetMacro(HaveHeaders, bool);
  vtkSetMacro(HaveHeaders, bool);
  //@}

  //@{
  /**
   * Set/get whether to merge successive delimiters.  Use this if (for
   * example) your fields are separated by spaces but you don't know
   * exactly how many.
   */
  vtkSetMacro(MergeConsecutiveDelimiters, bool);
  vtkGetMacro(MergeConsecutiveDelimiters, bool);
  vtkBooleanMacro(MergeConsecutiveDelimiters, bool);
  //@}

  //@{
  /**
   * Specifies the maximum number of records to read from the file.  Limiting the
   * number of records to read is useful for previewing the contents of a file.
   */
  vtkGetMacro(MaxRecords, vtkIdType);
  vtkSetMacro(MaxRecords, vtkIdType);
  //@}

  //@{
  /**
   * When set to true, the reader will detect numeric columns and create
   * vtkDoubleArray or vtkIntArray for those instead of vtkStringArray. Default
   * is off.
   */
  vtkSetMacro(DetectNumericColumns, bool);
  vtkGetMacro(DetectNumericColumns, bool);
  vtkBooleanMacro(DetectNumericColumns, bool);
  //@}

  //@{
  /**
   * When set to true and DetectNumericColumns is also true, forces all
   * numeric columns to vtkDoubleArray even if they contain only
   * integer values. Default is off.
   */
  vtkSetMacro(ForceDouble, bool);
  vtkGetMacro(ForceDouble, bool);
  vtkBooleanMacro(ForceDouble, bool);
  //@}

  //@{
  /**
   * When DetectNumericColumns is set to true, whether to trim whitespace from
   * strings prior to conversion to a numeric.
   * Default is false to preserve backward compatibility.
   *
   * vtkVariant handles whitespace inconsistently, so trim it before we try to
   * convert it.  For example:
   *
   * vtkVariant("  2.0").ToDouble() == 2.0      <-- leading whitespace is not a problem
   * vtkVariant("  2.0  ").ToDouble() == NaN    <-- trailing whitespace is a problem
   * vtkVariant("  infinity  ").ToDouble() == NaN <-- any whitespace is a problem
   *
   * In these cases, trimming the whitespace gives us the result we expect:
   * 2.0 and INF respectively.
   */
  vtkSetMacro(TrimWhitespacePriorToNumericConversion, bool);
  vtkGetMacro(TrimWhitespacePriorToNumericConversion, bool);
  vtkBooleanMacro(TrimWhitespacePriorToNumericConversion, bool);
  //@}

  //@{
  /**
   * When DetectNumericColumns is set to true, the reader uses this value to populate
   * the vtkIntArray where empty strings are found. Default is 0.
   */
  vtkSetMacro(DefaultIntegerValue, int);
  vtkGetMacro(DefaultIntegerValue, int);
  //@}

  //@{
  /**
   * When DetectNumericColumns is set to true, the reader uses this value to populate
   * the vtkDoubleArray where empty strings are found. Default is 0.0
   */
  vtkSetMacro(DefaultDoubleValue, double);
  vtkGetMacro(DefaultDoubleValue, double);
  //@}

  //@{
  /**
   * The name of the array for generating or assigning pedigree ids
   * (default "id").
   */
  vtkSetStringMacro(PedigreeIdArrayName);
  vtkGetStringMacro(PedigreeIdArrayName);
  //@}

  //@{
  /**
   * If on (default), generates pedigree ids automatically.
   * If off, assign one of the arrays to be the pedigree id.
   */
  vtkSetMacro(GeneratePedigreeIds, bool);
  vtkGetMacro(GeneratePedigreeIds, bool);
  vtkBooleanMacro(GeneratePedigreeIds, bool);
  //@}

  //@{
  /**
   * If on, assigns pedigree ids to output. Defaults to off.
   */
  vtkSetMacro(OutputPedigreeIds, bool);
  vtkGetMacro(OutputPedigreeIds, bool);
  vtkBooleanMacro(OutputPedigreeIds, bool);
  //@}

  //@{
  /**
   * If on, also add in the tab (i.e. '\t') character as a field delimiter.
   * We add this specially since applications may have a more
   * difficult time doing this. Defaults to off.
   */
  vtkSetMacro(AddTabFieldDelimiter, bool);
  vtkGetMacro(AddTabFieldDelimiter, bool);
  vtkBooleanMacro(AddTabFieldDelimiter, bool);
  //@}

  /**
   * Returns a human-readable description of the most recent error, if any.
   * Otherwise, returns an empty string.  Note that the result is only valid
   * after calling Update().
   */
  vtkStdString GetLastError();

  //@{
  /**
   * Fallback character for use in the US-ASCII-WITH-FALLBACK
   * character set.  Any characters that have their 8th bit set will
   * be replaced with this code point.  Defaults to 'x'.
   */
  vtkSetMacro(ReplacementCharacter, vtkTypeUInt32);
  vtkGetMacro(ReplacementCharacter, vtkTypeUInt32);
  //@}

protected:
  vtkDelimitedTextReader();
  ~vtkDelimitedTextReader() override;

  int RequestData(vtkInformation*, vtkInformationVector**, vtkInformationVector*) override;

  // Read the content of the input file.
  int ReadData(vtkTable* const output_table);

  // Input source: file name, or in-memory string plus its length
  // (see the FileName / InputString / ReadFromInputString accessors).
  char* FileName;
  vtkTypeBool ReadFromInputString;
  char* InputString;
  int InputStringLength;

  // Character set name and record limit (see the matching accessors).
  char* UnicodeCharacterSet;
  vtkIdType MaxRecords;

  // Delimiter sets held as vtkUnicodeString; the first three back the
  // Get/SetUnicode*Delimiters() accessors declared above.
  vtkUnicodeString UnicodeRecordDelimiters;
  vtkUnicodeString UnicodeFieldDelimiters;
  vtkUnicodeString UnicodeStringDelimiters;
  // No accessor is declared in this header for the next two members.
  vtkUnicodeString UnicodeWhitespace;
  vtkUnicodeString UnicodeEscapeCharacter;

  // Numeric column detection options (see the matching accessors).
  bool DetectNumericColumns;
  bool ForceDouble;
  bool TrimWhitespacePriorToNumericConversion;
  int DefaultIntegerValue;
  double DefaultDoubleValue;

  // Parsing options (see the matching accessors).
  char* FieldDelimiterCharacters;
  char StringDelimiter;
  bool UseStringDelimiter;
  bool HaveHeaders;
  // No accessor is declared in this header for UnicodeOutputArrays.
  bool UnicodeOutputArrays;
  bool MergeConsecutiveDelimiters;

  // Pedigree id options (see the matching accessors).
  char* PedigreeIdArrayName;
  bool GeneratePedigreeIds;
  bool OutputPedigreeIds;

  bool AddTabFieldDelimiter;

  // Message returned by GetLastError().
  vtkStdString LastError;

  // Code point substituted in the US-ASCII-WITH-FALLBACK character set.
  vtkTypeUInt32 ReplacementCharacter;

private:
  // Not copyable.
  vtkDelimitedTextReader(const vtkDelimitedTextReader&) = delete;
  void operator=(const vtkDelimitedTextReader&) = delete;
};

#endif