You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
286 lines
9.2 KiB
C++
286 lines
9.2 KiB
C++
/*=========================================================================
|
|
|
|
Program: Visualization Toolkit
|
|
Module: vtkUnicodeString.h
|
|
|
|
-------------------------------------------------------------------------
|
|
Copyright 2008 Sandia Corporation.
|
|
Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
the U.S. Government retains certain rights in this software.
|
|
-------------------------------------------------------------------------
|
|
|
|
Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
|
|
All rights reserved.
|
|
See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
|
|
|
|
This software is distributed WITHOUT ANY WARRANTY; without even
|
|
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
PURPOSE. See the above copyright notice for more information.
|
|
|
|
=========================================================================*/
|
|
|
|
/**
|
|
* @class vtkUnicodeString
|
|
* @brief String class that stores Unicode text.
|
|
*
|
|
*
|
|
* vtkUnicodeString provides storage for Unicode text. Conceptually, it
|
|
* acts as a container for a sequence of Unicode characters, providing a
|
|
* public interface similar to basic_string<>. For character-oriented
|
|
* operations, this means reading / writing 32-bit UTF-32 / UCS-4 characters.
|
|
* Internally, characters may be stored using variable-length encodings for
|
|
* efficiency. Explicit conversions to-and-from other encodings are provided,
|
|
* and implicit conversions are deliberately avoided, to avoid confusion.
|
|
*
|
|
* Note that, because vtkUnicodeString uses variable-length encodings for
|
|
* storage, character-oriented operations will generally provide O(N) access
|
|
* instead of O(1).
|
|
*
|
|
* The current implementation stores the sequence with UTF-8 encoding, but
|
|
* this choice is subject to change and might become a compile-time or
|
|
* run-time option.
|
|
*
|
|
* @par Thanks:
|
|
* Developed by Timothy M. Shead (tshead@sandia.gov) at Sandia National Laboratories.
|
|
*/
|
|
|
|
#ifndef vtkUnicodeString_h
|
|
#define vtkUnicodeString_h
|
|
|
|
#include "vtkCommonCoreModule.h" // For export macro
|
|
#include "vtkSystemIncludes.h"
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
class vtkUnicodeString;
|
|
|
|
typedef vtkTypeUInt32 vtkUnicodeStringValueType;
|
|
|
|
//
|
|
// The following should be eventually placed in vtkSetGet.h
|
|
//
|
|
|
|
// This is same as extra extended template macro with an
|
|
// additional case for VTK_UNICODE_STRING
|
|
#define vtkSuperExtraExtendedTemplateMacro(call) \
|
|
vtkExtraExtendedTemplateMacro(call); \
|
|
vtkTemplateMacroCase(VTK_UNICODE_STRING, vtkUnicodeString, call)
|
|
|
|
class VTKCOMMONCORE_EXPORT vtkUnicodeString
|
|
{
|
|
public:
|
|
typedef vtkUnicodeStringValueType value_type;
|
|
typedef std::string::size_type size_type;
|
|
|
|
class VTKCOMMONCORE_EXPORT const_iterator
|
|
{
|
|
public:
|
|
typedef std::bidirectional_iterator_tag iterator_category;
|
|
typedef vtkUnicodeStringValueType value_type;
|
|
typedef std::string::difference_type difference_type;
|
|
typedef value_type* pointer;
|
|
typedef value_type& reference;
|
|
|
|
const_iterator();
|
|
|
|
value_type operator*() const;
|
|
bool operator==(const const_iterator&) const;
|
|
bool operator!=(const const_iterator&) const;
|
|
const_iterator& operator++();
|
|
const_iterator operator++(int);
|
|
const_iterator& operator--();
|
|
const_iterator operator--(int);
|
|
|
|
private:
|
|
const_iterator(std::string::const_iterator);
|
|
friend class vtkUnicodeString;
|
|
std::string::const_iterator Position;
|
|
};
|
|
|
|
/**
|
|
* Constructs an empty string.
|
|
*/
|
|
vtkUnicodeString();
|
|
/**
|
|
* Makes a deep-copy of another sequence.
|
|
*/
|
|
vtkUnicodeString(const vtkUnicodeString&);
|
|
/**
|
|
* Constructs a sequence of repeated characters. Note: throws an exception if
|
|
* the character isn't a valid Unicode code point.
|
|
*/
|
|
vtkUnicodeString(size_type count, value_type character);
|
|
/**
|
|
* Constructs a string from a sequence of Unicode characters.
|
|
*/
|
|
vtkUnicodeString(const_iterator begin, const_iterator end);
|
|
|
|
//@{
|
|
/**
|
|
* Tests a sequence of bytes, returning true iff they are a valid UTF-8 sequence.
|
|
*/
|
|
static bool is_utf8(const char*);
|
|
static bool is_utf8(const std::string&);
|
|
//@}
|
|
|
|
//@{
|
|
/**
|
|
* Constructs a string from a null-terminated sequence of UTF-8 encoded characters.
|
|
*/
|
|
static vtkUnicodeString from_utf8(const char*);
|
|
// Constructs a string from a half-open sequence of UTF-8 encoded characters.
|
|
static vtkUnicodeString from_utf8(const char* begin, const char* end);
|
|
// Constructs a string from a sequence of UTF-8 encoded characters.
|
|
static vtkUnicodeString from_utf8(const std::string&);
|
|
//@}
|
|
/**
|
|
* Constructs a string from a null-terminated sequence of UTF-16 encoded characters.
|
|
*/
|
|
static vtkUnicodeString from_utf16(const vtkTypeUInt16*);
|
|
|
|
/**
|
|
* Replaces the current sequence with a deep copy of another.
|
|
*/
|
|
vtkUnicodeString& operator=(const vtkUnicodeString&);
|
|
|
|
/**
|
|
* Returns a forward iterator that points at the first element of the sequence
|
|
* (or just beyond the end of an empty sequence).
|
|
*/
|
|
const_iterator begin() const;
|
|
/**
|
|
* Returns a forward iterator that points just beyond the end of the sequence.
|
|
*/
|
|
const_iterator end() const;
|
|
|
|
/**
|
|
* Returns the Unicode character at the given character offset within the sequence,
|
|
* or throws std::out_of_range if the offset is invalid.
|
|
*/
|
|
value_type at(size_type offset) const;
|
|
/**
|
|
* Returns the Unicode character at the given character offset within the sequence.
|
|
* Behavior is undefined if the position is invalid.
|
|
*/
|
|
value_type operator[](size_type offset) const;
|
|
|
|
/**
|
|
* Returns the sequence as a null-terminated sequence of UTF-8 encoded characters.
|
|
*/
|
|
const char* utf8_str() const;
|
|
/**
|
|
* Inserts the sequence into the supplied storage as a collection of UTF-8 encoded
|
|
* characters.
|
|
*/
|
|
void utf8_str(std::string& result) const;
|
|
/**
|
|
* Returns the sequence as a collection of UTF-16 encoded characters.
|
|
*/
|
|
std::vector<vtkTypeUInt16> utf16_str() const;
|
|
/**
|
|
* Inserts the sequence into the supplied storage as a collection of UTF-16 encoded
|
|
* characters
|
|
*/
|
|
void utf16_str(std::vector<vtkTypeUInt16>& result) const;
|
|
|
|
/**
|
|
* Returns the number of bytes (not characters) in the sequence.
|
|
*/
|
|
size_type byte_count() const;
|
|
/**
|
|
* Returns the number of characters (not bytes) in the sequence.
|
|
*/
|
|
size_type character_count() const;
|
|
/**
|
|
* Returns true if the string contains an empty sequence.
|
|
*/
|
|
bool empty() const;
|
|
|
|
/**
|
|
* The largest representable value of size_type, used as a special-code.
|
|
*/
|
|
static const size_type npos;
|
|
|
|
/**
|
|
* Append a Unicode character to the end of the sequence.
|
|
*/
|
|
vtkUnicodeString& operator+=(value_type);
|
|
/**
|
|
* Append a Unicode sequence to the end of the current sequence.
|
|
*/
|
|
vtkUnicodeString& operator+=(const vtkUnicodeString& rhs);
|
|
|
|
/**
|
|
* Append a Unicode character to the end of the sequence.
|
|
*/
|
|
void push_back(value_type);
|
|
|
|
//@{
|
|
/**
|
|
* Append Unicode to the current sequence.
|
|
*/
|
|
void append(const vtkUnicodeString& value);
|
|
void append(size_type count, value_type character);
|
|
void append(const_iterator begin, const_iterator end);
|
|
//@}
|
|
|
|
//@{
|
|
/**
|
|
* Replace the current sequence with another.
|
|
*/
|
|
void assign(const vtkUnicodeString& value);
|
|
void assign(size_type count, value_type character);
|
|
void assign(const_iterator begin, const_iterator end);
|
|
//@}
|
|
|
|
/**
|
|
* Resets the string to an empty sequence
|
|
*/
|
|
void clear();
|
|
|
|
/**
|
|
* Returns a copy of the current sequence, modified so that differences in case are
|
|
* eliminated. Thus, you can run fold_case() on two strings and then comparse them
|
|
* to obtain a case-insensitive comparison. Note that the string returned by
|
|
* fold_case() may be larger than the original source sequence,
|
|
|
|
* See http://www.unicode.org/Public/UNIDATA/CaseFolding.txt for details.
|
|
*/
|
|
vtkUnicodeString fold_case() const;
|
|
|
|
/**
|
|
* Returns a negative value if the sequence compares less-than the
|
|
* operand sequence, zero if the two sequences compare equal, or
|
|
* a positive value otherwise. Note that the definition of "less-than"
|
|
* is undefined, so you should use some other method if you wish to
|
|
* establish a specific ordering (such as alphabetical).
|
|
*/
|
|
int compare(const vtkUnicodeString&) const;
|
|
|
|
/**
|
|
* Returns a subset of the current sequence that up-to 'count' characters in length,
|
|
* starting at character 'offset'.
|
|
*/
|
|
vtkUnicodeString substr(size_type offset = 0, size_type count = npos) const;
|
|
|
|
/**
|
|
* Swap the sequences stored by two strings.
|
|
*/
|
|
void swap(vtkUnicodeString&);
|
|
|
|
private:
|
|
std::string Storage;
|
|
class back_insert_iterator;
|
|
};
|
|
|
|
VTKCOMMONCORE_EXPORT bool operator==(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs);
|
|
VTKCOMMONCORE_EXPORT bool operator!=(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs);
|
|
VTKCOMMONCORE_EXPORT bool operator<(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs);
|
|
VTKCOMMONCORE_EXPORT bool operator<=(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs);
|
|
VTKCOMMONCORE_EXPORT bool operator>=(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs);
|
|
VTKCOMMONCORE_EXPORT bool operator>(const vtkUnicodeString& lhs, const vtkUnicodeString& rhs);
|
|
|
|
#endif
|
|
|
|
// VTK-HeaderTest-Exclude: vtkUnicodeString.h
|