/*========================================================================= Program: Visualization Toolkit Module: vtkParseString.h Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen All rights reserved. See Copyright.txt or http://www.kitware.com/Copyright.htm for details. This software is distributed WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the above copyright notice for more information. =========================================================================*/ /*------------------------------------------------------------------------- Copyright (c) 2012 David Gobbi. Contributed to the VisualizationToolkit by the author in April 2012 under the terms of the Visualization Toolkit 2008 copyright. -------------------------------------------------------------------------*/ /** This file provides string handling routines. The two important jobs done by these routines are string tokenization and string caching. Tokenization is done as per the rules of a C++ preprocessor, and breaks the strings into ids, literals, and operators. Any string is a valid input for the tokenizer, and it is up to the parser to decide if the resulting tokens are valid within the grammar. The two primary tokenization functions are vtkParse_InitTokenizer() and vtkParse_NextToken(). Caching refers to how string memory management is done. The parser uses "const char *" for all strings, and expects all strings to be persistent and constant. These conditions are automatically met by static strings, but dynamically-generated strings must be cached until the parse is complete. The primary caching functions are vtkParse_CacheString() and vtkParse_FreeStringCache(). */ #ifndef vtkParseString_h #define vtkParseString_h #include "vtkWrappingToolsModule.h" #include #ifdef __cplusplus extern "C" { #endif /** * Various important char types for tokenization */ typedef enum _parse_char_type { CPRE_NONDIGIT = 0x01, /* A-Z a-z and _ */ CPRE_DIGIT = 0x02, /* 0-9 */ CPRE_XDIGIT = 0x03, /* 0-9 A-Z a-z and _ */ CPRE_EXTEND = 0x04, /* non-ascii character */ CPRE_ID = 0x05, /* starting char for identifier */ CPRE_XID = 0x07, /* continuing char for identifier */ CPRE_HEX = 0x08, /* 0-9 A-F a-f hexadecimal digits */ CPRE_SIGN = 0x10, /* +- (sign for floats) */ CPRE_QUOTE = 0x20, /* " and ' */ CPRE_HSPACE = 0x40, /* space, tab, carriage return */ CPRE_VSPACE = 0x80, /* newline, vertical tab, form feed */ CPRE_WHITE = 0xC0, /* all whitespace characters */ } parse_char_type; /** * check if a char is of a certain type */ VTKWRAPPINGTOOLS_EXPORT int vtkParse_CharType(char c, int bits); /** * Whitespace types that can be used with the tokenizer. * - WS_DEFAULT treats newlines and formfeeds as regular whitespace. * - WS_PREPROC treats newline as end-of-line, not as whitespace. * - WS_COMMENT treats comments as tokens, not as whitespace. */ typedef enum _parse_space_t { WS_DEFAULT = CPRE_WHITE, /* skip all whitespace */ WS_PREPROC = CPRE_HSPACE, /* skip horizontal whitespace only */ WS_COMMENT = (CPRE_WHITE | 0x100), /* comments as tokens */ } parse_space_t; /** * Preprocessor tokens for C++. */ typedef enum _preproc_token_t { TOK_OTHER = 257, TOK_ID, /* any id */ TOK_CHAR, /* char literal */ TOK_STRING, /* string literal */ TOK_NUMBER, /* any numeric literal */ TOK_COMMENT, /* C or C++ comment */ TOK_DBLHASH, /* ## */ TOK_SCOPE, /* :: */ TOK_INCR, /* ++ */ TOK_DECR, /* -- */ TOK_RSHIFT, /* >> */ TOK_LSHIFT, /* << */ TOK_AND, /* && */ TOK_OR, /* || */ TOK_EQ, /* == */ TOK_NE, /* != */ TOK_GE, /* >= */ TOK_LE, /* <= */ TOK_ADD_EQ, /* += */ TOK_SUB_EQ, /* -= */ TOK_MUL_EQ, /* *= */ TOK_DIV_EQ, /* /= */ TOK_MOD_EQ, /* %= */ TOK_AND_EQ, /* &= */ TOK_OR_EQ, /* |= */ TOK_XOR_EQ, /* ^= */ TOK_ARROW, /* -> */ TOK_DOT_STAR, /* .* */ TOK_ARROW_STAR, /* ->* */ TOK_RSHIFT_EQ, /* >>= */ TOK_LSHIFT_EQ, /* <<= */ TOK_ELLIPSIS, /* ... */ } preproc_token_t; /** * A struct for going through a string one token at a time. * If ws is set to WS_PREPROC, then tokenization stops when a * newline or null is encountered. If ws is set to WS_DEFAULT, * then tokenization only stops when a null is encountered. If * ws is set to WS_COMMENT, then tokenization stops only when * a null is encountered, and comments are returned as tokens * instead of being skipped as whitespace. */ typedef struct _StringTokenizer { int tok; /* the current token */ unsigned int hash; /* the hash of the current token, if it is an id */ const char* text; /* the text for the current token, not null-teminated */ size_t len; /* the length of the current token */ parse_space_t ws; /* controls what to consider as whitespace */ } StringTokenizer; /** * Initialize the tokenizer and get the first token. */ VTKWRAPPINGTOOLS_EXPORT void vtkParse_InitTokenizer(StringTokenizer* tokens, const char* text, parse_space_t wstype); /** * Return the next preprocessor token, or '0' if none left. */ VTKWRAPPINGTOOLS_EXPORT int vtkParse_NextToken(StringTokenizer* tokens); /** * Skip over whitespace. * Return the number of chars until the first non-whitespace token. * Set spacetype to WS_DEFAULT, WS_PREPROC, or WS_COMMENT. */ VTKWRAPPINGTOOLS_EXPORT size_t vtkParse_SkipWhitespace(const char* cp, parse_space_t spacetype); /** * Skip over a comment, C style or C++ style. * Return the number of chars until the end of the comment. */ VTKWRAPPINGTOOLS_EXPORT size_t vtkParse_SkipComment(const char* cp); /** * Skip over a string in double or single quotes. * Return the number of chars until the end of the quotes. */ VTKWRAPPINGTOOLS_EXPORT size_t vtkParse_SkipQuotes(const char* cp); /** * Skip over a number. Uses preprocessor semantics. * Return the number of chars until the end of the number. */ VTKWRAPPINGTOOLS_EXPORT size_t vtkParse_SkipNumber(const char* cp); /** * Skip over an identifier. * Return the number of chars until the end of the identifier. */ VTKWRAPPINGTOOLS_EXPORT size_t vtkParse_SkipId(const char* cp); /** * Compute the hash for a id, for use in hash table lookups. * This stops at the first non-Id character, so it is safe to use * on a string that is not null-terminated as long as there is either * whitespace or an operator character before the end of the string. * It can be used on null-terminated strings as well, of course. */ VTKWRAPPINGTOOLS_EXPORT unsigned int vtkParse_HashId(const char* cp); /** * Decode a single unicode character from utf8, or set error flag to 1. * The character pointer will be advanced by one if an error occurred, * and the return value will be the value of the first octet. */ VTKWRAPPINGTOOLS_EXPORT unsigned int vtkParse_DecodeUtf8(const char** cpp, int* error_flag); /** * StringCache provides a simple way of allocating strings centrally. * It eliminates the need to allocate and free each individual string, * which makes the code simpler and more efficient. */ typedef struct _StringCache { unsigned long NumberOfChunks; char** Chunks; size_t ChunkSize; size_t Position; } StringCache; /** * Initialize the string cache. */ VTKWRAPPINGTOOLS_EXPORT void vtkParse_InitStringCache(StringCache* cache); /** * Allocate a new string from the cache. * A total of n+1 bytes will be allocated, to leave room for null. */ VTKWRAPPINGTOOLS_EXPORT char* vtkParse_NewString(StringCache* cache, size_t n); /** * Cache a string so that it can then be used in the vtkParse data * structures. The string will last until the application exits. * At most 'n' chars will be copied, and the string will be terminated. * If a null pointer is provided, then a null pointer will be returned. */ VTKWRAPPINGTOOLS_EXPORT const char* vtkParse_CacheString(StringCache* cache, const char* cp, size_t n); /** * Free all strings that were created with vtkParse_NewString() or * with vtkParse_CacheString(). */ VTKWRAPPINGTOOLS_EXPORT void vtkParse_FreeStringCache(StringCache* cache); #ifdef __cplusplus } /* extern "C" */ #endif #endif /* VTK-HeaderTest-Exclude: vtkParseString.h */