You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

262 lines
8.6 KiB
C

/*=========================================================================
Program: Visualization Toolkit
Module: vtkParseString.h
Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
All rights reserved.
See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
This software is distributed WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the above copyright notice for more information.
=========================================================================*/
/*-------------------------------------------------------------------------
Copyright (c) 2012 David Gobbi.
Contributed to the VisualizationToolkit by the author in April 2012
under the terms of the Visualization Toolkit 2008 copyright.
-------------------------------------------------------------------------*/
/**
This file provides string handling routines.
The two important jobs done by these routines are string tokenization
and string caching.
Tokenization is done as per the rules of a C++ preprocessor, and
breaks the strings into ids, literals, and operators. Any string
is a valid input for the tokenizer, and it is up to the parser to
decide if the resulting tokens are valid within the grammar. The
two primary tokenization functions are vtkParse_InitTokenizer()
and vtkParse_NextToken().
Caching refers to how string memory management is done. The
parser uses "const char *" for all strings, and expects all strings
to be persistent and constant. These conditions are automatically
met by static strings, but dynamically-generated strings must be
cached until the parse is complete. The primary caching functions
are vtkParse_CacheString() and vtkParse_FreeStringCache().
*/
#ifndef vtkParseString_h
#define vtkParseString_h
#include "vtkWrappingToolsModule.h"
#include <stddef.h>
#ifdef __cplusplus
extern "C"
{
#endif
/**
 * Character classes used during tokenization, expressed as bit flags.
 * Single-bit classes are written as shifts; the composite classes are
 * spelled out as the bitwise OR of the classes they combine.
 */
typedef enum _parse_char_type
{
  CPRE_NONDIGIT = 1 << 0,                     /* A-Z a-z and _ */
  CPRE_DIGIT = 1 << 1,                        /* 0-9 */
  CPRE_XDIGIT = CPRE_NONDIGIT | CPRE_DIGIT,   /* 0-9 A-Z a-z and _ */
  CPRE_EXTEND = 1 << 2,                       /* non-ascii character */
  CPRE_ID = CPRE_NONDIGIT | CPRE_EXTEND,      /* starting char for identifier */
  CPRE_XID = CPRE_ID | CPRE_DIGIT,            /* continuing char for identifier */
  CPRE_HEX = 1 << 3,                          /* 0-9 A-F a-f hexadecimal digits */
  CPRE_SIGN = 1 << 4,                         /* +- (sign for floats) */
  CPRE_QUOTE = 1 << 5,                        /* " and ' */
  CPRE_HSPACE = 1 << 6,                       /* space, tab, carriage return */
  CPRE_VSPACE = 1 << 7,                       /* newline, vertical tab, form feed */
  CPRE_WHITE = CPRE_HSPACE | CPRE_VSPACE,     /* all whitespace characters */
} parse_char_type;
/**
 * Check if a char is of a certain type.
 *
 * c    - the character to classify.
 * bits - the character type(s) to test for; presumably one or more
 *        parse_char_type flags ORed together (confirm against the
 *        implementation).
 */
VTKWRAPPINGTOOLS_EXPORT int vtkParse_CharType(char c, int bits);
/**
 * Whitespace types that can be used with the tokenizer.
 * - WS_DEFAULT treats newlines and formfeeds as regular whitespace.
 * - WS_PREPROC treats newline as end-of-line, not as whitespace.
 * - WS_COMMENT treats comments as tokens, not as whitespace.
 *
 * The values are built from the CPRE_HSPACE / CPRE_WHITE character
 * classes; WS_COMMENT adds the extra bit 0x100 on top of CPRE_WHITE.
 */
typedef enum _parse_space_t
{
WS_DEFAULT = CPRE_WHITE, /* skip all whitespace */
WS_PREPROC = CPRE_HSPACE, /* skip horizontal whitespace only */
WS_COMMENT = (CPRE_WHITE | 0x100), /* skip all whitespace, but return comments as tokens */
} parse_space_t;
/**
 * Preprocessor tokens for C++.
 *
 * Numbering starts at 257 and increases by one per enumerator, so every
 * value here lies above the range of an 8-bit character.
 * NOTE(review): presumably single-character tokens are reported using
 * their character code; confirm against the tokenizer implementation.
 */
typedef enum _preproc_token_t
{
TOK_OTHER = 257,
TOK_ID, /* any id */
TOK_CHAR, /* char literal */
TOK_STRING, /* string literal */
TOK_NUMBER, /* any numeric literal */
TOK_COMMENT, /* C or C++ comment */
TOK_DBLHASH, /* ## */
TOK_SCOPE, /* :: */
TOK_INCR, /* ++ */
TOK_DECR, /* -- */
TOK_RSHIFT, /* >> */
TOK_LSHIFT, /* << */
TOK_AND, /* && */
TOK_OR, /* || */
TOK_EQ, /* == */
TOK_NE, /* != */
TOK_GE, /* >= */
TOK_LE, /* <= */
TOK_ADD_EQ, /* += */
TOK_SUB_EQ, /* -= */
TOK_MUL_EQ, /* *= */
TOK_DIV_EQ, /* /= */
TOK_MOD_EQ, /* %= */
TOK_AND_EQ, /* &= */
TOK_OR_EQ, /* |= */
TOK_XOR_EQ, /* ^= */
TOK_ARROW, /* -> */
TOK_DOT_STAR, /* .* */
TOK_ARROW_STAR, /* ->* */
TOK_RSHIFT_EQ, /* >>= */
TOK_LSHIFT_EQ, /* <<= */
TOK_ELLIPSIS, /* ... */
} preproc_token_t;
/**
 * A struct for going through a string one token at a time.
 * If ws is set to WS_PREPROC, then tokenization stops when a
 * newline or null is encountered. If ws is set to WS_DEFAULT,
 * then tokenization only stops when a null is encountered. If
 * ws is set to WS_COMMENT, then tokenization stops only when
 * a null is encountered, and comments are returned as tokens
 * instead of being skipped as whitespace.
 *
 * Because 'text' is not null-terminated, always use it together
 * with 'len' when reading the current token.
 */
typedef struct _StringTokenizer
{
int tok; /* the current token */
unsigned int hash; /* the hash of the current token, if it is an id */
const char* text; /* the text for the current token, not null-terminated */
size_t len; /* the length of the current token */
parse_space_t ws; /* controls what to consider as whitespace */
} StringTokenizer;
/**
 * Initialize the tokenizer and get the first token.
 *
 * tokens - the tokenizer state to initialize.
 * text   - the string to be tokenized.
 * wstype - what to treat as whitespace: WS_DEFAULT, WS_PREPROC,
 *          or WS_COMMENT.
 */
VTKWRAPPINGTOOLS_EXPORT
void vtkParse_InitTokenizer(StringTokenizer* tokens, const char* text, parse_space_t wstype);
/**
 * Advance to the next preprocessor token and return it, or return
 * zero if there are no tokens left.
 * NOTE(review): the earlier wording said '0'; the integer 0 (not the
 * character '0') is presumably what is meant -- confirm against the
 * implementation.
 */
VTKWRAPPINGTOOLS_EXPORT
int vtkParse_NextToken(StringTokenizer* tokens);
/**
 * Skip over whitespace.
 * Return the number of chars until the first non-whitespace token.
 *
 * cp        - the position in the text at which to start skipping.
 * spacetype - set to WS_DEFAULT, WS_PREPROC, or WS_COMMENT.
 */
VTKWRAPPINGTOOLS_EXPORT
size_t vtkParse_SkipWhitespace(const char* cp, parse_space_t spacetype);
/**
 * Skip over a comment, either C style or C++ style.
 * Return the number of chars until the end of the comment.
 *
 * cp - the position in the text where the comment starts.
 */
VTKWRAPPINGTOOLS_EXPORT
size_t vtkParse_SkipComment(const char* cp);
/**
 * Skip over a string in double or single quotes.
 * Return the number of chars until the end of the quotes.
 *
 * cp - the position in the text where the quoted string starts.
 */
VTKWRAPPINGTOOLS_EXPORT
size_t vtkParse_SkipQuotes(const char* cp);
/**
 * Skip over a number, using preprocessor semantics for what
 * counts as a number.
 * Return the number of chars until the end of the number.
 *
 * cp - the position in the text where the number starts.
 */
VTKWRAPPINGTOOLS_EXPORT
size_t vtkParse_SkipNumber(const char* cp);
/**
 * Skip over an identifier.
 * Return the number of chars until the end of the identifier.
 *
 * cp - the position in the text where the identifier starts.
 */
VTKWRAPPINGTOOLS_EXPORT
size_t vtkParse_SkipId(const char* cp);
/**
 * Compute the hash for an id, for use in hash table lookups.
 * This stops at the first non-id character, so it is safe to use
 * on a string that is not null-terminated as long as there is either
 * whitespace or an operator character before the end of the string.
 * It can be used on null-terminated strings as well, of course.
 *
 * cp - the position in the text where the id starts.
 */
VTKWRAPPINGTOOLS_EXPORT
unsigned int vtkParse_HashId(const char* cp);
/**
 * Decode a single unicode character from utf8, or set the error flag to 1.
 * If an error occurred, the character pointer will be advanced by one
 * and the return value will be the value of the first octet.
 *
 * cpp        - address of the character pointer; the pointer is advanced
 *              as part of decoding.
 * error_flag - set to 1 if the input could not be decoded.
 *              NOTE(review): whether the flag is left untouched on success
 *              is not stated here -- confirm against the implementation.
 */
VTKWRAPPINGTOOLS_EXPORT
unsigned int vtkParse_DecodeUtf8(const char** cpp, int* error_flag);
/**
 * StringCache provides a simple way of allocating strings centrally.
 * It eliminates the need to allocate and free each individual string,
 * which makes the code simpler and more efficient.
 *
 * Use vtkParse_InitStringCache() before first use, and
 * vtkParse_FreeStringCache() to release the strings.
 *
 * NOTE(review): the field notes below are inferred from the field names
 * only; confirm against the implementation.
 */
typedef struct _StringCache
{
unsigned long NumberOfChunks; /* presumably the number of entries in Chunks */
char** Chunks; /* presumably the memory chunks that hold the strings */
size_t ChunkSize; /* presumably the size of a chunk, in bytes */
size_t Position; /* presumably the next free offset within the current chunk */
} StringCache;
/**
 * Initialize the string cache.
 *
 * cache - the cache to initialize.
 */
VTKWRAPPINGTOOLS_EXPORT
void vtkParse_InitStringCache(StringCache* cache);
/**
 * Allocate a new string from the cache.
 * A total of n+1 bytes will be allocated, to leave room for the
 * terminating null.
 *
 * cache - the cache to allocate from.
 * n     - the length of the string, not counting the terminating null.
 */
VTKWRAPPINGTOOLS_EXPORT
char* vtkParse_NewString(StringCache* cache, size_t n);
/**
 * Cache a string so that it can then be used in the vtkParse data
 * structures. The string will last until the application exits.
 * NOTE(review): vtkParse_FreeStringCache() is documented as freeing
 * strings created by this function, so the lifetime is presumably
 * "until the cache is freed" -- confirm against the implementation.
 * At most 'n' chars will be copied, and the string will be terminated.
 * If a null pointer is provided, then a null pointer will be returned.
 *
 * cache - the cache that will hold the copy.
 * cp    - the string to copy; may be a null pointer.
 * n     - the maximum number of chars to copy.
 */
VTKWRAPPINGTOOLS_EXPORT
const char* vtkParse_CacheString(StringCache* cache, const char* cp, size_t n);
/**
 * Free all strings that were created with vtkParse_NewString() or
 * with vtkParse_CacheString().
 *
 * cache - the cache whose strings are to be freed.
 */
VTKWRAPPINGTOOLS_EXPORT
void vtkParse_FreeStringCache(StringCache* cache);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif
/* VTK-HeaderTest-Exclude: vtkParseString.h */