You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
262 lines
8.6 KiB
C
262 lines
8.6 KiB
C
/*=========================================================================

  Program:   Visualization Toolkit
  Module:    vtkParseString.h

  Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
  All rights reserved.
  See Copyright.txt or http://www.kitware.com/Copyright.htm for details.

     This software is distributed WITHOUT ANY WARRANTY; without even
     the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
     PURPOSE.  See the above copyright notice for more information.

=========================================================================*/
|
|
/*-------------------------------------------------------------------------
  Copyright (c) 2012 David Gobbi.

  Contributed to the VisualizationToolkit by the author in April 2012
  under the terms of the Visualization Toolkit 2008 copyright.
-------------------------------------------------------------------------*/
|
|
|
|
/**
  This file provides string handling routines.

  The two important jobs done by these routines are string tokenization
  and string caching.

  Tokenization is done as per the rules of a C++ preprocessor, and
  breaks the strings into ids, literals, and operators.  Any string
  is a valid input for the tokenizer, and it is up to the parser to
  decide if the resulting tokens are valid within the grammar.  The
  two primary tokenization functions are vtkParse_InitTokenizer()
  and vtkParse_NextToken().

  Caching refers to how string memory management is done.  The
  parser uses "const char *" for all strings, and expects all strings
  to be persistent and constant.  These conditions are automatically
  met by static strings, but dynamically-generated strings must be
  cached until the parse is complete.  The primary caching functions
  are vtkParse_CacheString() and vtkParse_FreeStringCache().
*/
|
|
|
|
#ifndef vtkParseString_h
|
|
#define vtkParseString_h
|
|
|
|
#include "vtkWrappingToolsModule.h"
|
|
#include <stddef.h>
|
|
|
|
#ifdef __cplusplus
|
|
extern "C"
|
|
{
|
|
#endif
|
|
|
|
/**
 * Various important char types for tokenization.
 *
 * Every enumerator is a bit mask, so several classes can be combined with
 * bitwise OR (for example as the "bits" argument of vtkParse_CharType()).
 * CPRE_XDIGIT, CPRE_ID, CPRE_XID and CPRE_WHITE are unions of the more
 * basic single-bit classes; they are spelled out as such below so the
 * relationship is visible, and their numeric values are unchanged.
 */
typedef enum _parse_char_type
{
  CPRE_NONDIGIT = 0x01,                                   /* A-Z a-z and _ */
  CPRE_DIGIT = 0x02,                                      /* 0-9 */
  CPRE_XDIGIT = (CPRE_NONDIGIT | CPRE_DIGIT),             /* 0x03: 0-9 A-Z a-z and _ */
  CPRE_EXTEND = 0x04,                                     /* non-ascii character */
  CPRE_ID = (CPRE_NONDIGIT | CPRE_EXTEND),                /* 0x05: starting char for identifier */
  CPRE_XID = (CPRE_NONDIGIT | CPRE_DIGIT | CPRE_EXTEND),  /* 0x07: continuing char for identifier */
  CPRE_HEX = 0x08,                                        /* 0-9 A-F a-f hexadecimal digits */
  CPRE_SIGN = 0x10,                                       /* +- (sign for floats) */
  CPRE_QUOTE = 0x20,                                      /* " and ' */
  CPRE_HSPACE = 0x40,                                     /* space, tab, carriage return */
  CPRE_VSPACE = 0x80,                                     /* newline, vertical tab, form feed */
  CPRE_WHITE = (CPRE_HSPACE | CPRE_VSPACE),               /* 0xC0: all whitespace characters */
} parse_char_type;
|
|
|
|
/**
|
|
* check if a char is of a certain type
|
|
*/
|
|
VTKWRAPPINGTOOLS_EXPORT int vtkParse_CharType(char c, int bits);
|
|
|
|
/**
|
|
* Whitespace types that can be used with the tokenizer.
|
|
* - WS_DEFAULT treats newlines and formfeeds as regular whitespace.
|
|
* - WS_PREPROC treats newline as end-of-line, not as whitespace.
|
|
* - WS_COMMENT treats comments as tokens, not as whitespace.
|
|
*/
|
|
typedef enum _parse_space_t
|
|
{
|
|
WS_DEFAULT = CPRE_WHITE, /* skip all whitespace */
|
|
WS_PREPROC = CPRE_HSPACE, /* skip horizontal whitespace only */
|
|
WS_COMMENT = (CPRE_WHITE | 0x100), /* comments as tokens */
|
|
} parse_space_t;
|
|
|
|
/**
 * Preprocessor tokens for C++.
 *
 * Numbering starts at 257 and increases by one per enumerator, so every
 * value here is larger than any 8-bit character value.
 * NOTE(review): presumably single-character tokens are reported using the
 * character's own value, which is why this range starts above 256; confirm
 * in the tokenizer implementation.
 */
typedef enum _preproc_token_t
{
  TOK_OTHER = 257,
  TOK_ID,         /* any id */
  TOK_CHAR,       /* char literal */
  TOK_STRING,     /* string literal */
  TOK_NUMBER,     /* any numeric literal */
  TOK_COMMENT,    /* C or C++ comment */
  TOK_DBLHASH,    /* ## */
  TOK_SCOPE,      /* :: */
  TOK_INCR,       /* ++ */
  TOK_DECR,       /* -- */
  TOK_RSHIFT,     /* >> */
  TOK_LSHIFT,     /* << */
  TOK_AND,        /* && */
  TOK_OR,         /* || */
  TOK_EQ,         /* == */
  TOK_NE,         /* != */
  TOK_GE,         /* >= */
  TOK_LE,         /* <= */
  TOK_ADD_EQ,     /* += */
  TOK_SUB_EQ,     /* -= */
  TOK_MUL_EQ,     /* *= */
  TOK_DIV_EQ,     /* /= */
  TOK_MOD_EQ,     /* %= */
  TOK_AND_EQ,     /* &= */
  TOK_OR_EQ,      /* |= */
  TOK_XOR_EQ,     /* ^= */
  TOK_ARROW,      /* -> */
  TOK_DOT_STAR,   /* .* */
  TOK_ARROW_STAR, /* ->* */
  TOK_RSHIFT_EQ,  /* >>= */
  TOK_LSHIFT_EQ,  /* <<= */
  TOK_ELLIPSIS,   /* ... */
} preproc_token_t;
|
|
|
|
/**
|
|
* A struct for going through a string one token at a time.
|
|
* If ws is set to WS_PREPROC, then tokenization stops when a
|
|
* newline or null is encountered. If ws is set to WS_DEFAULT,
|
|
* then tokenization only stops when a null is encountered. If
|
|
* ws is set to WS_COMMENT, then tokenization stops only when
|
|
* a null is encountered, and comments are returned as tokens
|
|
* instead of being skipped as whitespace.
|
|
*/
|
|
typedef struct _StringTokenizer
|
|
{
|
|
int tok; /* the current token */
|
|
unsigned int hash; /* the hash of the current token, if it is an id */
|
|
const char* text; /* the text for the current token, not null-teminated */
|
|
size_t len; /* the length of the current token */
|
|
parse_space_t ws; /* controls what to consider as whitespace */
|
|
} StringTokenizer;
|
|
|
|
/**
|
|
* Initialize the tokenizer and get the first token.
|
|
*/
|
|
VTKWRAPPINGTOOLS_EXPORT
|
|
void vtkParse_InitTokenizer(StringTokenizer* tokens, const char* text, parse_space_t wstype);
|
|
|
|
/**
|
|
* Return the next preprocessor token, or '0' if none left.
|
|
*/
|
|
VTKWRAPPINGTOOLS_EXPORT
|
|
int vtkParse_NextToken(StringTokenizer* tokens);
|
|
|
|
/**
|
|
* Skip over whitespace.
|
|
* Return the number of chars until the first non-whitespace token.
|
|
* Set spacetype to WS_DEFAULT, WS_PREPROC, or WS_COMMENT.
|
|
*/
|
|
VTKWRAPPINGTOOLS_EXPORT
|
|
size_t vtkParse_SkipWhitespace(const char* cp, parse_space_t spacetype);
|
|
|
|
/**
|
|
* Skip over a comment, C style or C++ style.
|
|
* Return the number of chars until the end of the comment.
|
|
*/
|
|
VTKWRAPPINGTOOLS_EXPORT
|
|
size_t vtkParse_SkipComment(const char* cp);
|
|
|
|
/**
|
|
* Skip over a string in double or single quotes.
|
|
* Return the number of chars until the end of the quotes.
|
|
*/
|
|
VTKWRAPPINGTOOLS_EXPORT
|
|
size_t vtkParse_SkipQuotes(const char* cp);
|
|
|
|
/**
|
|
* Skip over a number. Uses preprocessor semantics.
|
|
* Return the number of chars until the end of the number.
|
|
*/
|
|
VTKWRAPPINGTOOLS_EXPORT
|
|
size_t vtkParse_SkipNumber(const char* cp);
|
|
|
|
/**
|
|
* Skip over an identifier.
|
|
* Return the number of chars until the end of the identifier.
|
|
*/
|
|
VTKWRAPPINGTOOLS_EXPORT
|
|
size_t vtkParse_SkipId(const char* cp);
|
|
|
|
/**
|
|
* Compute the hash for a id, for use in hash table lookups.
|
|
* This stops at the first non-Id character, so it is safe to use
|
|
* on a string that is not null-terminated as long as there is either
|
|
* whitespace or an operator character before the end of the string.
|
|
* It can be used on null-terminated strings as well, of course.
|
|
*/
|
|
VTKWRAPPINGTOOLS_EXPORT
|
|
unsigned int vtkParse_HashId(const char* cp);
|
|
|
|
/**
|
|
* Decode a single unicode character from utf8, or set error flag to 1.
|
|
* The character pointer will be advanced by one if an error occurred,
|
|
* and the return value will be the value of the first octet.
|
|
*/
|
|
VTKWRAPPINGTOOLS_EXPORT
|
|
unsigned int vtkParse_DecodeUtf8(const char** cpp, int* error_flag);
|
|
|
|
/**
 * StringCache provides a simple way of allocating strings centrally.
 * It eliminates the need to allocate and free each individual string,
 * which makes the code simpler and more efficient.
 *
 * Use vtkParse_InitStringCache() before first use and
 * vtkParse_FreeStringCache() to release everything at once.
 */
typedef struct _StringCache
{
  unsigned long NumberOfChunks; /* presumably the number of entries in Chunks; confirm */
  char** Chunks;                /* presumably the allocated blocks of string storage; confirm */
  size_t ChunkSize;             /* presumably the size in bytes of a chunk; confirm */
  size_t Position;              /* presumably the next free offset in the current chunk; confirm */
} StringCache;
|
|
|
|
/**
|
|
* Initialize the string cache.
|
|
*/
|
|
VTKWRAPPINGTOOLS_EXPORT
|
|
void vtkParse_InitStringCache(StringCache* cache);
|
|
|
|
/**
|
|
* Allocate a new string from the cache.
|
|
* A total of n+1 bytes will be allocated, to leave room for null.
|
|
*/
|
|
VTKWRAPPINGTOOLS_EXPORT
|
|
char* vtkParse_NewString(StringCache* cache, size_t n);
|
|
|
|
/**
|
|
* Cache a string so that it can then be used in the vtkParse data
|
|
* structures. The string will last until the application exits.
|
|
* At most 'n' chars will be copied, and the string will be terminated.
|
|
* If a null pointer is provided, then a null pointer will be returned.
|
|
*/
|
|
VTKWRAPPINGTOOLS_EXPORT
|
|
const char* vtkParse_CacheString(StringCache* cache, const char* cp, size_t n);
|
|
|
|
/**
|
|
* Free all strings that were created with vtkParse_NewString() or
|
|
* with vtkParse_CacheString().
|
|
*/
|
|
VTKWRAPPINGTOOLS_EXPORT
|
|
void vtkParse_FreeStringCache(StringCache* cache);
|
|
|
|
#ifdef __cplusplus
|
|
} /* extern "C" */
|
|
#endif
|
|
|
|
#endif
|
|
/* VTK-HeaderTest-Exclude: vtkParseString.h */
|