diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4a61d1194b1103423e78d993329e1b2014a542ba..38fa21210cd32daacaf3a8fb9daf079f554f2923 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -35,6 +35,7 @@ add_executable(SRB2SDL2 MACOSX_BUNDLE WIN32
 	m_misc.c
 	m_perfstats.c
 	m_random.c
+	m_tokenizer.c
 	m_queue.c
 	info.c
 	p_ceilng.c
diff --git a/src/Sourcefile b/src/Sourcefile
index d9de708ce6baf10e10182c5e24c5d8f9da77b30f..5e9ef84154d0d13b6d87c108b3ea5920b6e6b1dd 100644
--- a/src/Sourcefile
+++ b/src/Sourcefile
@@ -29,6 +29,7 @@ m_menu.c
 m_misc.c
 m_perfstats.c
 m_random.c
+m_tokenizer.c
 m_queue.c
 info.c
 p_ceilng.c
diff --git a/src/m_misc.c b/src/m_misc.c
index ce332910dc8ccf78a8c751adfc96c28c3721aea4..5815d17c54619f46afe6fc29c1277a3621ecc77b 100644
--- a/src/m_misc.c
+++ b/src/m_misc.c
@@ -31,6 +31,7 @@
 #include "doomdef.h"
 #include "g_game.h"
 #include "m_misc.h"
+#include "m_tokenizer.h"
 #include "hu_stuff.h"
 #include "st_stuff.h"
 #include "v_video.h"
@@ -1975,168 +1976,54 @@ void M_UnGetToken(void)
 	endPos = oldendPos;
 }
 
-#define NUMTOKENS 2
-static const char *tokenizerInput = NULL;
-static UINT32 tokenCapacity[NUMTOKENS] = {0};
-static char *tokenizerToken[NUMTOKENS] = {NULL};
-static UINT32 tokenizerStartPos = 0;
-static UINT32 tokenizerEndPos = 0;
-static UINT32 tokenizerInputLength = 0;
-static UINT8 tokenizerInComment = 0; // 0 = not in comment, 1 = // Single-line, 2 = /* Multi-line */
+// Tokenizer shared by the M_Tokenizer* functions; NULL while none is open.
+static tokenizer_t *globalTokenizer = NULL;
 
+/** Opens the global tokenizer on a string, replacing any that is still open.
+  *
+  * \param inputString Text to tokenize; passed to Tokenizer_Open as is.
+  */
 void M_TokenizerOpen(const char *inputString)
 {
-	size_t i;
-
-	tokenizerInput = inputString;
-	for (i = 0; i < NUMTOKENS; i++)
-	{
-		tokenCapacity[i] = 1024;
-		tokenizerToken[i] = (char*)Z_Malloc(tokenCapacity[i] * sizeof(char), PU_STATIC, NULL);
-	}
-	tokenizerInputLength = strlen(tokenizerInput);
+	// Opening twice without closing used to leak the previous tokenizer.
+	Tokenizer_Close(globalTokenizer);
+	globalTokenizer = Tokenizer_Open(inputString, 2);
 }
 
+/** Closes the global tokenizer. Does nothing when none is open. */
 void M_TokenizerClose(void)
 {
-	size_t i;
-
-	tokenizerInput = NULL;
-	for (i = 0; i < NUMTOKENS; i++)
-		Z_Free(tokenizerToken[i]);
-	tokenizerStartPos = 0;
-	tokenizerEndPos = 0;
-	tokenizerInComment = 0;
-}
-
-static void M_DetectComment(UINT32 *pos)
-{
-	if (tokenizerInComment)
-		return;
-
-	if (*pos >= tokenizerInputLength - 1)
-		return;
-
-	if (tokenizerInput[*pos] != '/')
-		return;
-
-	//Single-line comment start
-	if (tokenizerInput[*pos + 1] == '/')
-		tokenizerInComment = 1;
-	//Multi-line comment start
-	else if (tokenizerInput[*pos + 1] == '*')
-		tokenizerInComment = 2;
-}
-
-static void M_ReadTokenString(UINT32 i)
-{
-	UINT32 tokenLength = tokenizerEndPos - tokenizerStartPos;
-	if (tokenLength + 1 > tokenCapacity[i])
-	{
-		tokenCapacity[i] = tokenLength + 1;
-		// Assign the memory. Don't forget an extra byte for the end of the string!
-		tokenizerToken[i] = (char *)Z_Malloc(tokenCapacity[i] * sizeof(char), PU_STATIC, NULL);
-	}
-	// Copy the string.
-	M_Memcpy(tokenizerToken[i], tokenizerInput + tokenizerStartPos, (size_t)tokenLength);
-	// Make the final character NUL.
-	tokenizerToken[i][tokenLength] = '\0';
+	Tokenizer_Close(globalTokenizer);
+	globalTokenizer = NULL;
 }
 
+/** Reads the next token from the global tokenizer with Tokenizer_SRB2Read.
+  *
+  * \param i Index of the token buffer to fill; two buffers (0 and 1) exist.
+  * \return The token, or NULL when none is left or no tokenizer is open.
+  */
 const char *M_TokenizerRead(UINT32 i)
 {
-	if (!tokenizerInput)
+	if (!globalTokenizer)
 		return NULL;
 
-	tokenizerStartPos = tokenizerEndPos;
-
-	// Try to detect comments now, in case we're pointing right at one
-	M_DetectComment(&tokenizerStartPos);
-
-	// Find the first non-whitespace char, or else the end of the string trying
-	while ((tokenizerInput[tokenizerStartPos] == ' '
-			|| tokenizerInput[tokenizerStartPos] == '\t'
-			|| tokenizerInput[tokenizerStartPos] == '\r'
-			|| tokenizerInput[tokenizerStartPos] == '\n'
-			|| tokenizerInput[tokenizerStartPos] == '\0'
-			|| tokenizerInput[tokenizerStartPos] == '=' || tokenizerInput[tokenizerStartPos] == ';' // UDMF TEXTMAP.
-			|| tokenizerInComment != 0)
-			&& tokenizerStartPos < tokenizerInputLength)
-	{
-		// Try to detect comment endings now
-		if (tokenizerInComment == 1 && tokenizerInput[tokenizerStartPos] == '\n')
-			tokenizerInComment = 0; // End of line for a single-line comment
-		else if (tokenizerInComment == 2
-			&& tokenizerStartPos < tokenizerInputLength - 1
-			&& tokenizerInput[tokenizerStartPos] == '*'
-			&& tokenizerInput[tokenizerStartPos+1] == '/')
-		{
-			// End of multi-line comment
-			tokenizerInComment = 0;
-			tokenizerStartPos++; // Make damn well sure we're out of the comment ending at the end of it all
-		}
-
-		tokenizerStartPos++;
-		M_DetectComment(&tokenizerStartPos);
-	}
-
-	// If the end of the string is reached, no token is to be read
-	if (tokenizerStartPos == tokenizerInputLength) {
-		tokenizerEndPos = tokenizerInputLength;
-		return NULL;
-	}
-	// Else, if it's one of these three symbols, capture only this one character
-	else if (tokenizerInput[tokenizerStartPos] == ','
-			|| tokenizerInput[tokenizerStartPos] == '{'
-			|| tokenizerInput[tokenizerStartPos] == '}')
-	{
-		tokenizerEndPos = tokenizerStartPos + 1;
-		tokenizerToken[i][0] = tokenizerInput[tokenizerStartPos];
-		tokenizerToken[i][1] = '\0';
-		return tokenizerToken[i];
-	}
-	// Return entire string within quotes, except without the quotes.
-	else if (tokenizerInput[tokenizerStartPos] == '"')
-	{
-		tokenizerEndPos = ++tokenizerStartPos;
-		while (tokenizerInput[tokenizerEndPos] != '"' && tokenizerEndPos < tokenizerInputLength)
-			tokenizerEndPos++;
-
-		M_ReadTokenString(i);
-		tokenizerEndPos++;
-		return tokenizerToken[i];
-	}
-
-	// Now find the end of the token. This includes several additional characters that are okay to capture as one character, but not trailing at the end of another token.
-	tokenizerEndPos = tokenizerStartPos + 1;
-	while ((tokenizerInput[tokenizerEndPos] != ' '
-			&& tokenizerInput[tokenizerEndPos] != '\t'
-			&& tokenizerInput[tokenizerEndPos] != '\r'
-			&& tokenizerInput[tokenizerEndPos] != '\n'
-			&& tokenizerInput[tokenizerEndPos] != ','
-			&& tokenizerInput[tokenizerEndPos] != '{'
-			&& tokenizerInput[tokenizerEndPos] != '}'
-			&& tokenizerInput[tokenizerEndPos] != '=' && tokenizerInput[tokenizerEndPos] != ';' // UDMF TEXTMAP.
-			&& tokenizerInComment == 0)
-			&& tokenizerEndPos < tokenizerInputLength)
-	{
-		tokenizerEndPos++;
-		// Try to detect comment starts now; if it's in a comment, we don't want it in this token
-		M_DetectComment(&tokenizerEndPos);
-	}
-
-	M_ReadTokenString(i);
-	return tokenizerToken[i];
+	return Tokenizer_SRB2Read(globalTokenizer, i);
 }
 
+/** Returns the global tokenizer's end position, or 0 when none is open. */
 UINT32 M_TokenizerGetEndPos(void)
 {
-	return tokenizerEndPos;
+	if (!globalTokenizer)
+		return 0;
+
+	return Tokenizer_GetEndPos(globalTokenizer);
 }
 
+/** Sets the global tokenizer's end position; ignored when none is open. */
 void M_TokenizerSetEndPos(UINT32 newPos)
 {
-	tokenizerEndPos = newPos;
+	if (globalTokenizer)
+		Tokenizer_SetEndPos(globalTokenizer, newPos);
 }
 
 /** Count bits in a number.
diff --git a/src/m_tokenizer.c b/src/m_tokenizer.c
new file mode 100644
index 0000000000000000000000000000000000000000..26275881d3f81fe8db6c22d2e35226839c75af56
--- /dev/null
+++ b/src/m_tokenizer.c
@@ -0,0 +1,278 @@
+// SONIC ROBO BLAST 2
+//-----------------------------------------------------------------------------
+// Copyright (C) 2013-2023 by Sonic Team Junior.
+//
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+//-----------------------------------------------------------------------------
+/// \file m_tokenizer.c
+/// \brief Tokenizer
+
+#include "m_tokenizer.h"
+#include "z_zone.h"
+
+// Punctuation rules the shared scanner can apply.
+typedef enum
+{
+	TOKENSYNTAX_GENERIC, // Rules of Tokenizer_Read
+	TOKENSYNTAX_SRB2     // Rules of Tokenizer_SRB2Read
+} tokensyntax_t;
+
+/** Creates a tokenizer over a NUL-terminated string.
+  *
+  * \param inputString Text to tokenize. Only the pointer is kept, so the text
+  *                    must stay valid while the tokenizer is in use. NULL is
+  *                    accepted; every read then returns NULL.
+  * \param numTokens   Number of token buffers to allocate; 0 is treated as 1.
+  * \return The new tokenizer. Release it with Tokenizer_Close.
+  */
+tokenizer_t *Tokenizer_Open(const char *inputString, unsigned numTokens)
+{
+	tokenizer_t *tokenizer = Z_Malloc(sizeof(tokenizer_t), PU_STATIC, NULL);
+
+	tokenizer->input = inputString;
+	tokenizer->startPos = 0;
+	tokenizer->endPos = 0;
+	// strlen(NULL) is undefined, so a missing input is treated as empty.
+	tokenizer->inputLength = inputString ? (UINT32)strlen(inputString) : 0;
+	tokenizer->inComment = 0;
+	tokenizer->get = Tokenizer_Read;
+
+	if (numTokens < 1)
+		numTokens = 1;
+
+	tokenizer->numTokens = numTokens;
+	tokenizer->capacity = Z_Malloc(sizeof(UINT32) * numTokens, PU_STATIC, NULL);
+	tokenizer->token = Z_Malloc(sizeof(char*) * numTokens, PU_STATIC, NULL);
+
+	for (size_t i = 0; i < numTokens; i++)
+	{
+		tokenizer->capacity[i] = 1024;
+		tokenizer->token[i] = (char*)Z_Malloc(tokenizer->capacity[i] * sizeof(char), PU_STATIC, NULL);
+	}
+
+	return tokenizer;
+}
+
+/** Frees a tokenizer and all of its token buffers.
+  *
+  * \param tokenizer Tokenizer to free; NULL is ignored. The input string is
+  *                  not freed.
+  */
+void Tokenizer_Close(tokenizer_t *tokenizer)
+{
+	if (!tokenizer)
+		return;
+
+	for (size_t i = 0; i < tokenizer->numTokens; i++)
+		Z_Free(tokenizer->token[i]);
+	Z_Free(tokenizer->capacity);
+	Z_Free(tokenizer->token);
+	Z_Free(tokenizer);
+}
+
+// Enters comment state when a "//" or "/*" opener starts at *pos.
+static void Tokenizer_DetectComment(tokenizer_t *tokenizer, UINT32 *pos)
+{
+	if (tokenizer->inComment)
+		return;
+
+	// An opener needs two characters ("inputLength - 1" would wrap on empty input).
+	if (*pos + 1 >= tokenizer->inputLength)
+		return;
+
+	if (tokenizer->input[*pos] != '/')
+		return;
+
+	// Single-line comment start
+	if (tokenizer->input[*pos + 1] == '/')
+		tokenizer->inComment = 1;
+	// Multi-line comment start
+	else if (tokenizer->input[*pos + 1] == '*')
+		tokenizer->inComment = 2;
+}
+
+// Copies input[startPos, endPos) into token buffer i as a NUL-terminated string.
+static void Tokenizer_ReadTokenString(tokenizer_t *tokenizer, UINT32 i)
+{
+	UINT32 tokenLength = tokenizer->endPos - tokenizer->startPos;
+	if (tokenLength + 1 > tokenizer->capacity[i])
+	{
+		// Replacing the buffer: free the old one first, otherwise it is leaked.
+		Z_Free(tokenizer->token[i]);
+		tokenizer->capacity[i] = tokenLength + 1;
+		// Assign the memory. Don't forget an extra byte for the end of the string!
+		tokenizer->token[i] = (char *)Z_Malloc(tokenizer->capacity[i] * sizeof(char), PU_STATIC, NULL);
+	}
+	// Copy the string.
+	M_Memcpy(tokenizer->token[i], tokenizer->input + tokenizer->startPos, (size_t)tokenLength);
+	// Make the final character NUL.
+	tokenizer->token[i][tokenLength] = '\0';
+}
+
+// Returns nonzero for characters that are skipped between tokens.
+static int Tokenizer_IsSkipped(char c, tokensyntax_t syntax)
+{
+	switch (c)
+	{
+		case ' ':
+		case '\t':
+		case '\r':
+		case '\n':
+		case '\0':
+			return 1;
+		case '=':
+		case ';': // UDMF TEXTMAP.
+			return syntax == TOKENSYNTAX_SRB2;
+		default:
+			return 0;
+	}
+}
+
+// Returns nonzero for punctuation that is always a one-character token.
+static int Tokenizer_IsSymbol(char c, tokensyntax_t syntax)
+{
+	switch (c)
+	{
+		case ',':
+		case '{':
+		case '}':
+			return 1;
+		case '[':
+		case ']':
+		case '=':
+		case ':':
+		case '%':
+			return syntax == TOKENSYNTAX_GENERIC;
+		default:
+			return 0;
+	}
+}
+
+// Reads the next token into buffer i using the given punctuation rules. Returns
+// NULL at the end of the input or if the tokenizer, input or index is unusable.
+static const char *Tokenizer_Scan(tokenizer_t *tokenizer, UINT32 i, tokensyntax_t syntax)
+{
+	const char *input;
+	UINT32 length;
+
+	if (!tokenizer || !tokenizer->input || i >= tokenizer->numTokens)
+		return NULL;
+
+	input = tokenizer->input;
+	length = tokenizer->inputLength;
+
+	// Tokenizer_SetEndPos takes any value; never start beyond the input.
+	if (tokenizer->endPos > length)
+		tokenizer->endPos = length;
+
+	tokenizer->startPos = tokenizer->endPos;
+
+	// Try to detect comments now, in case we're pointing right at one
+	Tokenizer_DetectComment(tokenizer, &tokenizer->startPos);
+
+	// Skip separators and comments; the bounds test comes first on purpose.
+	while (tokenizer->startPos < length
+		&& (tokenizer->inComment != 0
+			|| Tokenizer_IsSkipped(input[tokenizer->startPos], syntax)))
+	{
+		// Try to detect comment endings now
+		if (tokenizer->inComment == 1 && input[tokenizer->startPos] == '\n')
+			tokenizer->inComment = 0; // End of line for a single-line comment
+		else if (tokenizer->inComment == 2
+			&& tokenizer->startPos + 1 < length
+			&& input[tokenizer->startPos] == '*'
+			&& input[tokenizer->startPos+1] == '/')
+		{
+			// End of multi-line comment
+			tokenizer->inComment = 0;
+			tokenizer->startPos++; // Skip the '*'; the '/' is skipped just below
+		}
+
+		tokenizer->startPos++;
+		Tokenizer_DetectComment(tokenizer, &tokenizer->startPos);
+	}
+
+	// If the end of the string is reached, no token is to be read
+	if (tokenizer->startPos >= length)
+	{
+		tokenizer->endPos = length;
+		return NULL;
+	}
+
+	// Punctuation that forms a token of its own: capture only this one character
+	if (Tokenizer_IsSymbol(input[tokenizer->startPos], syntax))
+	{
+		tokenizer->endPos = tokenizer->startPos + 1;
+		tokenizer->token[i][0] = input[tokenizer->startPos];
+		tokenizer->token[i][1] = '\0';
+		return tokenizer->token[i];
+	}
+
+	// Return entire string within quotes, except without the quotes.
+	if (input[tokenizer->startPos] == '"')
+	{
+		tokenizer->endPos = ++tokenizer->startPos;
+		while (tokenizer->endPos < length && input[tokenizer->endPos] != '"')
+			tokenizer->endPos++;
+
+		Tokenizer_ReadTokenString(tokenizer, i);
+		// Step over the closing quote. An unterminated string has none, and
+		// stepping anyway would put the next read beyond the input.
+		if (tokenizer->endPos < length)
+			tokenizer->endPos++;
+		return tokenizer->token[i];
+	}
+
+	// Now find the end of the token: the next skipped character, symbol or comment.
+	tokenizer->endPos = tokenizer->startPos + 1;
+	// A comment may start right after a one-character token.
+	Tokenizer_DetectComment(tokenizer, &tokenizer->endPos);
+	while (tokenizer->endPos < length
+		&& tokenizer->inComment == 0
+		&& !Tokenizer_IsSkipped(input[tokenizer->endPos], syntax)
+		&& !Tokenizer_IsSymbol(input[tokenizer->endPos], syntax))
+	{
+		tokenizer->endPos++;
+		// Try to detect comment starts now; if it's in a comment, we don't want it in this token
+		Tokenizer_DetectComment(tokenizer, &tokenizer->endPos);
+	}
+
+	Tokenizer_ReadTokenString(tokenizer, i);
+	return tokenizer->token[i];
+}
+
+/** Reads the next token using the generic rules: , { } [ ] = : % are
+  * one-character tokens and "..." is returned without its quotes.
+  *
+  * \param tokenizer Tokenizer to read from.
+  * \param i         Index of the token buffer that receives the text.
+  * \return The token, valid until buffer i is read into again or the
+  *         tokenizer is closed; NULL when no token is left.
+  */
+const char *Tokenizer_Read(tokenizer_t *tokenizer, UINT32 i)
+{
+	return Tokenizer_Scan(tokenizer, i, TOKENSYNTAX_GENERIC);
+}
+
+/** Reads the next token using the SRB2 rules: = and ; are skipped like
+  * whitespace and only , { } are one-character tokens. Parameters and
+  * return value are as for Tokenizer_Read.
+  */
+const char *Tokenizer_SRB2Read(tokenizer_t *tokenizer, UINT32 i)
+{
+	return Tokenizer_Scan(tokenizer, i, TOKENSYNTAX_SRB2);
+}
+
+/** Returns the offset at which the next read will start. */
+UINT32 Tokenizer_GetEndPos(tokenizer_t *tokenizer)
+{
+	return tokenizer->endPos;
+}
+
+/** Sets the offset at which the next read will start. */
+void Tokenizer_SetEndPos(tokenizer_t *tokenizer, UINT32 newPos)
+{
+	tokenizer->endPos = newPos;
+}
diff --git a/src/m_tokenizer.h b/src/m_tokenizer.h
new file mode 100644
index 0000000000000000000000000000000000000000..88cb2a566907d27b0d70508fb20c9ab1dff84fbb
--- /dev/null
+++ b/src/m_tokenizer.h
@@ -0,0 +1,38 @@
+// SONIC ROBO BLAST 2
+//-----------------------------------------------------------------------------
+// Copyright (C) 2013-2023 by Sonic Team Junior.
+//
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+//-----------------------------------------------------------------------------
+/// \file m_tokenizer.h
+/// \brief Tokenizer: splits a string into tokens, skipping whitespace and C-style comments
+
+#ifndef __M_TOKENIZER__
+#define __M_TOKENIZER__
+
+#include "doomdef.h"
+
+typedef struct Tokenizer
+{
+	const char *input; // Text being tokenized; the pointer is stored, not a copy
+	unsigned numTokens; // Number of entries in capacity[] and token[]
+	UINT32 *capacity; // Allocated size of each token buffer, in bytes
+	char **token; // Token buffers; the read functions return pointers to these
+	UINT32 startPos; // Offset in input where the current token starts
+	UINT32 endPos; // Offset in input where the next read starts
+	UINT32 inputLength; // Length of input, not counting the terminating NUL
+	UINT8 inComment; // 0 = not in comment, 1 = // Single-line, 2 = /* Multi-line */
+	const char *(*get)(struct Tokenizer*, UINT32); // Read function; Tokenizer_Open sets it to Tokenizer_Read
+} tokenizer_t;
+
+tokenizer_t *Tokenizer_Open(const char *inputString, unsigned numTokens); // numTokens below 1 is treated as 1
+void Tokenizer_Close(tokenizer_t *tokenizer); // NULL is ignored
+
+const char *Tokenizer_Read(tokenizer_t *tokenizer, UINT32 i); // Next token in buffer i; NULL when none is left
+const char *Tokenizer_SRB2Read(tokenizer_t *tokenizer, UINT32 i); // Same, but skips = and ; and only , { } are one-character tokens
+UINT32 Tokenizer_GetEndPos(tokenizer_t *tokenizer); // Offset where the next read starts
+void Tokenizer_SetEndPos(tokenizer_t *tokenizer, UINT32 newPos); // Moves the offset where the next read starts
+
+#endif
diff --git a/src/sdl/Srb2SDL-vc10.vcxproj b/src/sdl/Srb2SDL-vc10.vcxproj
index 9b51cfb8094a1d6897e0685e9531ccd21df0fbdb..3ecd668d95552f423fe61e05126075ed3f6c855d 100644
--- a/src/sdl/Srb2SDL-vc10.vcxproj
+++ b/src/sdl/Srb2SDL-vc10.vcxproj
@@ -292,6 +292,7 @@
     <ClInclude Include="..\m_fixed.h" />
     <ClInclude Include="..\m_menu.h" />
     <ClInclude Include="..\m_misc.h" />
+    <ClInclude Include="..\m_tokenizer.h" />
     <ClInclude Include="..\m_perfstats.h" />
     <ClInclude Include="..\m_queue.h" />
     <ClInclude Include="..\m_random.h" />
@@ -468,6 +469,7 @@
     <ClCompile Include="..\m_fixed.c" />
     <ClCompile Include="..\m_menu.c" />
     <ClCompile Include="..\m_misc.c" />
+    <ClCompile Include="..\m_tokenizer.c" />
     <ClCompile Include="..\m_perfstats.c" />
     <ClCompile Include="..\m_queue.c" />
     <ClCompile Include="..\m_random.c" />
diff --git a/src/sdl/Srb2SDL-vc10.vcxproj.filters b/src/sdl/Srb2SDL-vc10.vcxproj.filters
index 96501b2160e587937f5513fa864f3a2f93a6d6ac..54bb4463ccb3f224cb6a3bf8cfe9c275694bb9a5 100644
--- a/src/sdl/Srb2SDL-vc10.vcxproj.filters
+++ b/src/sdl/Srb2SDL-vc10.vcxproj.filters
@@ -339,6 +339,9 @@
     <ClInclude Include="..\m_misc.h">
       <Filter>M_Misc</Filter>
     </ClInclude>
+    <ClInclude Include="..\m_tokenizer.h">
+      <Filter>M_Misc</Filter>
+    </ClInclude>
     <ClInclude Include="..\m_perfstats.h">
       <Filter>M_Misc</Filter>
     </ClInclude>
@@ -831,6 +834,9 @@
     <ClCompile Include="..\m_misc.c">
       <Filter>M_Misc</Filter>
     </ClCompile>
+    <ClCompile Include="..\m_tokenizer.c">
+      <Filter>M_Misc</Filter>
+    </ClCompile>
     <ClCompile Include="..\m_perfstats.c">
       <Filter>M_Misc</Filter>
     </ClCompile>