From 1269fe6c0e1d9fa22062b8aa079b5c877385e65c Mon Sep 17 00:00:00 2001
From: MascaraSnake <jonassauer27@gmail.com>
Date: Sat, 8 Jan 2022 13:17:15 +0100
Subject: [PATCH] Implement faster textmap parser

---
 src/doomdef.h |   7 +-
 src/m_misc.c  | 198 ++++++++++++++++++++++++++++++++++++++++++++++++--
 src/p_setup.c |  78 +++++++++-----------
 3 files changed, 231 insertions(+), 52 deletions(-)

diff --git a/src/doomdef.h b/src/doomdef.h
index 41ec9ef33f..1b86cc9548 100644
--- a/src/doomdef.h
+++ b/src/doomdef.h
@@ -483,8 +483,11 @@ extern void *(*M_Memcpy)(void* dest, const void* src, size_t n) FUNCNONNULL;
 char *va(const char *format, ...) FUNCPRINTF;
 char *M_GetToken(const char *inputString);
 void M_UnGetToken(void);
-UINT32 M_GetTokenPos(void);
-void M_SetTokenPos(UINT32 newPos);
+void M_TokenizerOpen(const char *inputString);
+void M_TokenizerClose(void);
+const char *M_TokenizerRead(UINT32 i);
+UINT32 M_TokenizerGetEndPos(void);
+void M_TokenizerSetEndPos(UINT32 newPos);
 char *sizeu1(size_t num);
 char *sizeu2(size_t num);
 char *sizeu3(size_t num);
diff --git a/src/m_misc.c b/src/m_misc.c
index 59783d5d30..d49307c7fe 100644
--- a/src/m_misc.c
+++ b/src/m_misc.c
@@ -1970,18 +1970,200 @@ void M_UnGetToken(void)
 	endPos = oldendPos;
 }
 
-/** Returns the current token's position.
- */
-UINT32 M_GetTokenPos(void)
+// Number of token buffers M_TokenizerRead can fill; callers select one by index.
+#define NUMTOKENS 2
+static const char *tokenizerInput = NULL;
+static UINT32 tokenCapacity[NUMTOKENS] = {0};
+static char *tokenizerToken[NUMTOKENS] = {NULL};
+static UINT32 tokenizerStartPos = 0;
+static UINT32 tokenizerEndPos = 0;
+static UINT32 tokenizerInputLength = 0;
+static UINT8 tokenizerInComment = 0; // 0 = not in comment, 1 = // Single-line, 2 = /* Multi-line */
+
+/** Prepares the tokenizer to read from a string.
+ * The string is not copied, so it must stay valid until M_TokenizerClose is called.
+ *
+ * \param inputString NUL-terminated text to tokenize.
+ */
+void M_TokenizerOpen(const char *inputString)
 {
-	return endPos;
+	size_t i;
+
+	tokenizerInput = inputString;
+	for (i = 0; i < NUMTOKENS; i++)
+	{
+		tokenCapacity[i] = 1024;
+		tokenizerToken[i] = (char*)Z_Malloc(tokenCapacity[i] * sizeof(char), PU_STATIC, NULL);
+	}
+	tokenizerInputLength = strlen(tokenizerInput);
 }
 
-/** Sets the current token's position.
- */
-void M_SetTokenPos(UINT32 newPos)
+/** Frees the token buffers and resets the tokenizer state.
+ */
+void M_TokenizerClose(void)
+{
+	size_t i;
+
+	tokenizerInput = NULL;
+	for (i = 0; i < NUMTOKENS; i++)
+		Z_Free(tokenizerToken[i]);
+	tokenizerStartPos = 0;
+	tokenizerEndPos = 0;
+	tokenizerInComment = 0;
+}
+
+/** Enters comment state if a comment opener starts at the given position.
+ *
+ * \param pos Position in the tokenizer input to inspect.
+ */
+static void M_DetectComment(UINT32 *pos)
+{
+	if (tokenizerInComment)
+		return;
+
+	if (*pos >= tokenizerInputLength - 1)
+		return;
+
+	if (tokenizerInput[*pos] != '/')
+		return;
+
+	//Single-line comment start
+	if (tokenizerInput[*pos + 1] == '/')
+		tokenizerInComment = 1;
+	//Multi-line comment start
+	else if (tokenizerInput[*pos + 1] == '*')
+		tokenizerInComment = 2;
+}
+
+/** Copies the text between the current start and end positions into a token buffer.
+ *
+ * \param i Index of the token buffer to fill.
+ */
+static void M_ReadTokenString(UINT32 i)
+{
+	UINT32 tokenLength = tokenizerEndPos - tokenizerStartPos;
+	if (tokenLength + 1 > tokenCapacity[i])
+	{
+		tokenCapacity[i] = tokenLength + 1;
+		// Free the old, too small buffer first so that it isn't leaked.
+		Z_Free(tokenizerToken[i]);
+		// Assign the memory. Don't forget an extra byte for the end of the string!
+		tokenizerToken[i] = (char *)Z_Malloc(tokenCapacity[i] * sizeof(char), PU_STATIC, NULL);
+	}
+	// Copy the string.
+	M_Memcpy(tokenizerToken[i], tokenizerInput + tokenizerStartPos, (size_t)tokenLength);
+	// Make the final character NUL.
+	tokenizerToken[i][tokenLength] = '\0';
+}
+
+/** Reads the next token from the tokenizer input.
+ * Whitespace, '=', ';' and comments are skipped; ',', '{' and '}' are tokens of
+ * their own; quoted strings are returned without their quotes.
+ *
+ * \param i Index of the token buffer to store the token in.
+ * \return The token, stored in buffer i and overwritten by the next read into that
+ *         buffer, or NULL if no input is open or the end of the input was reached.
+ */
+const char *M_TokenizerRead(UINT32 i)
+{
+	if (!tokenizerInput)
+		return NULL;
+
+	tokenizerStartPos = tokenizerEndPos;
+
+	// Try to detect comments now, in case we're pointing right at one
+	M_DetectComment(&tokenizerStartPos);
+
+	// Find the first non-whitespace char, or else the end of the string trying
+	while ((tokenizerInput[tokenizerStartPos] == ' '
+			|| tokenizerInput[tokenizerStartPos] == '\t'
+			|| tokenizerInput[tokenizerStartPos] == '\r'
+			|| tokenizerInput[tokenizerStartPos] == '\n'
+			|| tokenizerInput[tokenizerStartPos] == '\0'
+			|| tokenizerInput[tokenizerStartPos] == '=' || tokenizerInput[tokenizerStartPos] == ';' // UDMF TEXTMAP.
+			|| tokenizerInComment != 0)
+			&& tokenizerStartPos < tokenizerInputLength)
+	{
+		// Try to detect comment endings now
+		if (tokenizerInComment == 1 && tokenizerInput[tokenizerStartPos] == '\n')
+			tokenizerInComment = 0; // End of line for a single-line comment
+		else if (tokenizerInComment == 2
+			&& tokenizerStartPos < tokenizerInputLength - 1
+			&& tokenizerInput[tokenizerStartPos] == '*'
+			&& tokenizerInput[tokenizerStartPos+1] == '/')
+		{
+			// End of multi-line comment
+			tokenizerInComment = 0;
+			tokenizerStartPos++; // Make damn well sure we're out of the comment ending at the end of it all
+		}
+
+		tokenizerStartPos++;
+		M_DetectComment(&tokenizerStartPos);
+	}
+
+	// If the end of the string is reached, no token is to be read
+	if (tokenizerStartPos == tokenizerInputLength) {
+		tokenizerEndPos = tokenizerInputLength;
+		return NULL;
+	}
+	// Else, if it's one of these three symbols, capture only this one character
+	else if (tokenizerInput[tokenizerStartPos] == ','
+			|| tokenizerInput[tokenizerStartPos] == '{'
+			|| tokenizerInput[tokenizerStartPos] == '}')
+	{
+		tokenizerEndPos = tokenizerStartPos + 1;
+		tokenizerToken[i][0] = tokenizerInput[tokenizerStartPos];
+		tokenizerToken[i][1] = '\0';
+		return tokenizerToken[i];
+	}
+	// Return entire string within quotes, except without the quotes.
+	else if (tokenizerInput[tokenizerStartPos] == '"')
+	{
+		tokenizerEndPos = ++tokenizerStartPos;
+		while (tokenizerInput[tokenizerEndPos] != '"' && tokenizerEndPos < tokenizerInputLength)
+			tokenizerEndPos++;
+
+		M_ReadTokenString(i);
+		// Skip the closing quote, unless the string is unterminated and already ended at the end of the input.
+		if (tokenizerEndPos < tokenizerInputLength)
+			tokenizerEndPos++;
+		return tokenizerToken[i];
+	}
+
+	// Now find the end of the token. This includes several additional characters that are okay to capture as one character, but not trailing at the end of another token.
+	tokenizerEndPos = tokenizerStartPos + 1;
+	while ((tokenizerInput[tokenizerEndPos] != ' '
+			&& tokenizerInput[tokenizerEndPos] != '\t'
+			&& tokenizerInput[tokenizerEndPos] != '\r'
+			&& tokenizerInput[tokenizerEndPos] != '\n'
+			&& tokenizerInput[tokenizerEndPos] != ','
+			&& tokenizerInput[tokenizerEndPos] != '{'
+			&& tokenizerInput[tokenizerEndPos] != '}'
+			&& tokenizerInput[tokenizerEndPos] != '=' && tokenizerInput[tokenizerEndPos] != ';' // UDMF TEXTMAP.
+			&& tokenizerInComment == 0)
+			&& tokenizerEndPos < tokenizerInputLength)
+	{
+		tokenizerEndPos++;
+		// Try to detect comment starts now; if it's in a comment, we don't want it in this token
+		M_DetectComment(&tokenizerEndPos);
+	}
+
+	M_ReadTokenString(i);
+	return tokenizerToken[i];
+}
+
+/** Returns the input position at which the next M_TokenizerRead will start.
+ */
+UINT32 M_TokenizerGetEndPos(void)
+{
+	return tokenizerEndPos;
+}
+
+/** Sets the input position at which the next M_TokenizerRead will start.
+ */
+void M_TokenizerSetEndPos(UINT32 newPos)
 {
-	endPos = newPos;
+	tokenizerEndPos = newPos;
 }
 
 /** Count bits in a number.
diff --git a/src/p_setup.c b/src/p_setup.c
index c6561a8a88..74f761f24a 100644
--- a/src/p_setup.c
+++ b/src/p_setup.c
@@ -1428,9 +1428,9 @@ UINT32 vertexesPos[UINT16_MAX];
 UINT32 sectorsPos[UINT16_MAX];
 
 // Determine total amount of map data in TEXTMAP.
-static boolean TextmapCount(UINT8 *data, size_t size)
+static boolean TextmapCount(size_t size)
 {
-	char *tkn = M_GetToken((char *)data);
+	const char *tkn = M_TokenizerRead(0);
 	UINT8 brackets = 0;
 
 	nummapthings = 0;
@@ -1442,20 +1442,16 @@ static boolean TextmapCount(UINT8 *data, size_t size)
 	// Look for namespace at the beginning.
 	if (!fastcmp(tkn, "namespace"))
 	{
-		Z_Free(tkn);
 		CONS_Alert(CONS_ERROR, "No namespace at beginning of lump!\n");
 		return false;
 	}
-	Z_Free(tkn);
 
 	// Check if namespace is valid.
-	tkn = M_GetToken(NULL);
+	tkn = M_TokenizerRead(0);
 	if (!fastcmp(tkn, "srb2"))
 		CONS_Alert(CONS_WARNING, "Invalid namespace '%s', only 'srb2' is supported.\n", tkn);
-	Z_Free(tkn);
 
-	tkn = M_GetToken(NULL);
-	while (tkn && M_GetTokenPos() < size)
+	while ((tkn = M_TokenizerRead(0)) && M_TokenizerGetEndPos() < size)
 	{
 		// Avoid anything inside bracketed stuff, only look for external keywords.
 		if (brackets)
@@ -1467,24 +1463,19 @@ static boolean TextmapCount(UINT8 *data, size_t size)
 			brackets++;
 		// Check for valid fields.
 		else if (fastcmp(tkn, "thing"))
-			mapthingsPos[nummapthings++] = M_GetTokenPos();
+			mapthingsPos[nummapthings++] = M_TokenizerGetEndPos();
 		else if (fastcmp(tkn, "linedef"))
-			linesPos[numlines++] = M_GetTokenPos();
+			linesPos[numlines++] = M_TokenizerGetEndPos();
 		else if (fastcmp(tkn, "sidedef"))
-			sidesPos[numsides++] = M_GetTokenPos();
+			sidesPos[numsides++] = M_TokenizerGetEndPos();
 		else if (fastcmp(tkn, "vertex"))
-			vertexesPos[numvertexes++] = M_GetTokenPos();
+			vertexesPos[numvertexes++] = M_TokenizerGetEndPos();
 		else if (fastcmp(tkn, "sector"))
-			sectorsPos[numsectors++] = M_GetTokenPos();
+			sectorsPos[numsectors++] = M_TokenizerGetEndPos();
 		else
 			CONS_Alert(CONS_NOTICE, "Unknown field '%s'.\n", tkn);
-
-		Z_Free(tkn);
-		tkn = M_GetToken(NULL);
 	}
 
-	Z_Free(tkn);
-
 	if (brackets)
 	{
 		CONS_Alert(CONS_ERROR, "Unclosed brackets detected in textmap lump.\n");
@@ -1494,7 +1485,7 @@ static boolean TextmapCount(UINT8 *data, size_t size)
 	return true;
 }
 
-static void ParseTextmapVertexParameter(UINT32 i, char *param, char *val)
+static void ParseTextmapVertexParameter(UINT32 i, const char *param, const char *val)
 {
 	if (fastcmp(param, "x"))
 		vertexes[i].x = FLOAT_TO_FIXED(atof(val));
@@ -1541,7 +1532,7 @@ typedef struct textmap_plane_s {
 textmap_plane_t textmap_planefloor = {0, 0, 0, 0, 0};
 textmap_plane_t textmap_planeceiling = {0, 0, 0, 0, 0};
 
-static void ParseTextmapSectorParameter(UINT32 i, char *param, char *val)
+static void ParseTextmapSectorParameter(UINT32 i, const char *param, const char *val)
 {
 	if (fastcmp(param, "heightfloor"))
 		sectors[i].floorheight = atol(val) << FRACBITS;
@@ -1565,7 +1556,7 @@ static void ParseTextmapSectorParameter(UINT32 i, char *param, char *val)
 		Tag_FSet(&sectors[i].tags, atol(val));
 	else if (fastcmp(param, "moreids"))
 	{
-		char* id = val;
+		const char* id = val;
 		while (id)
 		{
 			Tag_Add(&sectors[i].tags, atol(id));
@@ -1754,7 +1745,7 @@ static void ParseTextmapSectorParameter(UINT32 i, char *param, char *val)
 		sectors[i].triggerer = atol(val);
 }
 
-static void ParseTextmapSidedefParameter(UINT32 i, char *param, char *val)
+static void ParseTextmapSidedefParameter(UINT32 i, const char *param, const char *val)
 {
 	if (fastcmp(param, "offsetx"))
 		sides[i].textureoffset = atol(val)<<FRACBITS;
@@ -1772,13 +1763,13 @@ static void ParseTextmapSidedefParameter(UINT32 i, char *param, char *val)
 		sides[i].repeatcnt = atol(val);
 }
 
-static void ParseTextmapLinedefParameter(UINT32 i, char *param, char *val)
+static void ParseTextmapLinedefParameter(UINT32 i, const char *param, const char *val)
 {
 	if (fastcmp(param, "id"))
 		Tag_FSet(&lines[i].tags, atol(val));
 	else if (fastcmp(param, "moreids"))
 	{
-		char* id = val;
+		const char* id = val;
 		while (id)
 		{
 			Tag_Add(&lines[i].tags, atol(id));
@@ -1866,13 +1857,13 @@ static void ParseTextmapLinedefParameter(UINT32 i, char *param, char *val)
 		lines[i].flags |= ML_TFERLINE;
 }
 
-static void ParseTextmapThingParameter(UINT32 i, char *param, char *val)
+static void ParseTextmapThingParameter(UINT32 i, const char *param, const char *val)
 {
 	if (fastcmp(param, "id"))
 		Tag_FSet(&mapthings[i].tags, atol(val));
 	else if (fastcmp(param, "moreids"))
 	{
-		char* id = val;
+		const char* id = val;
 		while (id)
 		{
 			Tag_Add(&mapthings[i].tags, atol(id));
@@ -1923,32 +1914,28 @@ static void ParseTextmapThingParameter(UINT32 i, char *param, char *val)
  * \param Structure number (mapthings, sectors, ...).
  * \param Parser function pointer.
  */
-static void TextmapParse(UINT32 dataPos, size_t num, void (*parser)(UINT32, char *, char *))
+static void TextmapParse(UINT32 dataPos, size_t num, void (*parser)(UINT32, const char *, const char *))
 {
-	char *param, *val;
+	const char *param, *val;
 
-	M_SetTokenPos(dataPos);
-	param = M_GetToken(NULL);
-	if (!fastcmp(param, "{"))
+	M_TokenizerSetEndPos(dataPos);
+	param = M_TokenizerRead(0);
+	if (!param || !fastcmp(param, "{"))
 	{
-		Z_Free(param);
 		CONS_Alert(CONS_WARNING, "Invalid UDMF data capsule!\n");
 		return;
 	}
-	Z_Free(param);
 
 	while (true)
 	{
-		param = M_GetToken(NULL);
-		if (fastcmp(param, "}"))
-		{
-			Z_Free(param);
+		param = M_TokenizerRead(0);
+		// Stop at the closing brace, or if the input ends before one is found.
+		if (!param || fastcmp(param, "}"))
 			break;
-		}
-		val = M_GetToken(NULL);
+		val = M_TokenizerRead(1);
+		if (!val) // The input ended between a parameter and its value.
+			break;
 		parser(num, param, val);
-		Z_Free(param);
-		Z_Free(val);
 	}
 }
 
@@ -2649,8 +2636,12 @@ static boolean P_LoadMapData(const virtres_t *virt)
 	if (udmf) // Count how many entries for each type we got in textmap.
 	{
 		virtlump_t *textmap = vres_Find(virt, "TEXTMAP");
-		if (!TextmapCount(textmap->data, textmap->size))
+		M_TokenizerOpen((char *)textmap->data);
+		if (!TextmapCount(textmap->size))
+		{
+			M_TokenizerClose();
 			return false;
+		}
 	}
 	else
 	{
@@ -2704,7 +2695,10 @@ static boolean P_LoadMapData(const virtres_t *virt)
 
 	// Load map data.
 	if (udmf)
+	{
 		P_LoadTextmap();
+		M_TokenizerClose();
+	}
 	else
 	{
 		P_LoadVertices(virtvertexes->data);
-- 
GitLab