Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
SRB2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
STJr
SRB2
Commits
c5a1b2d7
Commit
c5a1b2d7
authored
4 months ago
by
LJ Sonic
Browse files
Options
Downloads
Plain Diff
Merge branch 'escape-chars-in-tokenizer' into 'next'
Make UDMF parser aware of escape sequences See merge request
!2647
parents
56513ec0
64047541
Branches
Branches containing commit
Tags
Tags containing commit
1 merge request
!2647
Make UDMF parser aware of escape sequences
Pipeline
#8206
canceled
4 months ago
Stage: build
Stage: osxcross
Changes
2
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/m_tokenizer.c
+162
-14
162 additions, 14 deletions
src/m_tokenizer.c
src/m_tokenizer.h
+2
-1
2 additions, 1 deletion
src/m_tokenizer.h
with
164 additions
and
15 deletions
src/m_tokenizer.c
+
162
−
14
View file @
c5a1b2d7
// SONIC ROBO BLAST 2
// SONIC ROBO BLAST 2
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
// Copyright (C) 2013-202
4
by Sonic Team Junior.
// Copyright (C) 2013-202
5
by Sonic Team Junior.
//
//
// This program is free software distributed under the
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// terms of the GNU General Public License, version 2.
...
@@ -28,6 +28,7 @@ tokenizer_t *Tokenizer_Open(const char *inputString, size_t len, unsigned numTok
...
@@ -28,6 +28,7 @@ tokenizer_t *Tokenizer_Open(const char *inputString, size_t len, unsigned numTok
tokenizer
->
endPos
=
0
;
tokenizer
->
endPos
=
0
;
tokenizer
->
inputLength
=
0
;
tokenizer
->
inputLength
=
0
;
tokenizer
->
inComment
=
0
;
tokenizer
->
inComment
=
0
;
tokenizer
->
stringNeedsEscaping
=
false
;
tokenizer
->
inString
=
0
;
tokenizer
->
inString
=
0
;
tokenizer
->
get
=
Tokenizer_Read
;
tokenizer
->
get
=
Tokenizer_Read
;
...
@@ -92,6 +93,124 @@ static void DetectComment(tokenizer_t *tokenizer, UINT32 *pos)
...
@@ -92,6 +93,124 @@ static void DetectComment(tokenizer_t *tokenizer, UINT32 *pos)
tokenizer
->
inComment
=
2
;
tokenizer
->
inComment
=
2
;
}
}
// Converts the escape sequences found in a raw string into the bytes they stand for.
//
// Recognized sequences:
//   \n  \t  \\  \"    newline, tab, backslash, double quote
//   \xHH  \xHHHH      one or two bytes, written as exactly 2 or 4 hex digits
//   \D  \DD  \DDD     one byte, written as 1 to 3 decimal digits (value 0-255)
//
// output      - Destination buffer. Every escape sequence is at least as long
//               as the bytes it produces, so inputLength + 1 bytes are enough.
// input       - Raw text to convert. It does not need to be NUL-terminated;
//               no byte at or after input + inputLength is ever read.
// inputLength - Number of bytes of input to convert.
//
// Returns the number of bytes written to output, terminating NUL included.
//
// TODO: Displaying parsing errors properly will require some refactoring of
// the tokenizer itself. For now, this function silently produces an empty
// string (and returns 1) if it encounters a malformed escape sequence:
// an unknown escape character, a hex sequence that is not 2 or 4 digits long,
// a decimal value above 255, or a sequence cut short by the end of the input.
static size_t EscapeString(char *output, const char *input, size_t inputLength)
{
	const char *end = input + inputLength;
	size_t i = 0;

	while (input < end)
	{
		char chr = *input++;

		// Ordinary characters are copied through untouched.
		if (chr != '\\')
		{
			output[i++] = chr;
			continue;
		}

		// A backslash that is the very last character has nothing to escape.
		if (input >= end)
			goto malformed;

		chr = *input++;

		switch (chr)
		{
			case 'n':
				output[i++] = '\n';
				break;
			case 't':
				output[i++] = '\t';
				break;
			case '\\':
				output[i++] = '\\';
				break;
			case '"':
				output[i++] = '\"';
				break;
			case 'x':
			{
				int out = 0;
				int j = 0;

				// Consume up to five hex digits: one more than the longest
				// valid form, so that overlong sequences are rejected below
				// instead of being silently split.
				while (j < 5 && input < end && isxdigit((unsigned char)*input))
				{
					int c = (unsigned char)*input++;

					c = (c <= '9') ? (c - '0') : (tolower(c) - 'a' + 10);
					out = (out << 4) | c;
					j++;
				}

				switch (j)
				{
					case 4:
						// High byte first, then fall through for the low byte.
						output[i++] = (char)((out >> 8) & 0xFF);
						/* FALLTHRU */
					case 2:
						output[i++] = (char)(out & 0xFF);
						break;
					default:
						// Escape sequence has wrong size
						goto malformed;
				}
				break;
			}
			default:
			{
				int out;
				int j;

				// Anything else must be the first digit of a decimal escape.
				if (!isdigit((unsigned char)chr))
					goto malformed; // Unknown escape sequence

				out = chr - '0';

				for (j = 1; j < 3 && input < end && isdigit((unsigned char)*input); j++)
					out = 10 * out + (*input++ - '0');

				if (out > 255)
					goto malformed; // Escape sequence is too large

				output[i++] = (char)out;
				break;
			}
		}
	}

	output[i] = '\0';
	return i + 1;

malformed:
	// Discard whatever was converted so far and hand back an empty string.
	output[0] = '\0';
	return 1;
}
static
void
Tokenizer_ReadTokenString
(
tokenizer_t
*
tokenizer
,
UINT32
i
)
static
void
Tokenizer_ReadTokenString
(
tokenizer_t
*
tokenizer
,
UINT32
i
)
{
{
UINT32
tokenLength
=
tokenizer
->
endPos
-
tokenizer
->
startPos
;
UINT32
tokenLength
=
tokenizer
->
endPos
-
tokenizer
->
startPos
;
...
@@ -101,11 +220,47 @@ static void Tokenizer_ReadTokenString(tokenizer_t *tokenizer, UINT32 i)
...
@@ -101,11 +220,47 @@ static void Tokenizer_ReadTokenString(tokenizer_t *tokenizer, UINT32 i)
// Assign the memory. Don't forget an extra byte for the end of the string!
// Assign the memory. Don't forget an extra byte for the end of the string!
tokenizer
->
token
[
i
]
=
(
char
*
)
Z_Malloc
(
tokenizer
->
capacity
[
i
]
*
sizeof
(
char
),
PU_STATIC
,
NULL
);
tokenizer
->
token
[
i
]
=
(
char
*
)
Z_Malloc
(
tokenizer
->
capacity
[
i
]
*
sizeof
(
char
),
PU_STATIC
,
NULL
);
}
}
// Copy the string.
// Copy the string.
if
(
tokenizer
->
stringNeedsEscaping
)
{
EscapeString
(
tokenizer
->
token
[
i
],
tokenizer
->
input
+
tokenizer
->
startPos
,
(
size_t
)
tokenLength
);
}
else
{
M_Memcpy
(
tokenizer
->
token
[
i
],
tokenizer
->
input
+
tokenizer
->
startPos
,
(
size_t
)
tokenLength
);
M_Memcpy
(
tokenizer
->
token
[
i
],
tokenizer
->
input
+
tokenizer
->
startPos
,
(
size_t
)
tokenLength
);
// Make the final character NUL.
// Make the final character NUL.
tokenizer
->
token
[
i
][
tokenLength
]
=
'\0'
;
tokenizer
->
token
[
i
][
tokenLength
]
=
'\0'
;
}
}
}
// Advances tokenizer->endPos from its current position to the next unescaped
// double quote, or to the end of the input if no such quote exists.
//
// While scanning, stringNeedsEscaping is set to true as soon as a backslash is
// seen, and is left false otherwise. The character following a backslash is
// skipped, so an escaped quote (\") does not end the scan.
//
// DetectLineBreak is called on the characters that are walked over
// (presumably to keep the tokenizer's line tracking up to date -- its
// definition is not part of this block).
static void ScanString(tokenizer_t *tokenizer)
{
	tokenizer->stringNeedsEscaping = false;

	// The bound is tested first so that input[endPos] is never read
	// once endPos has reached the end of the input.
	while (tokenizer->endPos < tokenizer->inputLength
		&& tokenizer->input[tokenizer->endPos] != '"')
	{
		if (!DetectLineBreak(tokenizer, tokenizer->endPos))
		{
			// Skip one character ahead if this looks like an escape sequence
			if (tokenizer->input[tokenizer->endPos] == '\\')
			{
				tokenizer->stringNeedsEscaping = true;
				tokenizer->endPos++;

				// Oh. Naughty. We hit the end of the input.
				// Stop scanning, then.
				if (tokenizer->endPos == tokenizer->inputLength)
					return;

				// The escaped character is still checked for a line break.
				DetectLineBreak(tokenizer, tokenizer->endPos);
			}
		}

		tokenizer->endPos++;
	}
}
const
char
*
Tokenizer_Read
(
tokenizer_t
*
tokenizer
,
UINT32
i
)
const
char
*
Tokenizer_Read
(
tokenizer_t
*
tokenizer
,
UINT32
i
)
{
{
...
@@ -117,11 +272,7 @@ const char *Tokenizer_Read(tokenizer_t *tokenizer, UINT32 i)
...
@@ -117,11 +272,7 @@ const char *Tokenizer_Read(tokenizer_t *tokenizer, UINT32 i)
// If in a string, return the entire string within quotes, except without the quotes.
// If in a string, return the entire string within quotes, except without the quotes.
if
(
tokenizer
->
inString
==
1
)
if
(
tokenizer
->
inString
==
1
)
{
{
while
(
tokenizer
->
input
[
tokenizer
->
endPos
]
!=
'"'
&&
tokenizer
->
endPos
<
tokenizer
->
inputLength
)
ScanString
(
tokenizer
);
{
DetectLineBreak
(
tokenizer
,
tokenizer
->
endPos
);
tokenizer
->
endPos
++
;
}
Tokenizer_ReadTokenString
(
tokenizer
,
i
);
Tokenizer_ReadTokenString
(
tokenizer
,
i
);
tokenizer
->
inString
=
2
;
tokenizer
->
inString
=
2
;
...
@@ -134,6 +285,7 @@ const char *Tokenizer_Read(tokenizer_t *tokenizer, UINT32 i)
...
@@ -134,6 +285,7 @@ const char *Tokenizer_Read(tokenizer_t *tokenizer, UINT32 i)
tokenizer
->
token
[
i
][
0
]
=
tokenizer
->
input
[
tokenizer
->
startPos
];
tokenizer
->
token
[
i
][
0
]
=
tokenizer
->
input
[
tokenizer
->
startPos
];
tokenizer
->
token
[
i
][
1
]
=
'\0'
;
tokenizer
->
token
[
i
][
1
]
=
'\0'
;
tokenizer
->
inString
=
0
;
tokenizer
->
inString
=
0
;
tokenizer
->
stringNeedsEscaping
=
false
;
return
tokenizer
->
token
[
i
];
return
tokenizer
->
token
[
i
];
}
}
...
@@ -281,11 +433,7 @@ const char *Tokenizer_SRB2Read(tokenizer_t *tokenizer, UINT32 i)
...
@@ -281,11 +433,7 @@ const char *Tokenizer_SRB2Read(tokenizer_t *tokenizer, UINT32 i)
else
if
(
tokenizer
->
input
[
tokenizer
->
startPos
]
==
'"'
)
else
if
(
tokenizer
->
input
[
tokenizer
->
startPos
]
==
'"'
)
{
{
tokenizer
->
endPos
=
++
tokenizer
->
startPos
;
tokenizer
->
endPos
=
++
tokenizer
->
startPos
;
while
(
tokenizer
->
input
[
tokenizer
->
endPos
]
!=
'"'
&&
tokenizer
->
endPos
<
tokenizer
->
inputLength
)
ScanString
(
tokenizer
);
{
DetectLineBreak
(
tokenizer
,
tokenizer
->
endPos
);
tokenizer
->
endPos
++
;
}
Tokenizer_ReadTokenString
(
tokenizer
,
i
);
Tokenizer_ReadTokenString
(
tokenizer
,
i
);
tokenizer
->
endPos
++
;
tokenizer
->
endPos
++
;
...
...
This diff is collapsed.
Click to expand it.
src/m_tokenizer.h
+
2
−
1
View file @
c5a1b2d7
// SONIC ROBO BLAST 2
// SONIC ROBO BLAST 2
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
// Copyright (C) 2013-202
4
by Sonic Team Junior.
// Copyright (C) 2013-202
5
by Sonic Team Junior.
//
//
// This program is free software distributed under the
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// terms of the GNU General Public License, version 2.
...
@@ -26,6 +26,7 @@ typedef struct Tokenizer
...
@@ -26,6 +26,7 @@ typedef struct Tokenizer
UINT32
inputLength
;
UINT32
inputLength
;
UINT8
inComment
;
// 0 = not in comment, 1 = // Single-line, 2 = /* Multi-line */
UINT8
inComment
;
// 0 = not in comment, 1 = // Single-line, 2 = /* Multi-line */
UINT8
inString
;
// 0 = not in string, 1 = in string, 2 = just left string
UINT8
inString
;
// 0 = not in string, 1 = in string, 2 = just left string
boolean
stringNeedsEscaping
;
int
line
;
int
line
;
const
char
*
(
*
get
)(
struct
Tokenizer
*
,
UINT32
);
const
char
*
(
*
get
)(
struct
Tokenizer
*
,
UINT32
);
}
tokenizer_t
;
}
tokenizer_t
;
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment