buffer original source data into the Token struct for use in-game

2025-11-28 18:01:57 -07:00
parent 804bf11d02
commit f172ac5899
3 changed files with 100 additions and 27 deletions

File 1 of 3: the tokenizer

```diff
@@ -18,18 +18,18 @@ quick_error! {
         display("IO Error: {}", err)
         source(err)
     }
-    NumberParseError(err: std::num::ParseIntError, line: usize, column: usize) {
+    NumberParseError(err: std::num::ParseIntError, line: usize, column: usize, original: String) {
         display("Number Parse Error: {}\nLine: {}, Column: {}", err, line, column)
         source(err)
     }
-    DecimalParseError(err: rust_decimal::Error, line: usize, column: usize) {
+    DecimalParseError(err: rust_decimal::Error, line: usize, column: usize, original: String) {
         display("Decimal Parse Error: {}\nLine: {}, Column: {}", err, line, column)
         source(err)
     }
-    UnknownSymbolError(char: char, line: usize, column: usize) {
+    UnknownSymbolError(char: char, line: usize, column: usize, original: String) {
         display("Unknown Symbol: {}\nLine: {}, Column: {}", char, line, column)
     }
-    UnknownKeywordOrIdentifierError(val: String, line: usize, column: usize) {
+    UnknownKeywordOrIdentifierError(val: String, line: usize, column: usize, original: String) {
         display("Unknown Keyword or Identifier: {}\nLine: {}, Column: {}", val, line, column)
     }
 }
```
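Every error variant now carries a trailing `original: String` holding the raw source text consumed while the failing token was being read. Below is a self-contained sketch of how a consumer might surface that field; the enum is a hypothetical stand-in, not the `quick_error!` output above.

```rust
// Hypothetical stand-in for one of the widened variants above; field order
// follows the diff: (char, line, column, original).
#[derive(Debug)]
enum Error {
    UnknownSymbolError(char, usize, usize, String),
}

fn describe(e: &Error) -> String {
    match e {
        // `original` is the raw source consumed while the failing token was
        // being read, so a UI can echo the user's exact input.
        Error::UnknownSymbolError(c, line, col, original) => {
            format!("unknown symbol {c:?} at {line}:{col} in {original:?}")
        }
    }
}

fn main() {
    let err = Error::UnknownSymbolError('@', 1, 3, "12@".to_string());
    println!("{}", describe(&err));
}
```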
```diff
@@ -45,6 +45,7 @@ pub struct Tokenizer<'a> {
     line: usize,
     column: usize,
     returned_eof: bool,
+    string_buffer: String,
 }

 impl<'a> Tokenizer<'a> {
@@ -58,6 +59,7 @@ impl<'a> Tokenizer<'a> {
             column: 1,
             char_buffer: [0],
             returned_eof: false,
+            string_buffer: String::new(),
         })
     }
 }
@@ -72,6 +74,7 @@ impl<'a> From<String> for Tokenizer<'a> {
             column: 1,
             char_buffer: [0],
             returned_eof: false,
+            string_buffer: String::new(),
         }
     }
 }
@@ -84,6 +87,7 @@ impl<'a> From<&'a str> for Tokenizer<'a> {
             column: 1,
             line: 1,
             returned_eof: false,
+            string_buffer: String::new(),
         }
     }
 }
@@ -111,6 +115,7 @@ impl<'a> Tokenizer<'a> {
             self.column += 1;
         }

+        self.string_buffer.push(c);
         Ok(Some(c))
     }
```
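The `push` in `next_char` is the heart of the change: every character the tokenizer consumes is appended to `string_buffer`, and each token or error emission site below drains it with `std::mem::take`. A minimal, runnable sketch of that accumulate-and-drain pattern (names here are illustrative):

```rust
fn main() {
    let mut string_buffer = String::new();

    // Mirrors self.string_buffer.push(c) in next_char: every consumed
    // character accumulates in the buffer.
    for c in "1.5c".chars() {
        string_buffer.push(c);
    }

    // std::mem::take swaps in an empty String and returns the old contents,
    // which is what each Token::new / Err(...) site in the diff does.
    let original = std::mem::take(&mut string_buffer);
    assert_eq!(original, "1.5c");
    assert!(string_buffer.is_empty()); // buffer is reset for the next token
}
```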
```diff
@@ -177,7 +182,12 @@ impl<'a> Tokenizer<'a> {
                 return self.tokenize_keyword_or_identifier(next_char).map(Some);
             }
             _ => {
-                return Err(Error::UnknownSymbolError(next_char, self.line, self.column));
+                return Err(Error::UnknownSymbolError(
+                    next_char,
+                    self.line,
+                    self.column,
+                    std::mem::take(&mut self.string_buffer),
+                ));
             }
         }
     }
@@ -185,7 +195,12 @@ impl<'a> Tokenizer<'a> {
             Ok(None)
         } else {
             self.returned_eof = true;
-            Ok(Some(Token::new(TokenType::EOF, self.line, self.column)))
+            Ok(Some(Token::new(
+                TokenType::EOF,
+                self.line,
+                self.column,
+                Some(std::mem::take(&mut self.string_buffer)),
+            )))
         }
     }
```
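Note that the EOF token drains the buffer as well, so any source consumed after the last real token (typically trailing whitespace) ends up attached to the EOF token's `original_string` rather than being dropped.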
```diff
@@ -212,6 +227,7 @@ impl<'a> Tokenizer<'a> {
                 TokenType::Symbol(Symbol::$symbol),
                 self.line,
                 self.column,
+                Some(std::mem::take(&mut self.string_buffer)),
             ))
         };
     }
@@ -279,6 +295,7 @@ impl<'a> Tokenizer<'a> {
                 first_symbol,
                 self.line,
                 self.column,
+                std::mem::take(&mut self.string_buffer),
             )),
         }
     }
@@ -328,17 +345,28 @@ impl<'a> Tokenizer<'a> {
             let decimal_scale = decimal.len() as u32;
             let number = format!("{}{}", primary, decimal)
                 .parse::<i128>()
-                .map_err(|e| Error::NumberParseError(e, self.line, self.column))?;
+                .map_err(|e| {
+                    Error::NumberParseError(
+                        e,
+                        self.line,
+                        self.column,
+                        std::mem::take(&mut self.string_buffer),
+                    )
+                })?;
             Number::Decimal(
-                Decimal::try_from_i128_with_scale(number, decimal_scale)
-                    .map_err(|e| Error::DecimalParseError(e, line, column))?,
+                Decimal::try_from_i128_with_scale(number, decimal_scale).map_err(|e| {
+                    Error::DecimalParseError(
+                        e,
+                        line,
+                        column,
+                        std::mem::take(&mut self.string_buffer),
+                    )
+                })?,
             )
         } else {
-            Number::Integer(
-                primary
-                    .parse()
-                    .map_err(|e| Error::NumberParseError(e, line, column))?,
-            )
+            Number::Integer(primary.parse().map_err(|e| {
+                Error::NumberParseError(e, line, column, std::mem::take(&mut self.string_buffer))
+            })?)
         };

         // check if the next char is a temperature suffix
```
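The `map_err` closures above capture `&mut self`, so they cannot move `self.string_buffer` out by value; `std::mem::take` sidesteps that by swapping in an empty `String`. A compilable sketch of the same shape, with stand-in types:

```rust
struct Lexer {
    string_buffer: String,
}

impl Lexer {
    fn parse_number(&mut self, primary: &str) -> Result<i64, String> {
        primary.parse::<i64>().map_err(|e| {
            // Moving `self.string_buffer` out of the closure would not
            // compile; take() hands the error the buffered text instead.
            format!("{e} (original: {:?})", std::mem::take(&mut self.string_buffer))
        })
    }
}

fn main() {
    let mut lexer = Lexer { string_buffer: "12x".to_string() };
    println!("{:?}", lexer.parse_number("12x"));
}
```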
```diff
@@ -347,14 +375,31 @@ impl<'a> Tokenizer<'a> {
                 'c' => Temperature::Celsius(number),
                 'f' => Temperature::Fahrenheit(number),
                 'k' => Temperature::Kelvin(number),
-                _ => return Ok(Token::new(TokenType::Number(number), line, column)),
+                _ => {
+                    return Ok(Token::new(
+                        TokenType::Number(number),
+                        line,
+                        column,
+                        Some(std::mem::take(&mut self.string_buffer)),
+                    ));
+                }
             }
             .to_kelvin();

             self.next_char()?;
-            Ok(Token::new(TokenType::Number(temperature), line, column))
+            Ok(Token::new(
+                TokenType::Number(temperature),
+                line,
+                column,
+                Some(std::mem::take(&mut self.string_buffer)),
+            ))
         } else {
-            Ok(Token::new(TokenType::Number(number), line, column))
+            Ok(Token::new(
+                TokenType::Number(number),
+                line,
+                column,
+                Some(std::mem::take(&mut self.string_buffer)),
+            ))
         }
     }
```
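For context, the hunk above normalizes temperature-suffixed numbers (`c`/`f`/`k`) to kelvin at tokenization time, which is exactly why `original_string` matters: the token's value no longer matches what the user typed. A hedged sketch of that normalization, with `f64` standing in for the crate's `Number`/`Temperature` types:

```rust
// f64 stands in for the crate's Number/Temperature types; the real
// to_kelvin() lives on the Temperature enum matched above.
fn to_kelvin(value: f64, suffix: char) -> Option<f64> {
    match suffix {
        'c' => Some(value + 273.15),
        'f' => Some((value - 32.0) * 5.0 / 9.0 + 273.15),
        'k' => Some(value),
        _ => None, // not a temperature suffix: emit the number unchanged
    }
}

fn main() {
    // "21c" would tokenize to roughly Number(294.15), while original_string
    // preserves the literal "21c" for display.
    let k = to_kelvin(21.0, 'c').unwrap();
    assert!((k - 294.15).abs() < 1e-9);
    assert_eq!(to_kelvin(32.0, 'f'), Some(273.15));
}
```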
```diff
@@ -373,7 +418,12 @@ impl<'a> Tokenizer<'a> {
             buffer.push(next_char);
         }

-        Ok(Token::new(TokenType::String(buffer), line, column))
+        Ok(Token::new(
+            TokenType::String(buffer),
+            line,
+            column,
+            Some(std::mem::take(&mut self.string_buffer)),
+        ))
     }

     /// Tokenizes a keyword or an identifier. Also handles boolean literals
@@ -384,6 +434,7 @@ impl<'a> Tokenizer<'a> {
                 TokenType::Keyword(Keyword::$keyword),
                 self.line,
                 self.column,
+                Some(std::mem::take(&mut self.string_buffer)),
             ));
         }};
     }
@@ -426,13 +477,19 @@ impl<'a> Tokenizer<'a> {
             // boolean literals
             "true" if next_ws!() => {
-                return Ok(Token::new(TokenType::Boolean(true), self.line, self.column));
+                return Ok(Token::new(
+                    TokenType::Boolean(true),
+                    self.line,
+                    self.column,
+                    Some(std::mem::take(&mut self.string_buffer)),
+                ));
             }
             "false" if next_ws!() => {
                 return Ok(Token::new(
                     TokenType::Boolean(false),
                     self.line,
                     self.column,
+                    Some(std::mem::take(&mut self.string_buffer)),
                 ));
             }
             // if the next character is whitespace or not alphanumeric, then we have an identifier
@@ -442,6 +499,7 @@ impl<'a> Tokenizer<'a> {
                     TokenType::Identifier(val.to_string()),
                     line,
                     column,
+                    Some(std::mem::take(&mut self.string_buffer)),
                 ));
             }
             _ => {}
@@ -449,7 +507,12 @@ impl<'a> Tokenizer<'a> {
             looped_char = self.next_char()?;
         }

-        Err(Error::UnknownKeywordOrIdentifierError(buffer, line, column))
+        Err(Error::UnknownKeywordOrIdentifierError(
+            buffer,
+            line,
+            column,
+            std::mem::take(&mut self.string_buffer),
+        ))
     }
 }
```

File 2 of 3: the Token struct

```diff
@@ -8,14 +8,21 @@ pub struct Token {
     pub line: usize,
     /// The column where the token was found
     pub column: usize,
+    pub original_string: Option<String>,
 }

 impl Token {
-    pub fn new(token_type: TokenType, line: usize, column: usize) -> Self {
+    pub fn new(
+        token_type: TokenType,
+        line: usize,
+        column: usize,
+        original: Option<String>,
+    ) -> Self {
         Self {
             token_type,
             line,
             column,
+            original_string: original,
         }
     }
 }
```
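The constructor change requires every call site to pass the new argument, but because `original` is an `Option`, contexts with no buffered text can pass `None`. A self-contained sketch of the widened `Token::new`, with a minimal stand-in `TokenType`:

```rust
// Stand-in TokenType; the real enum has Number, String, Keyword, etc.
#[derive(Debug)]
enum TokenType {
    Boolean(bool),
}

#[derive(Debug)]
struct Token {
    token_type: TokenType,
    line: usize,
    column: usize,
    original_string: Option<String>,
}

impl Token {
    fn new(token_type: TokenType, line: usize, column: usize, original: Option<String>) -> Self {
        Self { token_type, line, column, original_string: original }
    }
}

fn main() {
    // The tokenizer passes Some(buffer); None remains available for
    // synthetic tokens that have no backing source text.
    let token = Token::new(TokenType::Boolean(true), 1, 1, Some("true".to_string()));
    println!("{token:?}");
}
```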

File 3 of 3: the FFI layer (tokenize_line)

```diff
@@ -49,13 +49,13 @@ pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec<FfiToken> {
     for token in tokenizer {
         match token {
-            Err(TokenizerError::NumberParseError(_, _, col))
-            | Err(TokenizerError::UnknownSymbolError(_, _, col))
-            | Err(TokenizerError::DecimalParseError(_, _, col))
-            | Err(TokenizerError::UnknownKeywordOrIdentifierError(_, _, col)) => {
+            Err(TokenizerError::NumberParseError(_, _, col, ref original))
+            | Err(TokenizerError::UnknownSymbolError(_, _, col, ref original))
+            | Err(TokenizerError::DecimalParseError(_, _, col, ref original))
+            | Err(TokenizerError::UnknownKeywordOrIdentifierError(_, _, col, ref original)) => {
                 tokens.push(FfiToken {
                     column: col as i32,
-                    text: "".into(),
+                    text: original.to_string().into(),
                     tooltip: "".into(),
                     // Safety: it's okay to unwrap the err here because we are matching on the `Err` variant
                     error: token.unwrap_err().to_string().into(),
@@ -64,7 +64,10 @@ pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec<FfiToken> {
             }
             Err(_) => return safer_ffi::Vec::EMPTY,
             Ok(token) if !matches!(token.token_type, TokenType::EOF) => tokens.push(FfiToken {
-                text: token.token_type.to_string().into(),
+                text: token
+                    .original_string
+                    .unwrap_or(token.token_type.to_string())
+                    .into(),
                 tooltip: "".into(),
                 error: "".into(),
                 status: "".into(),
```
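One small nuance in the last hunk: `unwrap_or(token.token_type.to_string())` builds the fallback string eagerly even when `original_string` is populated, whereas `unwrap_or_else` would defer it until actually needed. A minimal illustration of the difference (the FFI types are not involved):

```rust
fn main() {
    let original: Option<String> = Some("1.5c".to_string());

    // unwrap_or: the fallback is constructed even though it is discarded here.
    let eager = original.clone().unwrap_or("Number(294.15)".to_string());

    // unwrap_or_else: the closure only runs when `original` is None.
    let lazy = original.unwrap_or_else(|| "Number(294.15)".to_string());

    assert_eq!(eager, lazy); // both yield "1.5c"
}
```

The cost is one transient allocation per token, so either form is defensible here.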