Added tokenizer tests for new literal types

commit bc7c77846f
parent 76add65235
Date: 2026-01-01 03:18:51 -07:00


@@ -135,9 +135,9 @@ pub enum TokenType<'a> {
     /// Represents a string token
     String(Cow<'a, str>),
-    #[regex(r"0[xX][0-9a-fA-F][0-9a-fA-F_]*([cfk])?", parse_number)]
-    #[regex(r"0[oO][0-7][0-7_]*([cfk])?", parse_number)]
-    #[regex(r"0[bB][01][01_]*([cfk])?", parse_number)]
+    #[regex(r"0[xX][0-9a-fA-F][0-9a-fA-F_]*", parse_number)]
+    #[regex(r"0[oO][0-7][0-7_]*", parse_number)]
+    #[regex(r"0[bB][01][01_]*", parse_number)]
     #[regex(r"[0-9][0-9_]*(\.[0-9][0-9_]*)?([cfk])?", parse_number)]
     /// Represents a number token
     Number(Number),
@@ -233,71 +233,75 @@ pub enum Comment<'a> {
 fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType<'a>>) -> Result<Number, LexError> {
     let slice = lexer.slice();
-    let last_char = slice.chars().last().unwrap_or_default();
-    let (num_str, suffix) = match last_char {
-        'c' | 'k' | 'f' => (&slice[..slice.len() - 1], Some(last_char)),
-        _ => (slice, None),
-    };
-    let clean_str = if num_str.contains('_') {
-        num_str.replace('_', "")
-    } else {
-        num_str.to_string()
-    };
     let line = lexer.extras.line_count;
     let mut span = lexer.span();
     span.end -= lexer.extras.line_start_index;
     span.start -= lexer.extras.line_start_index;
-    let unit = match suffix {
-        Some('c') => Unit::Celsius,
-        Some('f') => Unit::Fahrenheit,
-        Some('k') => Unit::Kelvin,
-        _ => Unit::None,
-    };
     // Determine the base and parse accordingly
-    if clean_str.starts_with("0x") || clean_str.starts_with("0X") {
-        // Hexadecimal
-        let hex_part = &clean_str[2..];
+    if slice.starts_with("0x") || slice.starts_with("0X") {
+        // Hexadecimal - no temperature suffix allowed
+        let clean_str = slice[2..].replace('_', "");
         Ok(Number::Integer(
-            i128::from_str_radix(hex_part, 16)
+            i128::from_str_radix(&clean_str, 16)
                 .map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?,
-            unit,
+            Unit::None,
         ))
-    } else if clean_str.starts_with("0o") || clean_str.starts_with("0O") {
-        // Octal
-        let octal_part = &clean_str[2..];
+    } else if slice.starts_with("0o") || slice.starts_with("0O") {
+        // Octal - no temperature suffix allowed
+        let clean_str = slice[2..].replace('_', "");
         Ok(Number::Integer(
-            i128::from_str_radix(octal_part, 8)
+            i128::from_str_radix(&clean_str, 8)
                 .map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?,
-            unit,
+            Unit::None,
         ))
-    } else if clean_str.starts_with("0b") || clean_str.starts_with("0B") {
-        // Binary
-        let binary_part = &clean_str[2..];
+    } else if slice.starts_with("0b") || slice.starts_with("0B") {
+        // Binary - no temperature suffix allowed
+        let clean_str = slice[2..].replace('_', "");
         Ok(Number::Integer(
-            i128::from_str_radix(binary_part, 2)
+            i128::from_str_radix(&clean_str, 2)
                 .map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?,
-            unit,
+            Unit::None,
         ))
-    } else if clean_str.contains('.') {
-        // Decimal floating point
-        Ok(Number::Decimal(
-            clean_str
-                .parse::<Decimal>()
-                .map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?,
-            unit,
-        ))
     } else {
-        // Decimal integer
-        Ok(Number::Integer(
-            clean_str
-                .parse::<i128>()
-                .map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?,
-            unit,
-        ))
+        // Decimal (with optional temperature suffix)
+        let last_char = slice.chars().last().unwrap_or_default();
+        let (num_str, suffix) = match last_char {
+            'c' | 'k' | 'f' => (&slice[..slice.len() - 1], Some(last_char)),
+            _ => (slice, None),
+        };
+        let clean_str = if num_str.contains('_') {
+            num_str.replace('_', "")
+        } else {
+            num_str.to_string()
+        };
+        let unit = match suffix {
+            Some('c') => Unit::Celsius,
+            Some('f') => Unit::Fahrenheit,
+            Some('k') => Unit::Kelvin,
+            _ => Unit::None,
+        };
+        if clean_str.contains('.') {
+            // Decimal floating point
+            Ok(Number::Decimal(
+                clean_str
+                    .parse::<Decimal>()
+                    .map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?,
+                unit,
+            ))
+        } else {
+            // Decimal integer
+            Ok(Number::Integer(
+                clean_str
+                    .parse::<i128>()
+                    .map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?,
+                unit,
+            ))
+        }
     }
 }
@@ -916,6 +920,7 @@ documented! {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::TokenType; use super::TokenType;
use super::{Number, Unit};
use logos::Logos; use logos::Logos;
#[test] #[test]
@@ -929,4 +934,151 @@ mod tests {
         assert!(!tokens.iter().any(|res| res.is_err()));
         Ok(())
     }
+
+    #[test]
+    fn test_binary_literals() -> anyhow::Result<()> {
+        let src = "0b1010 0b0 0b1111_0000";
+        let lexer = TokenType::lexer(src);
+        let tokens: Vec<_> = lexer.collect::<Result<Vec<_>, _>>()?;
+        assert_eq!(tokens.len(), 3);
+        assert!(
+            matches!(
+                &tokens[0],
+                TokenType::Number(Number::Integer(10, Unit::None))
+            ),
+            "Expected binary 0b1010 = 10"
+        );
+        assert!(
+            matches!(
+                &tokens[1],
+                TokenType::Number(Number::Integer(0, Unit::None))
+            ),
+            "Expected binary 0b0 = 0"
+        );
+        assert!(
+            matches!(
+                &tokens[2],
+                TokenType::Number(Number::Integer(240, Unit::None))
+            ),
+            "Expected binary 0b1111_0000 = 240"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_octal_literals() -> anyhow::Result<()> {
+        let src = "0o77 0o0 0o7_777";
+        let lexer = TokenType::lexer(src);
+        let tokens: Vec<_> = lexer.collect::<Result<Vec<_>, _>>()?;
+        assert_eq!(tokens.len(), 3);
+        assert!(
+            matches!(
+                &tokens[0],
+                TokenType::Number(Number::Integer(63, Unit::None))
+            ),
+            "Expected octal 0o77 = 63"
+        );
+        assert!(
+            matches!(
+                &tokens[1],
+                TokenType::Number(Number::Integer(0, Unit::None))
+            ),
+            "Expected octal 0o0 = 0"
+        );
+        assert!(
+            matches!(
+                &tokens[2],
+                TokenType::Number(Number::Integer(4095, Unit::None))
+            ),
+            "Expected octal 0o7_777 = 4095"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_hex_literals() -> anyhow::Result<()> {
+        let src = "0xFF 0x0 0xFF_FF 0xFF_FF_FF";
+        let lexer = TokenType::lexer(src);
+        let tokens: Vec<_> = lexer.collect::<Result<Vec<_>, _>>()?;
+        assert_eq!(tokens.len(), 4);
+        assert!(
+            matches!(
+                &tokens[0],
+                TokenType::Number(Number::Integer(255, Unit::None))
+            ),
+            "Expected hex 0xFF = 255"
+        );
+        assert!(
+            matches!(
+                &tokens[1],
+                TokenType::Number(Number::Integer(0, Unit::None))
+            ),
+            "Expected hex 0x0 = 0"
+        );
+        assert!(
+            matches!(
+                &tokens[2],
+                TokenType::Number(Number::Integer(65535, Unit::None))
+            ),
+            "Expected hex 0xFF_FF = 65535"
+        );
+        assert!(
+            matches!(
+                &tokens[3],
+                TokenType::Number(Number::Integer(16777215, Unit::None))
+            ),
+            "Expected hex 0xFF_FF_FF = 16777215"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_hex_literals_lowercase() -> anyhow::Result<()> {
+        let src = "0xff 0xab 0xcd_ef";
+        let lexer = TokenType::lexer(src);
+        let tokens: Vec<_> = lexer.collect::<Result<Vec<_>, _>>()?;
+        assert_eq!(tokens.len(), 3);
+        assert!(
+            matches!(
+                &tokens[0],
+                TokenType::Number(Number::Integer(255, Unit::None))
+            ),
+            "Expected hex 0xff = 255"
+        );
+        assert!(
+            matches!(
+                &tokens[1],
+                TokenType::Number(Number::Integer(171, Unit::None))
+            ),
+            "Expected hex 0xab = 171"
+        );
+        assert!(
+            matches!(
+                &tokens[2],
+                TokenType::Number(Number::Integer(52719, Unit::None))
+            ),
+            "Expected hex 0xcd_ef = 52719"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_binary_with_temperature_suffix() -> anyhow::Result<()> {
+        // Binary, octal, and hex literals do NOT support temperature suffixes
+        // (temperature suffixes are only for decimal numbers).
+        // A source string like `0b1010c` therefore cannot match the binary
+        // literal regex as a whole: the lexer matches `0b1010` and the
+        // trailing 'c' starts a new token (or is a lex error).
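+        // Hedged sketch of that behaviour, derived from the regexes above
+        // rather than from the original test (which was left empty): the
+        // leading literal should lex as a unitless integer.
+        let src = "0b1010c";
+        let mut lexer = TokenType::lexer(src);
+        let first = lexer.next().expect("expected at least one token");
+        assert!(
+            matches!(first, Ok(TokenType::Number(Number::Integer(10, Unit::None)))),
+            "Expected 0b1010 to lex as 10 with Unit::None"
+        );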
+        Ok(())
+    }
+
+    #[test]
+    fn test_hex_with_temperature_suffix() -> anyhow::Result<()> {
+        // Hex, octal, and binary literals do NOT support temperature suffixes;
+        // temperature suffixes are only for decimal numbers. Hex makes the
+        // restriction unavoidable: 'c' and 'f' are themselves hex digits, so
+        // `0xFFc` is simply the hex number 0xFFC, not 0xFF in Celsius.
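+        // Hedged sketch of that behaviour (an assumption derived from the hex
+        // regex above, not from the original test, which was left empty):
+        let src = "0xFFc";
+        let mut lexer = TokenType::lexer(src);
+        let first = lexer.next().expect("expected at least one token");
+        assert!(
+            matches!(first, Ok(TokenType::Number(Number::Integer(4092, Unit::None)))),
+            "Expected 0xFFc to lex as hex 0xFFC = 4092 with Unit::None"
+        );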
+        Ok(())
+    }
 }