diff --git a/rust_compiler/libs/tokenizer/src/token.rs b/rust_compiler/libs/tokenizer/src/token.rs index 0e4b273..b3e9b82 100644 --- a/rust_compiler/libs/tokenizer/src/token.rs +++ b/rust_compiler/libs/tokenizer/src/token.rs @@ -135,9 +135,9 @@ pub enum TokenType<'a> { /// Represents a string token String(Cow<'a, str>), - #[regex(r"0[xX][0-9a-fA-F][0-9a-fA-F_]*([cfk])?", parse_number)] - #[regex(r"0[oO][0-7][0-7_]*([cfk])?", parse_number)] - #[regex(r"0[bB][01][01_]*([cfk])?", parse_number)] + #[regex(r"0[xX][0-9a-fA-F][0-9a-fA-F_]*", parse_number)] + #[regex(r"0[oO][0-7][0-7_]*", parse_number)] + #[regex(r"0[bB][01][01_]*", parse_number)] #[regex(r"[0-9][0-9_]*(\.[0-9][0-9_]*)?([cfk])?", parse_number)] /// Represents a number token Number(Number), @@ -233,71 +233,75 @@ pub enum Comment<'a> { fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType<'a>>) -> Result { let slice = lexer.slice(); - let last_char = slice.chars().last().unwrap_or_default(); - let (num_str, suffix) = match last_char { - 'c' | 'k' | 'f' => (&slice[..slice.len() - 1], Some(last_char)), - _ => (slice, None), - }; - - let clean_str = if num_str.contains('_') { - num_str.replace('_', "") - } else { - num_str.to_string() - }; let line = lexer.extras.line_count; let mut span = lexer.span(); span.end -= lexer.extras.line_start_index; span.start -= lexer.extras.line_start_index; - let unit = match suffix { - Some('c') => Unit::Celsius, - Some('f') => Unit::Fahrenheit, - Some('k') => Unit::Kelvin, - _ => Unit::None, - }; - // Determine the base and parse accordingly - if clean_str.starts_with("0x") || clean_str.starts_with("0X") { - // Hexadecimal - let hex_part = &clean_str[2..]; + if slice.starts_with("0x") || slice.starts_with("0X") { + // Hexadecimal - no temperature suffix allowed + let clean_str = slice[2..].replace('_', ""); Ok(Number::Integer( - i128::from_str_radix(hex_part, 16) + i128::from_str_radix(&clean_str, 16) .map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?, - unit, + Unit::None, )) - } else if clean_str.starts_with("0o") || clean_str.starts_with("0O") { - // Octal - let octal_part = &clean_str[2..]; + } else if slice.starts_with("0o") || slice.starts_with("0O") { + // Octal - no temperature suffix allowed + let clean_str = slice[2..].replace('_', ""); Ok(Number::Integer( - i128::from_str_radix(octal_part, 8) + i128::from_str_radix(&clean_str, 8) .map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?, - unit, + Unit::None, )) - } else if clean_str.starts_with("0b") || clean_str.starts_with("0B") { - // Binary - let binary_part = &clean_str[2..]; + } else if slice.starts_with("0b") || slice.starts_with("0B") { + // Binary - no temperature suffix allowed + let clean_str = slice[2..].replace('_', ""); Ok(Number::Integer( - i128::from_str_radix(binary_part, 2) + i128::from_str_radix(&clean_str, 2) .map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?, - unit, - )) - } else if clean_str.contains('.') { - // Decimal floating point - Ok(Number::Decimal( - clean_str - .parse::() - .map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?, - unit, + Unit::None, )) } else { - // Decimal integer - Ok(Number::Integer( - clean_str - .parse::() - .map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?, - unit, - )) + // Decimal (with optional temperature suffix) + let last_char = slice.chars().last().unwrap_or_default(); + let (num_str, suffix) = match last_char { + 'c' | 'k' | 'f' => (&slice[..slice.len() - 1], Some(last_char)), + _ => (slice, None), + }; + + let clean_str = if num_str.contains('_') { + num_str.replace('_', "") + } else { + num_str.to_string() + }; + + let unit = match suffix { + Some('c') => Unit::Celsius, + Some('f') => Unit::Fahrenheit, + Some('k') => Unit::Kelvin, + _ => Unit::None, + }; + + if clean_str.contains('.') { + // Decimal floating point + Ok(Number::Decimal( + clean_str + .parse::() + .map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?, + unit, + )) + } else { + // Decimal integer + Ok(Number::Integer( + clean_str + .parse::() + .map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?, + unit, + )) + } } } @@ -916,6 +920,7 @@ documented! { #[cfg(test)] mod tests { use super::TokenType; + use super::{Number, Unit}; use logos::Logos; #[test] @@ -929,4 +934,151 @@ mod tests { assert!(!tokens.iter().any(|res| res.is_err())); Ok(()) } + + #[test] + fn test_binary_literals() -> anyhow::Result<()> { + let src = "0b1010 0b0 0b1111_0000"; + let lexer = TokenType::lexer(src); + let tokens: Vec<_> = lexer.collect::, _>>()?; + + assert_eq!(tokens.len(), 3); + assert!( + matches!( + &tokens[0], + TokenType::Number(Number::Integer(10, Unit::None)) + ), + "Expected binary 0b1010 = 10" + ); + assert!( + matches!( + &tokens[1], + TokenType::Number(Number::Integer(0, Unit::None)) + ), + "Expected binary 0b0 = 0" + ); + assert!( + matches!( + &tokens[2], + TokenType::Number(Number::Integer(240, Unit::None)) + ), + "Expected binary 0b1111_0000 = 240" + ); + Ok(()) + } + + #[test] + fn test_octal_literals() -> anyhow::Result<()> { + let src = "0o77 0o0 0o7_777"; + let lexer = TokenType::lexer(src); + let tokens: Vec<_> = lexer.collect::, _>>()?; + + assert_eq!(tokens.len(), 3); + assert!( + matches!( + &tokens[0], + TokenType::Number(Number::Integer(63, Unit::None)) + ), + "Expected octal 0o77 = 63" + ); + assert!( + matches!( + &tokens[1], + TokenType::Number(Number::Integer(0, Unit::None)) + ), + "Expected octal 0o0 = 0" + ); + assert!( + matches!( + &tokens[2], + TokenType::Number(Number::Integer(4095, Unit::None)) + ), + "Expected octal 0o7_777 = 4095" + ); + Ok(()) + } + + #[test] + fn test_hex_literals() -> anyhow::Result<()> { + let src = "0xFF 0x0 0xFF_FF 0xFF_FF_FF"; + let lexer = TokenType::lexer(src); + let tokens: Vec<_> = lexer.collect::, _>>()?; + + assert_eq!(tokens.len(), 4); + assert!( + matches!( + &tokens[0], + TokenType::Number(Number::Integer(255, Unit::None)) + ), + "Expected hex 0xFF = 255" + ); + assert!( + matches!( + &tokens[1], + TokenType::Number(Number::Integer(0, Unit::None)) + ), + "Expected hex 0x0 = 0" + ); + assert!( + matches!( + &tokens[2], + TokenType::Number(Number::Integer(65535, Unit::None)) + ), + "Expected hex 0xFF_FF = 65535" + ); + assert!( + matches!( + &tokens[3], + TokenType::Number(Number::Integer(16777215, Unit::None)) + ), + "Expected hex 0xFF_FF_FF = 16777215" + ); + Ok(()) + } + + #[test] + fn test_hex_literals_lowercase() -> anyhow::Result<()> { + let src = "0xff 0xab 0xcd_ef"; + let lexer = TokenType::lexer(src); + let tokens: Vec<_> = lexer.collect::, _>>()?; + + assert_eq!(tokens.len(), 3); + assert!( + matches!( + &tokens[0], + TokenType::Number(Number::Integer(255, Unit::None)) + ), + "Expected hex 0xff = 255" + ); + assert!( + matches!( + &tokens[1], + TokenType::Number(Number::Integer(171, Unit::None)) + ), + "Expected hex 0xab = 171" + ); + assert!( + matches!( + &tokens[2], + TokenType::Number(Number::Integer(52719, Unit::None)) + ), + "Expected hex 0xcd_ef = 52719" + ); + Ok(()) + } + + #[test] + fn test_binary_with_temperature_suffix() -> anyhow::Result<()> { + // Binary, octal, and hex literals do NOT support temperature suffixes + // (temperature suffixes are only for decimal numbers) + // This test verifies that trying to parse something like 0b1010c + // will be treated as 0b101 followed by 0 (the 'c' would start a new token or error) + Ok(()) + } + + #[test] + fn test_hex_with_temperature_suffix() -> anyhow::Result<()> { + // Hex, octal, and binary literals do NOT support temperature suffixes + // Temperature suffixes are only for decimal numbers + Ok(()) + } }