From d40b759442f3622fbcd7683dbf5432c991d02f86 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Tue, 9 Dec 2025 13:17:35 -0700 Subject: [PATCH] TEST -- use Cow instead of String for tokens --- rust_compiler/libs/parser/src/lib.rs | 22 +-- rust_compiler/libs/parser/src/tree_node.rs | 190 ++++++++++----------- rust_compiler/libs/tokenizer/src/lib.rs | 16 +- rust_compiler/libs/tokenizer/src/token.rs | 50 +++--- 4 files changed, 140 insertions(+), 138 deletions(-) diff --git a/rust_compiler/libs/parser/src/lib.rs b/rust_compiler/libs/parser/src/lib.rs index abd07e0..968dbf2 100644 --- a/rust_compiler/libs/parser/src/lib.rs +++ b/rust_compiler/libs/parser/src/lib.rs @@ -26,27 +26,27 @@ macro_rules! boxed { } #[derive(Error, Debug)] -pub enum Error { +pub enum Error<'a> { #[error(transparent)] Tokenizer(#[from] tokenizer::Error), #[error("Unexpected token: {1}")] - UnexpectedToken(Span, Token), + UnexpectedToken(Span, Token<'a>), #[error("Duplicate identifier: {1}")] - DuplicateIdentifier(Span, Token), + DuplicateIdentifier(Span, Token<'a>), #[error("Invalid Syntax: {1}")] InvalidSyntax(Span, String), #[error("Unsupported Keyword: {1}")] - UnsupportedKeyword(Span, Token), + UnsupportedKeyword(Span, Token<'a>), #[error("Unexpected End of File")] UnexpectedEOF, } -impl From for lsp_types::Diagnostic { +impl<'a> From> for lsp_types::Diagnostic { fn from(value: Error) -> Self { use Error::*; use lsp_types::*; @@ -105,8 +105,8 @@ macro_rules! self_matches_current { pub struct Parser<'a> { tokenizer: TokenizerBuffer<'a>, - current_token: Option, - pub errors: Vec, + current_token: Option>, + pub errors: Vec>, } impl<'a> Parser<'a> { @@ -119,7 +119,7 @@ impl<'a> Parser<'a> { } /// Calculates a Span from a given Token reference. - fn token_to_span(t: &Token) -> Span { + fn token_to_span<'t>(t: &'t Token<'a>) -> Span { Span { start_line: t.line, start_col: t.span.start, @@ -269,14 +269,14 @@ impl<'a> Parser<'a> { Ok(expr) } - fn assign_next(&mut self) -> Result<(), Error> { + fn assign_next(&'a mut self) -> Result<(), Error> { self.current_token = self.tokenizer.next_token()?; Ok(()) } - fn get_next(&mut self) -> Result, Error> { + fn get_next(&'a mut self) -> Result, Error> { self.assign_next()?; - Ok(self.current_token.as_ref()) + Ok(self.current_token.clone()) } fn expression(&mut self) -> Result>, Error> { diff --git a/rust_compiler/libs/parser/src/tree_node.rs b/rust_compiler/libs/parser/src/tree_node.rs index b44759d..963975a 100644 --- a/rust_compiler/libs/parser/src/tree_node.rs +++ b/rust_compiler/libs/parser/src/tree_node.rs @@ -1,22 +1,22 @@ use super::sys_call::SysCall; use crate::sys_call; -use std::ops::Deref; +use std::{borrow::Cow, ops::Deref}; use tokenizer::token::Number; #[derive(Debug, Eq, PartialEq, Clone)] -pub enum Literal { +pub enum Literal<'a> { Number(Number), - String(String), + String(Cow<'a, str>), Boolean(bool), } #[derive(Debug, Eq, PartialEq, Clone)] -pub enum LiteralOr { - Literal(Spanned), +pub enum LiteralOr<'a, T> { + Literal(Spanned>), Or(Spanned), } -impl std::fmt::Display for LiteralOr { +impl<'a, T: std::fmt::Display> std::fmt::Display for LiteralOr<'a, T> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Literal(l) => write!(f, "{l}"), @@ -25,7 +25,7 @@ impl std::fmt::Display for LiteralOr { } } -impl std::fmt::Display for Literal { +impl<'a> std::fmt::Display for Literal<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Literal::Number(n) => write!(f, "{}", n), @@ -36,16 +36,16 @@ impl std::fmt::Display for Literal { } #[derive(Debug, PartialEq, Eq)] -pub enum BinaryExpression { - Add(Box>, Box>), - Multiply(Box>, Box>), - Divide(Box>, Box>), - Subtract(Box>, Box>), - Exponent(Box>, Box>), - Modulo(Box>, Box>), +pub enum BinaryExpression<'a> { + Add(Box>>, Box>>), + Multiply(Box>>, Box>>), + Divide(Box>>, Box>>), + Subtract(Box>>, Box>>), + Exponent(Box>>, Box>>), + Modulo(Box>>, Box>>), } -impl std::fmt::Display for BinaryExpression { +impl<'a> std::fmt::Display for BinaryExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { BinaryExpression::Add(l, r) => write!(f, "({} + {})", l, r), @@ -59,19 +59,19 @@ impl std::fmt::Display for BinaryExpression { } #[derive(Debug, PartialEq, Eq)] -pub enum LogicalExpression { - And(Box>, Box>), - Or(Box>, Box>), - Not(Box>), - Equal(Box>, Box>), - NotEqual(Box>, Box>), - GreaterThan(Box>, Box>), - GreaterThanOrEqual(Box>, Box>), - LessThan(Box>, Box>), - LessThanOrEqual(Box>, Box>), +pub enum LogicalExpression<'a> { + And(Box>>, Box>>), + Or(Box>>, Box>>), + Not(Box>>), + Equal(Box>>, Box>>), + NotEqual(Box>>, Box>>), + GreaterThan(Box>>, Box>>), + GreaterThanOrEqual(Box>>, Box>>), + LessThan(Box>>, Box>>), + LessThanOrEqual(Box>>, Box>>), } -impl std::fmt::Display for LogicalExpression { +impl<'a> std::fmt::Display for LogicalExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { LogicalExpression::And(l, r) => write!(f, "({} && {})", l, r), @@ -88,25 +88,25 @@ impl std::fmt::Display for LogicalExpression { } #[derive(Debug, PartialEq, Eq)] -pub struct AssignmentExpression { - pub assignee: Box>, - pub expression: Box>, +pub struct AssignmentExpression<'a> { + pub assignee: Box>>, + pub expression: Box>>, } -impl std::fmt::Display for AssignmentExpression { +impl<'a> std::fmt::Display for AssignmentExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "({} = {})", self.assignee, self.expression) } } #[derive(Debug, PartialEq, Eq)] -pub struct FunctionExpression { - pub name: Spanned, - pub arguments: Vec>, - pub body: BlockExpression, +pub struct FunctionExpression<'a> { + pub name: Spanned>, + pub arguments: Vec>>, + pub body: BlockExpression<'a>, } -impl std::fmt::Display for FunctionExpression { +impl<'a> std::fmt::Display for FunctionExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, @@ -123,9 +123,9 @@ impl std::fmt::Display for FunctionExpression { } #[derive(Debug, PartialEq, Eq)] -pub struct BlockExpression(pub Vec>); +pub struct BlockExpression<'a>(pub Vec>>); -impl std::fmt::Display for BlockExpression { +impl<'a> std::fmt::Display for BlockExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, @@ -140,12 +140,12 @@ impl std::fmt::Display for BlockExpression { } #[derive(Debug, PartialEq, Eq)] -pub struct InvocationExpression { - pub name: Spanned, - pub arguments: Vec>, +pub struct InvocationExpression<'a> { + pub name: Spanned>, + pub arguments: Vec>>, } -impl std::fmt::Display for InvocationExpression { +impl<'a> std::fmt::Display for InvocationExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, @@ -161,25 +161,25 @@ impl std::fmt::Display for InvocationExpression { } #[derive(Debug, PartialEq, Eq)] -pub struct MemberAccessExpression { - pub object: Box>, - pub member: Spanned, +pub struct MemberAccessExpression<'a> { + pub object: Box>>, + pub member: Spanned>, } -impl std::fmt::Display for MemberAccessExpression { +impl<'a> std::fmt::Display for MemberAccessExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}.{}", self.object, self.member) } } #[derive(Debug, PartialEq, Eq)] -pub struct MethodCallExpression { - pub object: Box>, - pub method: Spanned, - pub arguments: Vec>, +pub struct MethodCallExpression<'a> { + pub object: Box>>, + pub method: Spanned>, + pub arguments: Vec>>, } -impl std::fmt::Display for MethodCallExpression { +impl<'a> std::fmt::Display for MethodCallExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, @@ -196,12 +196,12 @@ impl std::fmt::Display for MethodCallExpression { } #[derive(Debug, PartialEq, Eq)] -pub enum LiteralOrVariable { - Literal(Literal), - Variable(Spanned), +pub enum LiteralOrVariable<'a> { + Literal(Literal<'a>), + Variable(Spanned>), } -impl std::fmt::Display for LiteralOrVariable { +impl<'a> std::fmt::Display for LiteralOrVariable<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { LiteralOrVariable::Literal(l) => write!(f, "{}", l), @@ -211,46 +211,46 @@ impl std::fmt::Display for LiteralOrVariable { } #[derive(Debug, PartialEq, Eq)] -pub struct ConstDeclarationExpression { - pub name: Spanned, - pub value: LiteralOr, +pub struct ConstDeclarationExpression<'a> { + pub name: Spanned>, + pub value: LiteralOr<'a, SysCall>, } -impl ConstDeclarationExpression { +impl<'a> ConstDeclarationExpression<'a> { pub fn is_syscall_supported(call: &SysCall) -> bool { use sys_call::System; matches!(call, SysCall::System(sys) if matches!(sys, System::Hash(_))) } } -impl std::fmt::Display for ConstDeclarationExpression { +impl<'a> std::fmt::Display for ConstDeclarationExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "(const {} = {})", self.name, self.value) } } #[derive(Debug, PartialEq, Eq)] -pub struct DeviceDeclarationExpression { +pub struct DeviceDeclarationExpression<'a> { /// any variable-like name - pub name: Spanned, + pub name: Spanned>, /// The device port, ex. (db, d0, d1, d2, d3, d4, d5) - pub device: String, + pub device: Cow<'a, str>, } -impl std::fmt::Display for DeviceDeclarationExpression { +impl<'a> std::fmt::Display for DeviceDeclarationExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "(device {} = {})", self.name, self.device) } } #[derive(Debug, PartialEq, Eq)] -pub struct IfExpression { - pub condition: Box>, - pub body: Spanned, - pub else_branch: Option>>, +pub struct IfExpression<'a> { + pub condition: Box>>, + pub body: Spanned>, + pub else_branch: Option>>>, } -impl std::fmt::Display for IfExpression { +impl<'a> std::fmt::Display for IfExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "(if ({}) {}", self.condition, self.body)?; if let Some(else_branch) = &self.else_branch { @@ -261,23 +261,23 @@ impl std::fmt::Display for IfExpression { } #[derive(Debug, PartialEq, Eq)] -pub struct LoopExpression { - pub body: Spanned, +pub struct LoopExpression<'a> { + pub body: Spanned>, } -impl std::fmt::Display for LoopExpression { +impl<'a> std::fmt::Display for LoopExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "(loop {})", self.body) } } #[derive(Debug, PartialEq, Eq)] -pub struct WhileExpression { - pub condition: Box>, - pub body: BlockExpression, +pub struct WhileExpression<'a> { + pub condition: Box>>, + pub body: BlockExpression<'a>, } -impl std::fmt::Display for WhileExpression { +impl<'a> std::fmt::Display for WhileExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "(while {} {})", self.condition, self.body) } @@ -345,32 +345,32 @@ impl Deref for Spanned { } #[derive(Debug, PartialEq, Eq)] -pub enum Expression { - Assignment(Spanned), - Binary(Spanned), - Block(Spanned), +pub enum Expression<'a> { + Assignment(Spanned>), + Binary(Spanned>), + Block(Spanned>), Break(Span), - ConstDeclaration(Spanned), + ConstDeclaration(Spanned>), Continue(Span), - Declaration(Spanned, Box>), - DeviceDeclaration(Spanned), - Function(Spanned), - If(Spanned), - Invocation(Spanned), - Literal(Spanned), - Logical(Spanned), - Loop(Spanned), - MemberAccess(Spanned), - MethodCall(Spanned), - Negation(Box>), - Priority(Box>), - Return(Box>), + Declaration(Spanned>, Box>>), + DeviceDeclaration(Spanned>), + Function(Spanned>), + If(Spanned>), + Invocation(Spanned>), + Literal(Spanned>), + Logical(Spanned>), + Loop(Spanned>), + MemberAccess(Spanned>), + MethodCall(Spanned>), + Negation(Box>>), + Priority(Box>>), + Return(Box>>), Syscall(Spanned), Variable(Spanned), - While(Spanned), + While(Spanned>), } -impl std::fmt::Display for Expression { +impl<'a> std::fmt::Display for Expression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Expression::Assignment(e) => write!(f, "{}", e), diff --git a/rust_compiler/libs/tokenizer/src/lib.rs b/rust_compiler/libs/tokenizer/src/lib.rs index 8138ea5..c991047 100644 --- a/rust_compiler/libs/tokenizer/src/lib.rs +++ b/rust_compiler/libs/tokenizer/src/lib.rs @@ -38,7 +38,7 @@ pub trait Tokenize: Read + Seek {} impl Tokenize for T where T: Read + Seek {} pub struct Tokenizer<'a> { - lexer: Lexer<'a, TokenType>, + lexer: Lexer<'a, TokenType<'a>>, returned_eof: bool, } @@ -52,14 +52,14 @@ impl<'a> From<&'a str> for Tokenizer<'a> { } impl<'a> Tokenizer<'a> { - fn get_token(&mut self, t_type: TokenType) -> Token { + fn get_token(&mut self, t_type: TokenType<'a>) -> Token<'a> { let mut span = self.lexer.span(); span.start -= self.lexer.extras.line_start_index; span.end -= self.lexer.extras.line_start_index; Token::new(t_type, self.lexer.extras.line_count, span) } - pub fn next_token(&mut self) -> Result, Error> { + pub fn next_token(&mut self) -> Result>, Error> { let mut current = self.lexer.next().transpose(); while matches!(current, Ok(Some(TokenType::Comment(_)))) { @@ -73,7 +73,7 @@ impl<'a> Tokenizer<'a> { // ... Iterator and TokenizerBuffer implementations remain unchanged ... // They just call the methods above which now use the passed-in start coordinates. impl<'a> Iterator for Tokenizer<'a> { - type Item = Result; + type Item = Result, Error>; fn next(&mut self) -> Option { match self.lexer.next() { None => { @@ -98,8 +98,8 @@ impl<'a> Iterator for Tokenizer<'a> { pub struct TokenizerBuffer<'a> { tokenizer: Tokenizer<'a>, - buffer: VecDeque, - history: VecDeque, + buffer: VecDeque>, + history: VecDeque>, index: i64, } @@ -112,7 +112,7 @@ impl<'a> TokenizerBuffer<'a> { index: 0, } } - pub fn next_token(&mut self) -> Result, Error> { + pub fn next_token(&mut self) -> Result>, Error> { if let Some(token) = self.buffer.pop_front() { self.history.push_back(token.clone()); self.index += 1; @@ -127,7 +127,7 @@ impl<'a> TokenizerBuffer<'a> { self.index += 1; Ok(token) } - pub fn peek(&mut self) -> Result, Error> { + pub fn peek(&mut self) -> Result>, Error> { if let Some(token) = self.buffer.front() { return Ok(Some(token.clone())); } diff --git a/rust_compiler/libs/tokenizer/src/token.rs b/rust_compiler/libs/tokenizer/src/token.rs index f4a5c99..bfda737 100644 --- a/rust_compiler/libs/tokenizer/src/token.rs +++ b/rust_compiler/libs/tokenizer/src/token.rs @@ -1,3 +1,5 @@ +use std::borrow::Cow; + use helpers::prelude::*; use logos::{Lexer, Logos, Skip, Span}; use lsp_types::{Diagnostic, DiagnosticSeverity, Position, Range}; @@ -43,7 +45,7 @@ impl From for Diagnostic { } impl LexError { - pub fn from_lexer<'a>(lex: &mut Lexer<'a, TokenType>) -> Self { + pub fn from_lexer<'a>(lex: &mut Lexer<'a, TokenType<'a>>) -> Self { let mut span = lex.span(); let line = lex.extras.line_count; span.start -= lex.extras.line_start_index; @@ -68,30 +70,30 @@ pub struct Extras { pub line_start_index: usize, } -fn update_line_index<'a>(lex: &mut Lexer<'a, TokenType>) -> Skip { +fn update_line_index<'a>(lex: &mut Lexer<'a, TokenType<'a>>) -> Skip { lex.extras.line_count += 1; lex.extras.line_start_index = lex.span().end; Skip } #[derive(Debug, PartialEq, Eq, Clone)] -pub struct Token { +pub struct Token<'a> { /// The type of the token - pub token_type: TokenType, + pub token_type: TokenType<'a>, /// The line where the token was found pub line: usize, /// The span where the token starts and ends pub span: Span, } -impl std::fmt::Display for Token { +impl<'a> std::fmt::Display for Token<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.token_type) } } -impl Token { - pub fn new(token_type: TokenType, line: usize, span: Span) -> Self { +impl<'a> Token<'a> { + pub fn new(token_type: TokenType<'a>, line: usize, span: Span) -> Self { Self { token_type, line, @@ -158,22 +160,22 @@ macro_rules! keyword { #[logos(skip r"[ \t\f]+")] #[logos(extras = Extras)] #[logos(error(LexError, LexError::from_lexer))] -pub enum TokenType { +pub enum TokenType<'a> { #[regex(r"\n", update_line_index)] Newline, // matches strings with double quotes #[regex(r#""(?:[^"\\]|\\.)*""#, |v| { let str = v.slice(); - str[1..str.len() - 1].to_string() + Cow::from(&str[1..str.len() - 1]) })] // matches strings with single quotes #[regex(r#"'(?:[^'\\]|\\.)*'"#, |v| { let str = v.slice(); - str[1..str.len() - 1].to_string() + Cow::from(&str[1..str.len() - 1]) })] /// Represents a string token - String(String), + String(Cow<'a, str>), #[regex(r"[0-9][0-9_]*(\.[0-9][0-9_]*)?([cfk])?", parse_number)] /// Represents a number token @@ -199,9 +201,9 @@ pub enum TokenType { /// Represents a keyword token Keyword(Keyword), - #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |v| v.slice().to_string())] + #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |v| Cow::from(v.slice()))] /// Represents an identifier token - Identifier(String), + Identifier(Cow<'a, str>), #[token("(", symbol!(LParen))] #[token(")", symbol!(RParen))] @@ -236,29 +238,29 @@ pub enum TokenType { #[token("//", |lex| Comment::Line(read_line(lex)))] #[token("///", |lex| Comment::Doc(read_line(lex)))] /// Represents a comment, both a line comment and a doc comment - Comment(Comment), + Comment(Comment<'a>), #[end] /// Represents an end of file token EOF, } -fn read_line<'a>(lexer: &mut Lexer<'a, TokenType>) -> String { +fn read_line<'a>(lexer: &mut Lexer<'a, TokenType<'a>>) -> Cow<'a, str> { let rem = lexer.remainder(); let len = rem.find('\n').unwrap_or(rem.len()); let content = rem[..len].trim().to_string(); lexer.bump(len); - content + Cow::from(content) } #[derive(Hash, Debug, Eq, PartialEq, Clone)] -pub enum Comment { - Line(String), - Doc(String), +pub enum Comment<'a> { + Line(Cow<'a, str>), + Doc(Cow<'a, str>), } -fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType>) -> Result { +fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType<'a>>) -> Result { let slice = lexer.slice(); let last_char = slice.chars().last().unwrap_or_default(); let (num_str, suffix) = match last_char { @@ -304,7 +306,7 @@ fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType>) -> Result std::fmt::Display for Comment<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Line(c) => write!(f, "// {}", c), @@ -321,7 +323,7 @@ impl std::fmt::Display for Comment { } } -impl Documentation for TokenType { +impl<'a> Documentation for TokenType<'a> { fn docs(&self) -> String { match self { Self::Keyword(k) => k.docs(), @@ -336,7 +338,7 @@ impl Documentation for TokenType { helpers::with_syscalls!(generate_check); -impl From for u32 { +impl<'a> From> for u32 { fn from(value: TokenType) -> Self { match value { TokenType::String(_) => 1, @@ -376,7 +378,7 @@ impl From for u32 { } } -impl std::fmt::Display for TokenType { +impl<'a> std::fmt::Display for TokenType<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { TokenType::String(s) => write!(f, "{}", s),