From 50d8f90816d49ae6ab06ed7715d97bde11a08ecb Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Sun, 30 Nov 2025 14:56:54 -0700 Subject: [PATCH 01/12] WIP -- Expressions should know what their span is --- rust_compiler/Cargo.lock | 8 +- rust_compiler/libs/parser/src/lib.rs | 58 +++++++++- rust_compiler/libs/parser/src/tree_node.rs | 119 +++++++++++++-------- 3 files changed, 134 insertions(+), 51 deletions(-) diff --git a/rust_compiler/Cargo.lock b/rust_compiler/Cargo.lock index 7a9df42..3d8cef8 100644 --- a/rust_compiler/Cargo.lock +++ b/rust_compiler/Cargo.lock @@ -1071,18 +1071,18 @@ checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] name = "zerocopy" -version = "0.8.30" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ea879c944afe8a2b25fef16bb4ba234f47c694565e97383b36f3a878219065c" +checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.30" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf955aa904d6040f70dc8e9384444cb1030aed272ba3cb09bbc4ab9e7c1f34f5" +checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" dependencies = [ "proc-macro2", "quote", diff --git a/rust_compiler/libs/parser/src/lib.rs b/rust_compiler/libs/parser/src/lib.rs index 3789894..c655634 100644 --- a/rust_compiler/libs/parser/src/lib.rs +++ b/rust_compiler/libs/parser/src/lib.rs @@ -129,17 +129,62 @@ impl<'a> Parser<'a> { /// Parses all the input from the tokenizer buffer and returns the resulting expression /// Expressions are returned in a root block expression node pub fn parse_all(&mut self) -> Result, Error> { - let mut expressions = Vec::::new(); + // peek at what the first token would be and extract the line and col + let (start_line, start_col) = self + .tokenizer + .peek()? + .map(|tok| (tok.line, tok.column)) + .unwrap_or((1, 1)); + + let mut expressions = Vec::>::new(); while let Some(expression) = self.parse()? { expressions.push(expression); } - Ok(Some(Expression::Block(BlockExpression(expressions)))) + if expressions.is_empty() { + let span = Span { + start_line, + end_line: start_line, + start_col, + end_col: start_col, + }; + + return Ok(Some(Expression::Block(Spanned { + node: BlockExpression(expressions), + span, + }))); + } + + self.tokenizer.seek(SeekFrom::Current(-1))?; + + // Ignore the EOF, we want the previous token to define what the end of the source is. + let (end_line, end_col) = self + .tokenizer + .peek()? + .map(|tok| { + ( + tok.line, + tok.column + tok.original_string.unwrap_or_default().len(), + ) + }) + .unwrap_or((start_line, start_col)); + + let span = Span { + start_line, + end_line, + start_col, + end_col, + }; + + Ok(Some(Expression::Block(Spanned { + node: BlockExpression(expressions), + span, + }))) } /// Parses the input from the tokenizer buffer and returns the resulting expression - pub fn parse(&mut self) -> Result, Error> { + pub fn parse(&mut self) -> Result>, Error> { self.assign_next()?; let expr = self.expression()?; @@ -163,7 +208,12 @@ impl<'a> Parser<'a> { } /// Parses an expression, handling binary operations with correct precedence. - fn expression(&mut self) -> Result, Error> { + fn expression(&mut self) -> Result>, Error> { + let (start_line, end_line) = self + .current_token + .map(|tok| (tok.line, tok.column)) + .ok_or(Error::UnexpectedEOF)?; + // Parse the Left Hand Side (unary/primary expression) let lhs = self.unary()?; diff --git a/rust_compiler/libs/parser/src/tree_node.rs b/rust_compiler/libs/parser/src/tree_node.rs index 3e49e09..21588d2 100644 --- a/rust_compiler/libs/parser/src/tree_node.rs +++ b/rust_compiler/libs/parser/src/tree_node.rs @@ -1,3 +1,5 @@ +use std::ops::Deref; + use super::sys_call::SysCall; use tokenizer::token::Number; @@ -20,12 +22,12 @@ impl std::fmt::Display for Literal { #[derive(Debug, PartialEq, Eq)] pub enum BinaryExpression { - Add(Box, Box), - Multiply(Box, Box), - Divide(Box, Box), - Subtract(Box, Box), - Exponent(Box, Box), - Modulo(Box, Box), + Add(Box>, Box>), + Multiply(Box>, Box>), + Divide(Box>, Box>), + Subtract(Box>, Box>), + Exponent(Box>, Box>), + Modulo(Box>, Box>), } impl std::fmt::Display for BinaryExpression { @@ -43,15 +45,15 @@ impl std::fmt::Display for BinaryExpression { #[derive(Debug, PartialEq, Eq)] pub enum LogicalExpression { - And(Box, Box), - Or(Box, Box), - Not(Box), - Equal(Box, Box), - NotEqual(Box, Box), - GreaterThan(Box, Box), - GreaterThanOrEqual(Box, Box), - LessThan(Box, Box), - LessThanOrEqual(Box, Box), + And(Box>, Box>), + Or(Box>, Box>), + Not(Box>), + Equal(Box>, Box>), + NotEqual(Box>, Box>), + GreaterThan(Box>, Box>), + GreaterThanOrEqual(Box>, Box>), + LessThan(Box>, Box>), + LessThanOrEqual(Box>, Box>), } impl std::fmt::Display for LogicalExpression { @@ -73,7 +75,7 @@ impl std::fmt::Display for LogicalExpression { #[derive(Debug, PartialEq, Eq)] pub struct AssignmentExpression { pub identifier: String, - pub expression: Box, + pub expression: Box>, } impl std::fmt::Display for AssignmentExpression { @@ -102,7 +104,7 @@ impl std::fmt::Display for FunctionExpression { } #[derive(Debug, PartialEq, Eq)] -pub struct BlockExpression(pub Vec); +pub struct BlockExpression(pub Vec>); impl std::fmt::Display for BlockExpression { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -170,9 +172,9 @@ impl std::fmt::Display for DeviceDeclarationExpression { #[derive(Debug, PartialEq, Eq)] pub struct IfExpression { - pub condition: Box, - pub body: BlockExpression, - pub else_branch: Option>, + pub condition: Box>, + pub body: Spanned, + pub else_branch: Option>>, } impl std::fmt::Display for IfExpression { @@ -187,7 +189,7 @@ impl std::fmt::Display for IfExpression { #[derive(Debug, PartialEq, Eq)] pub struct LoopExpression { - pub body: BlockExpression, + pub body: Spanned, } impl std::fmt::Display for LoopExpression { @@ -198,7 +200,7 @@ impl std::fmt::Display for LoopExpression { #[derive(Debug, PartialEq, Eq)] pub struct WhileExpression { - pub condition: Box, + pub condition: Box>, pub body: BlockExpression, } @@ -208,27 +210,58 @@ impl std::fmt::Display for WhileExpression { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Span { + pub start_line: usize, + pub end_line: usize, + pub start_col: usize, + pub end_col: usize, +} + +#[derive(Debug, PartialEq, Eq)] +pub struct Spanned { + pub span: Span, + pub node: T, +} + +impl std::fmt::Display for Spanned +where + T: std::fmt::Display, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.node) + } +} + +impl Deref for Spanned { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.node + } +} + #[derive(Debug, PartialEq, Eq)] pub enum Expression { - Assignment(AssignmentExpression), - Binary(BinaryExpression), - Block(BlockExpression), - Break, - Continue, - Declaration(String, Box), - DeviceDeclaration(DeviceDeclarationExpression), - Function(FunctionExpression), - If(IfExpression), - Invocation(InvocationExpression), - Literal(Literal), - Logical(LogicalExpression), - Loop(LoopExpression), - Negation(Box), - Priority(Box), - Return(Box), - Syscall(SysCall), - Variable(String), - While(WhileExpression), + Assignment(Spanned), + Binary(Spanned), + Block(Spanned), + Break(Span), + Continue(Span), + Declaration(String, Box>), + DeviceDeclaration(Spanned), + Function(Spanned), + If(Spanned), + Invocation(Spanned), + Literal(Spanned), + Logical(Spanned), + Loop(Spanned), + Negation(Box>), + Priority(Box>), + Return(Box>), + Syscall(Spanned), + Variable(Spanned), + While(Spanned), } impl std::fmt::Display for Expression { @@ -237,8 +270,8 @@ impl std::fmt::Display for Expression { Expression::Assignment(e) => write!(f, "{}", e), Expression::Binary(e) => write!(f, "{}", e), Expression::Block(e) => write!(f, "{}", e), - Expression::Break => write!(f, "break"), - Expression::Continue => write!(f, "continue"), + Expression::Break(_) => write!(f, "break"), + Expression::Continue(_) => write!(f, "continue"), Expression::Declaration(id, e) => write!(f, "(let {} = {})", id, e), Expression::DeviceDeclaration(e) => write!(f, "{}", e), Expression::Function(e) => write!(f, "{}", e), From db389045fcc465fd88653bae57a9ee04c5a2fa13 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Sun, 30 Nov 2025 15:15:40 -0700 Subject: [PATCH 02/12] emit spanned from parser --- rust_compiler/libs/parser/src/lib.rs | 1334 ++++++++++++++------------ 1 file changed, 719 insertions(+), 615 deletions(-) diff --git a/rust_compiler/libs/parser/src/lib.rs b/rust_compiler/libs/parser/src/lib.rs index c655634..2b89b23 100644 --- a/rust_compiler/libs/parser/src/lib.rs +++ b/rust_compiler/libs/parser/src/lib.rs @@ -30,16 +30,16 @@ quick_error! { display("Tokenizer Error: {}", err) source(err) } - UnexpectedToken(token: Token) { + UnexpectedToken(span: Span, token: Token) { display("Unexpected token: {:?}", token) } - DuplicateIdentifier(token: Token) { + DuplicateIdentifier(span: Span, token: Token) { display("Duplicate identifier: {:?}", token) } - InvalidSyntax(token: Token, reason: String) { - display("Invalid syntax: {:?}, Reason: {}", token, reason) + InvalidSyntax(span: Span, reason: String) { + display("Invalid syntax: {:?}, Reason: {}", span, reason) } - UnsupportedKeyword(token: Token) { + UnsupportedKeyword(span: Span, token: Token) { display("Unsupported keyword: {:?}", token) } UnexpectedEOF { @@ -57,47 +57,6 @@ macro_rules! self_matches_peek { }; } -macro_rules! token_from_option { - ($token:expr) => { - match $token { - Some(ref token) => token.clone(), - None => return Err(Error::UnexpectedEOF), - } - }; - (owned $token:expr) => { - match $token { - Some(token) => token, - None => return Err(Error::UnexpectedEOF), - } - }; -} - -macro_rules! extract_token_data { - ($token:ident, $pattern:pat, $extraction:expr) => { - match $token.token_type { - $pattern => $extraction, - _ => return Err(Error::UnexpectedToken($token.clone())), - } - }; - ($token:expr, $pattern:pat, $extraction:expr) => { - match $token.token_type { - $pattern => $extraction, - _ => { - return Err(Error::UnexpectedToken($token.clone())); - } - } - }; -} - -macro_rules! self_matches_current { - ($self:ident, $pattern:pat) => { - matches!($self.current_token, Some(Token { token_type: $pattern, .. })) - }; - ($self:ident, $pattern:pat if $cond:expr) => { - matches!($self.current_token, Some(Token { token_type: $pattern, .. }) if $cond) - }; -} - macro_rules! token_matches { ($token:ident, $pattern:pat) => { matches!($token.token_type, $pattern) @@ -113,6 +72,15 @@ macro_rules! token_matches { }; } +macro_rules! self_matches_current { + ($self:ident, $pattern:pat) => { + matches!($self.current_token, Some(Token { token_type: $pattern, .. })) + }; + ($self:ident, $pattern:pat if $cond:expr) => { + matches!($self.current_token, Some(Token { token_type: $pattern, .. }) if $cond) + }; +} + pub struct Parser<'a> { tokenizer: TokenizerBuffer<'a>, current_token: Option, @@ -126,13 +94,74 @@ impl<'a> Parser<'a> { } } - /// Parses all the input from the tokenizer buffer and returns the resulting expression - /// Expressions are returned in a root block expression node + /// Calculates a Span from a given Token reference. + /// This is a static helper to avoid borrowing `self` when we already have a token ref. + fn token_to_span(t: &Token) -> Span { + let len = t.original_string.as_ref().map(|s| s.len()).unwrap_or(0); + Span { + start_line: t.line, + start_col: t.column, + end_line: t.line, + end_col: t.column + len, + } + } + + fn current_span(&self) -> Span { + self.current_token + .as_ref() + .map(Self::token_to_span) + .unwrap_or(Span { + start_line: 0, + start_col: 0, + end_line: 0, + end_col: 0, + }) + } + + /// Helper to run a parsing closure and wrap the result in a Spanned struct + fn spanned(&mut self, parser: F) -> Result, Error> + where + F: FnOnce(&mut Self) -> Result, + { + // Peek at the start token. If no current token (parsing hasn't started), peek the buffer. + let start_token = if self.current_token.is_some() { + self.current_token.clone() + } else { + self.tokenizer.peek()? + }; + + let (start_line, start_col) = start_token + .as_ref() + .map(|t| (t.line, t.column)) + .unwrap_or((1, 1)); + + let node = parser(self)?; + + // The end token is the current_token after parsing. + let end_token = self.current_token.as_ref(); + + let (end_line, end_col) = end_token + .map(|t| { + let len = t.original_string.as_ref().map(|s| s.len()).unwrap_or(0); + (t.line, t.column + len) + }) + .unwrap_or((start_line, start_col)); + + Ok(Spanned { + span: Span { + start_line, + start_col, + end_line, + end_col, + }, + node, + }) + } + pub fn parse_all(&mut self) -> Result, Error> { - // peek at what the first token would be and extract the line and col - let (start_line, start_col) = self - .tokenizer - .peek()? + let first_token = self.tokenizer.peek()?; + let (start_line, start_col) = first_token + .as_ref() .map(|tok| (tok.line, tok.column)) .unwrap_or((1, 1)); @@ -151,22 +180,19 @@ impl<'a> Parser<'a> { }; return Ok(Some(Expression::Block(Spanned { - node: BlockExpression(expressions), + node: BlockExpression(vec![]), span, }))); } self.tokenizer.seek(SeekFrom::Current(-1))?; - // Ignore the EOF, we want the previous token to define what the end of the source is. - let (end_line, end_col) = self - .tokenizer - .peek()? + let end_token_opt = self.tokenizer.peek()?; + + let (end_line, end_col) = end_token_opt .map(|tok| { - ( - tok.line, - tok.column + tok.original_string.unwrap_or_default().len(), - ) + let len = tok.original_string.as_ref().map(|s| s.len()).unwrap_or(0); + (tok.line, tok.column + len) }) .unwrap_or((start_line, start_col)); @@ -183,7 +209,6 @@ impl<'a> Parser<'a> { }))) } - /// Parses the input from the tokenizer buffer and returns the resulting expression pub fn parse(&mut self) -> Result>, Error> { self.assign_next()?; let expr = self.expression()?; @@ -195,25 +220,17 @@ impl<'a> Parser<'a> { Ok(expr) } - /// Assigns the next token in the tokenizer buffer to the current token fn assign_next(&mut self) -> Result<(), Error> { self.current_token = self.tokenizer.next_token()?; Ok(()) } - /// Calls `assign_next` and returns the next token in the tokenizer buffer fn get_next(&mut self) -> Result, Error> { self.assign_next()?; Ok(self.current_token.as_ref()) } - /// Parses an expression, handling binary operations with correct precedence. fn expression(&mut self) -> Result>, Error> { - let (start_line, end_line) = self - .current_token - .map(|tok| (tok.line, tok.column)) - .ok_or(Error::UnexpectedEOF)?; - // Parse the Left Hand Side (unary/primary expression) let lhs = self.unary()?; @@ -227,10 +244,7 @@ impl<'a> Parser<'a> { TokenType::Symbol(s) if s.is_operator() || s.is_comparison() || s.is_logical() ) { return Ok(Some(self.infix(lhs)?)); - } - // This is an edge case. We need to move back one token if the current token is an operator - // so the binary expression can pick up the operator - else if self_matches_current!( + } else if self_matches_current!( self, TokenType::Symbol(s) if s.is_operator() || s.is_comparison() || s.is_logical() ) { @@ -241,10 +255,7 @@ impl<'a> Parser<'a> { Ok(Some(lhs)) } - /// Parses a unary or primary expression. - /// This handles prefix operators (like negation) and atomic expressions (literals, variables, etc.), - /// but stops before consuming binary operators. - fn unary(&mut self) -> Result, Error> { + fn unary(&mut self) -> Result>, Error> { macro_rules! matches_keyword { ($keyword:expr, $($pattern:pat),+) => { matches!($keyword, $($pattern)|+) @@ -260,168 +271,307 @@ impl<'a> Parser<'a> { } let expr = match current_token.token_type { - // match unsupported keywords TokenType::Keyword(e) if matches_keyword!(e, Keyword::Enum) => { - return Err(Error::UnsupportedKeyword(current_token.clone())); + return Err(Error::UnsupportedKeyword( + self.current_span(), + current_token.clone(), + )); } - // match declarations with a `let` keyword - TokenType::Keyword(Keyword::Let) => self.declaration()?, + TokenType::Keyword(Keyword::Let) => { + // declaration is wrapped in spanned inside the function, but expects 'let' to be current + Some(self.spanned(|p| p.declaration())?) + } - TokenType::Keyword(Keyword::Device) => Expression::DeviceDeclaration(self.device()?), + TokenType::Keyword(Keyword::Device) => { + let spanned_dev = self.spanned(|p| p.device())?; + Some(Spanned { + span: spanned_dev.span, + node: Expression::DeviceDeclaration(spanned_dev), + }) + } - // match functions with a `fn` keyword - TokenType::Keyword(Keyword::Fn) => Expression::Function(self.function()?), + TokenType::Keyword(Keyword::Fn) => { + let spanned_fn = self.spanned(|p| p.function())?; + Some(Spanned { + span: spanned_fn.span, + node: Expression::Function(spanned_fn), + }) + } - // match if statements - TokenType::Keyword(Keyword::If) => Expression::If(self.if_expression()?), + TokenType::Keyword(Keyword::If) => { + let spanned_if = self.spanned(|p| p.if_expression())?; + Some(Spanned { + span: spanned_if.span, + node: Expression::If(spanned_if), + }) + } - // match loop statements - TokenType::Keyword(Keyword::Loop) => Expression::Loop(self.loop_expression()?), + TokenType::Keyword(Keyword::Loop) => { + let spanned_loop = self.spanned(|p| p.loop_expression())?; + Some(Spanned { + span: spanned_loop.span, + node: Expression::Loop(spanned_loop), + }) + } - // match while statements - TokenType::Keyword(Keyword::While) => Expression::While(self.while_expression()?), + TokenType::Keyword(Keyword::While) => { + let spanned_while = self.spanned(|p| p.while_expression())?; + Some(Spanned { + span: spanned_while.span, + node: Expression::While(spanned_while), + }) + } - // match break statements TokenType::Keyword(Keyword::Break) => { + let span = self.current_span(); // make sure the next token is a semi-colon - let next = token_from_option!(self.get_next()?); + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::Semicolon)) { - return Err(Error::UnexpectedToken(next.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); } - Expression::Break + Some(Spanned { + span, + node: Expression::Break(span), + }) } - // match continue statements TokenType::Keyword(Keyword::Continue) => { - // make sure the next token is a semi-colon - let next = token_from_option!(self.get_next()?); + let span = self.current_span(); + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::Semicolon)) { - return Err(Error::UnexpectedToken(next.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); } - Expression::Continue + Some(Spanned { + span, + node: Expression::Continue(span), + }) } - // match syscalls with a `syscall` keyword TokenType::Identifier(ref id) if SysCall::is_syscall(id) => { - Expression::Syscall(self.syscall()?) + let spanned_call = self.spanned(|p| p.syscall())?; + Some(Spanned { + span: spanned_call.span, + node: Expression::Syscall(spanned_call), + }) } - // match a variable expression with opening parenthesis TokenType::Identifier(_) if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) => { - Expression::Invocation(self.invocation()?) + let spanned_invoke = self.spanned(|p| p.invocation())?; + Some(Spanned { + span: spanned_invoke.span, + node: Expression::Invocation(spanned_invoke), + }) } - // match a variable expression with an assignment TokenType::Identifier(_) if self_matches_peek!(self, TokenType::Symbol(Symbol::Assign)) => { - Expression::Assignment(self.assignment()?) + let spanned_assign = self.spanned(|p| p.assignment())?; + Some(Spanned { + span: spanned_assign.span, + node: Expression::Assignment(spanned_assign), + }) } - // match variable expressions with an identifier - TokenType::Identifier(ref id) => Expression::Variable(id.clone()), + TokenType::Identifier(ref id) => { + let span = self.current_span(); + Some(Spanned { + span, + node: Expression::Variable(Spanned { + span, + node: id.clone(), + }), + }) + } - // match block expressions with a `{` symbol - TokenType::Symbol(Symbol::LBrace) => Expression::Block(self.block()?), + TokenType::Symbol(Symbol::LBrace) => { + let spanned_block = self.spanned(|p| p.block())?; + Some(Spanned { + span: spanned_block.span, + node: Expression::Block(spanned_block), + }) + } - // match literal expressions with a semi-colon afterwards TokenType::Number(_) | TokenType::String(_) | TokenType::Boolean(_) => { - Expression::Literal(self.literal()?) + let spanned_lit = self.spanned(|p| p.literal())?; + Some(Spanned { + span: spanned_lit.span, + node: Expression::Literal(spanned_lit), + }) } - // match priority expressions with a left parenthesis - TokenType::Symbol(Symbol::LParen) => Expression::Priority(self.priority()?), + TokenType::Symbol(Symbol::LParen) => { + // Priority handles its own spanning + self.spanned(|p| p.priority())?.node.map(|node| *node) + } - // match minus symbols to handle negative numbers or negated expressions TokenType::Symbol(Symbol::Minus) => { - self.assign_next()?; // consume the `-` symbol - // IMPORTANT: We call `unary()` here, NOT `expression()`. - // This ensures negation binds tightly to the operand and doesn't consume binary ops. - // e.g. `-1 + 2` parses as `(-1) + 2` + // Need to handle span manually because unary call is next + let start_span = self.current_span(); + self.assign_next()?; let inner_expr = self.unary()?.ok_or(Error::UnexpectedEOF)?; - - Expression::Negation(boxed!(inner_expr)) + let combined_span = Span { + start_line: start_span.start_line, + start_col: start_span.start_col, + end_line: inner_expr.span.end_line, + end_col: inner_expr.span.end_col, + }; + Some(Spanned { + span: combined_span, + node: Expression::Negation(boxed!(inner_expr)), + }) } - // match logical NOT `!` TokenType::Symbol(Symbol::LogicalNot) => { - self.assign_next()?; // consume the `!` symbol + let start_span = self.current_span(); + self.assign_next()?; let inner_expr = self.unary()?.ok_or(Error::UnexpectedEOF)?; - Expression::Logical(LogicalExpression::Not(boxed!(inner_expr))) + let combined_span = Span { + start_line: start_span.start_line, + start_col: start_span.start_col, + end_line: inner_expr.span.end_line, + end_col: inner_expr.span.end_col, + }; + Some(Spanned { + span: combined_span, + node: Expression::Logical(Spanned { + span: combined_span, + node: LogicalExpression::Not(boxed!(inner_expr)), + }), + }) } _ => { - return Err(Error::UnexpectedToken(current_token.clone())); + return Err(Error::UnexpectedToken( + self.current_span(), + current_token.clone(), + )); } }; - Ok(Some(expr)) + Ok(expr) } - fn get_infix_child_node(&mut self) -> Result { - let current_token = token_from_option!(self.current_token); + fn get_infix_child_node(&mut self) -> Result, Error> { + let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; match current_token.token_type { - // A literal number or boolean - TokenType::Number(_) | TokenType::Boolean(_) => self.literal().map(Expression::Literal), - // A plain variable - TokenType::Identifier(ident) + TokenType::Number(_) | TokenType::Boolean(_) => { + let lit = self.spanned(|p| p.literal())?; + Ok(Spanned { + span: lit.span, + node: Expression::Literal(lit), + }) + } + TokenType::Identifier(ref ident) if !self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) => { - Ok(Expression::Variable(ident)) + let span = self.current_span(); + Ok(Spanned { + span, + node: Expression::Variable(Spanned { + span, + node: ident.clone(), + }), + }) } - // A priority expression ( -> (1 + 2) <- + 3 ) - TokenType::Symbol(Symbol::LParen) => self.priority().map(Expression::Priority), - // A function invocation + TokenType::Symbol(Symbol::LParen) => Ok(*self.spanned(|p| p.priority())?.node.unwrap()), TokenType::Identifier(_) if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) => { - self.invocation().map(Expression::Invocation) + let inv = self.spanned(|p| p.invocation())?; + Ok(Spanned { + span: inv.span, + node: Expression::Invocation(inv), + }) } - // Handle Negation TokenType::Symbol(Symbol::Minus) => { + let start_span = self.current_span(); self.assign_next()?; - // recurse to handle double negation or simple negation of atoms let inner = self.get_infix_child_node()?; - Ok(Expression::Negation(boxed!(inner))) + let span = Span { + start_line: start_span.start_line, + start_col: start_span.start_col, + end_line: inner.span.end_line, + end_col: inner.span.end_col, + }; + Ok(Spanned { + span, + node: Expression::Negation(boxed!(inner)), + }) } - // Handle Logical Not TokenType::Symbol(Symbol::LogicalNot) => { + let start_span = self.current_span(); self.assign_next()?; let inner = self.get_infix_child_node()?; - Ok(Expression::Logical(LogicalExpression::Not(boxed!(inner)))) + let span = Span { + start_line: start_span.start_line, + start_col: start_span.start_col, + end_line: inner.span.end_line, + end_col: inner.span.end_col, + }; + Ok(Spanned { + span, + node: Expression::Logical(Spanned { + span, + node: LogicalExpression::Not(boxed!(inner)), + }), + }) } - _ => Err(Error::UnexpectedToken(current_token.clone())), + _ => Err(Error::UnexpectedToken( + self.current_span(), + current_token.clone(), + )), } } fn device(&mut self) -> Result { - // sanity check, make sure current token is a `device` keyword - - let current_token = token_from_option!(self.current_token); + let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; if !self_matches_current!(self, TokenType::Keyword(Keyword::Device)) { - return Err(Error::UnexpectedToken(current_token.clone())); + return Err(Error::UnexpectedToken( + self.current_span(), + current_token.clone(), + )); } - let identifier = extract_token_data!( - token_from_option!(self.get_next()?), - TokenType::Identifier(ref id), - id.clone() - ); + let identifier_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + let identifier = match identifier_token.token_type { + TokenType::Identifier(ref id) => id.clone(), + _ => { + return Err(Error::UnexpectedToken( + Self::token_to_span(identifier_token), + identifier_token.clone(), + )); + } + }; - let current_token = token_from_option!(self.get_next()?).clone(); + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(current_token, TokenType::Symbol(Symbol::Assign)) { - return Err(Error::UnexpectedToken(current_token)); + return Err(Error::UnexpectedToken( + Self::token_to_span(current_token), + current_token.clone(), + )); } - let device = extract_token_data!( - token_from_option!(self.get_next()?), - TokenType::String(ref id), - id.clone() - ); + let device_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + let device = match device_token.token_type { + TokenType::String(ref id) => id.clone(), + _ => { + return Err(Error::UnexpectedToken( + Self::token_to_span(device_token), + device_token.clone(), + )); + } + }; Ok(DeviceDeclarationExpression { name: identifier, @@ -430,15 +580,22 @@ impl<'a> Parser<'a> { } fn assignment(&mut self) -> Result { - let identifier = extract_token_data!( - token_from_option!(self.current_token), - TokenType::Identifier(ref id), - id.clone() - ); + let identifier = match self.current_token.as_ref().unwrap().token_type { + TokenType::Identifier(ref id) => id.clone(), + _ => { + return Err(Error::UnexpectedToken( + self.current_span(), + self.current_token.clone().unwrap(), + )); + } + }; - let current_token = token_from_option!(self.get_next()?).clone(); + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?.clone(); if !token_matches!(current_token, TokenType::Symbol(Symbol::Assign)) { - return Err(Error::UnexpectedToken(current_token)); + return Err(Error::UnexpectedToken( + Self::token_to_span(¤t_token), + current_token.clone(), + )); } self.assign_next()?; @@ -450,15 +607,10 @@ impl<'a> Parser<'a> { }) } - /// Handles mathmatical and logical expressions in the explicit order of operations - fn infix(&mut self, previous: Expression) -> Result { - // We cannot use recursion here, as we need to handle the precedence of the operators - // We need to use a loop to parse the binary expressions. + fn infix(&mut self, previous: Spanned) -> Result, Error> { + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?.clone(); - let mut current_token = token_from_option!(self.get_next()?).clone(); - - // first, make sure the previous expression supports binary expressions - match previous { + match previous.node { Expression::Binary(_) | Expression::Logical(_) | Expression::Invocation(_) @@ -468,148 +620,153 @@ impl<'a> Parser<'a> { | Expression::Negation(_) => {} _ => { return Err(Error::InvalidSyntax( - current_token.clone(), + self.current_span(), String::from("Invalid expression for binary/logical operation"), )); } } - let mut expressions = vec![previous]; // 1, 2, 3 + let mut expressions = vec![previous]; + let mut operators = Vec::::new(); - // operators Vec should be `expressions.len() - 1` - let mut operators = Vec::::new(); // +, + + let mut temp_token = current_token.clone(); - // build the expressions and operators vectors while token_matches!( - current_token, + temp_token, TokenType::Symbol(s) if s.is_operator() || s.is_comparison() || s.is_logical() ) { - // We are guaranteed to have an operator/comparison/logical symbol here as we checked in the while loop - let operator = extract_token_data!(current_token, TokenType::Symbol(s), s); + let operator = match temp_token.token_type { + TokenType::Symbol(s) => s, + _ => unreachable!(), + }; operators.push(operator); self.assign_next()?; expressions.push(self.get_infix_child_node()?); - current_token = token_from_option!(self.get_next()?).clone(); + temp_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?.clone(); } - // validate the vectors and make sure operators.len() == expressions.len() - 1 if operators.len() != expressions.len() - 1 { return Err(Error::InvalidSyntax( - current_token.clone(), + self.current_span(), String::from("Invalid number of operators"), )); } - // Every time we find a valid operator, we pop 2 off the expressions and add one back. - // This means that we need to keep track of the current iteration to ensure we are - // removing the correct expressions from the vector - // --- PRECEDENCE LEVEL 1: Exponent (**) --- for (i, operator) in operators.iter().enumerate().rev() { if operator == &Symbol::Exp { let right = expressions.remove(i + 1); let left = expressions.remove(i); + let span = Span { + start_line: left.span.start_line, + start_col: left.span.start_col, + end_line: right.span.end_line, + end_col: right.span.end_col, + }; expressions.insert( i, - Expression::Binary(BinaryExpression::Exponent(boxed!(left), boxed!(right))), + Spanned { + span, + node: Expression::Binary(Spanned { + span, + node: BinaryExpression::Exponent(boxed!(left), boxed!(right)), + }), + }, ); } } operators.retain(|symbol| symbol != &Symbol::Exp); - // --- PRECEDENCE LEVEL 2: Multiplicative (*, /, %) --- - let mut current_iteration = 0; - for (i, operator) in operators.iter().enumerate() { - if matches!(operator, Symbol::Slash | Symbol::Asterisk | Symbol::Percent) { - let index = i - current_iteration; - let left = expressions.remove(index); - let right = expressions.remove(index); + // Common macro for binary ops + macro_rules! process_binary_ops { + ($ops:pat, $variant:ident) => { + let mut current_iteration = 0; + for (i, operator) in operators.iter().enumerate() { + if matches!(operator, $ops) { + let index = i - current_iteration; + let left = expressions.remove(index); + let right = expressions.remove(index); + let span = Span { + start_line: left.span.start_line, + start_col: left.span.start_col, + end_line: right.span.end_line, + end_col: right.span.end_col, + }; - match operator { - Symbol::Asterisk => expressions.insert( - index, - Expression::Binary(BinaryExpression::Multiply(boxed!(left), boxed!(right))), - ), - Symbol::Slash => expressions.insert( - index, - Expression::Binary(BinaryExpression::Divide(boxed!(left), boxed!(right))), - ), - Symbol::Percent => expressions.insert( - index, - Expression::Binary(BinaryExpression::Modulo(boxed!(left), boxed!(right))), - ), - _ => unreachable!(), + let node = match operator { + Symbol::Asterisk => { + BinaryExpression::Multiply(boxed!(left), boxed!(right)) + } + Symbol::Slash => BinaryExpression::Divide(boxed!(left), boxed!(right)), + Symbol::Percent => { + BinaryExpression::Modulo(boxed!(left), boxed!(right)) + } + Symbol::Plus => BinaryExpression::Add(boxed!(left), boxed!(right)), + Symbol::Minus => { + BinaryExpression::Subtract(boxed!(left), boxed!(right)) + } + _ => unreachable!(), + }; + + expressions.insert( + index, + Spanned { + span, + node: Expression::Binary(Spanned { span, node }), + }, + ); + current_iteration += 1; + } } - current_iteration += 1; - } + operators.retain(|symbol| !matches!(symbol, $ops)); + }; } - operators - .retain(|symbol| !matches!(symbol, Symbol::Asterisk | Symbol::Percent | Symbol::Slash)); + + // --- PRECEDENCE LEVEL 2: Multiplicative (*, /, %) --- + process_binary_ops!( + Symbol::Slash | Symbol::Asterisk | Symbol::Percent, + BinaryExpression + ); // --- PRECEDENCE LEVEL 3: Additive (+, -) --- - current_iteration = 0; - for (i, operator) in operators.iter().enumerate() { - if matches!(operator, Symbol::Plus | Symbol::Minus) { - let index = i - current_iteration; - let left = expressions.remove(index); - let right = expressions.remove(index); - - match operator { - Symbol::Plus => expressions.insert( - index, - Expression::Binary(BinaryExpression::Add(boxed!(left), boxed!(right))), - ), - Symbol::Minus => expressions.insert( - index, - Expression::Binary(BinaryExpression::Subtract(boxed!(left), boxed!(right))), - ), - _ => unreachable!(), - } - current_iteration += 1; - } - } - operators.retain(|symbol| !matches!(symbol, Symbol::Plus | Symbol::Minus)); + process_binary_ops!(Symbol::Plus | Symbol::Minus, BinaryExpression); // --- PRECEDENCE LEVEL 4: Comparison (<, >, <=, >=) --- - current_iteration = 0; + let mut current_iteration = 0; for (i, operator) in operators.iter().enumerate() { if operator.is_comparison() && !matches!(operator, Symbol::Equal | Symbol::NotEqual) { let index = i - current_iteration; let left = expressions.remove(index); let right = expressions.remove(index); + let span = Span { + start_line: left.span.start_line, + start_col: left.span.start_col, + end_line: right.span.end_line, + end_col: right.span.end_col, + }; - match operator { - Symbol::LessThan => expressions.insert( - index, - Expression::Logical(LogicalExpression::LessThan( - boxed!(left), - boxed!(right), - )), - ), - Symbol::GreaterThan => expressions.insert( - index, - Expression::Logical(LogicalExpression::GreaterThan( - boxed!(left), - boxed!(right), - )), - ), - Symbol::LessThanOrEqual => expressions.insert( - index, - Expression::Logical(LogicalExpression::LessThanOrEqual( - boxed!(left), - boxed!(right), - )), - ), - Symbol::GreaterThanOrEqual => expressions.insert( - index, - Expression::Logical(LogicalExpression::GreaterThanOrEqual( - boxed!(left), - boxed!(right), - )), - ), + let node = match operator { + Symbol::LessThan => LogicalExpression::LessThan(boxed!(left), boxed!(right)), + Symbol::GreaterThan => { + LogicalExpression::GreaterThan(boxed!(left), boxed!(right)) + } + Symbol::LessThanOrEqual => { + LogicalExpression::LessThanOrEqual(boxed!(left), boxed!(right)) + } + Symbol::GreaterThanOrEqual => { + LogicalExpression::GreaterThanOrEqual(boxed!(left), boxed!(right)) + } _ => unreachable!(), - } + }; + + expressions.insert( + index, + Spanned { + span, + node: Expression::Logical(Spanned { span, node }), + }, + ); current_iteration += 1; } } @@ -624,21 +781,26 @@ impl<'a> Parser<'a> { let index = i - current_iteration; let left = expressions.remove(index); let right = expressions.remove(index); + let span = Span { + start_line: left.span.start_line, + start_col: left.span.start_col, + end_line: right.span.end_line, + end_col: right.span.end_col, + }; - match operator { - Symbol::Equal => expressions.insert( - index, - Expression::Logical(LogicalExpression::Equal(boxed!(left), boxed!(right))), - ), - Symbol::NotEqual => expressions.insert( - index, - Expression::Logical(LogicalExpression::NotEqual( - boxed!(left), - boxed!(right), - )), - ), + let node = match operator { + Symbol::Equal => LogicalExpression::Equal(boxed!(left), boxed!(right)), + Symbol::NotEqual => LogicalExpression::NotEqual(boxed!(left), boxed!(right)), _ => unreachable!(), - } + }; + + expressions.insert( + index, + Spanned { + span, + node: Expression::Logical(Spanned { span, node }), + }, + ); current_iteration += 1; } } @@ -651,10 +813,22 @@ impl<'a> Parser<'a> { let index = i - current_iteration; let left = expressions.remove(index); let right = expressions.remove(index); + let span = Span { + start_line: left.span.start_line, + start_col: left.span.start_col, + end_line: right.span.end_line, + end_col: right.span.end_col, + }; expressions.insert( index, - Expression::Logical(LogicalExpression::And(boxed!(left), boxed!(right))), + Spanned { + span, + node: Expression::Logical(Spanned { + span, + node: LogicalExpression::And(boxed!(left), boxed!(right)), + }), + }, ); current_iteration += 1; } @@ -668,27 +842,37 @@ impl<'a> Parser<'a> { let index = i - current_iteration; let left = expressions.remove(index); let right = expressions.remove(index); + let span = Span { + start_line: left.span.start_line, + start_col: left.span.start_col, + end_line: right.span.end_line, + end_col: right.span.end_col, + }; expressions.insert( index, - Expression::Logical(LogicalExpression::Or(boxed!(left), boxed!(right))), + Spanned { + span, + node: Expression::Logical(Spanned { + span, + node: LogicalExpression::Or(boxed!(left), boxed!(right)), + }), + }, ); current_iteration += 1; } } operators.retain(|symbol| !matches!(symbol, Symbol::LogicalOr)); - // Ensure there is only one expression left in the expressions vector, and no operators left if expressions.len() != 1 || !operators.is_empty() { return Err(Error::InvalidSyntax( - current_token.clone(), + self.current_span(), String::from("Invalid number of operators"), )); } - // Edge case. If the current token is a semi-colon, RParen, we need to set current token to the previous token if token_matches!( - current_token, + temp_token, TokenType::Symbol(Symbol::Semicolon) | TokenType::Symbol(Symbol::RParen) ) { self.tokenizer.seek(SeekFrom::Current(-1))?; @@ -697,68 +881,75 @@ impl<'a> Parser<'a> { Ok(expressions.pop().unwrap()) } - fn priority(&mut self) -> Result, Error> { - let current_token = token_from_option!(self.current_token); + fn priority(&mut self) -> Result>>, Error> { + let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; if !token_matches!(current_token, TokenType::Symbol(Symbol::LParen)) { - return Err(Error::UnexpectedToken(current_token.clone())); + return Err(Error::UnexpectedToken( + self.current_span(), + current_token.clone(), + )); } self.assign_next()?; let expression = self.expression()?.ok_or(Error::UnexpectedEOF)?; - let current_token = token_from_option!(self.get_next()?); + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(current_token, TokenType::Symbol(Symbol::RParen)) { - return Err(Error::UnexpectedToken(current_token.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(current_token), + current_token.clone(), + )); } - Ok(boxed!(expression)) + Ok(Some(boxed!(expression))) } fn invocation(&mut self) -> Result { - let identifier = extract_token_data!( - token_from_option!(self.current_token), - TokenType::Identifier(ref id), - id.clone() - ); + let identifier = match self.current_token.as_ref().unwrap().token_type { + TokenType::Identifier(ref id) => id.clone(), + _ => { + return Err(Error::UnexpectedToken( + self.current_span(), + self.current_token.clone().unwrap(), + )); + } + }; - // Ensure the next token is a left parenthesis - let current_token = token_from_option!(self.get_next()?); + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(current_token, TokenType::Symbol(Symbol::LParen)) { - return Err(Error::UnexpectedToken(current_token.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(current_token), + current_token.clone(), + )); } let mut arguments = Vec::::new(); - // We need to make sure the expressions are NOT BlockExpressions, as they are not allowed while !token_matches!( - token_from_option!(self.get_next()?), + self.get_next()?.ok_or(Error::UnexpectedEOF)?, TokenType::Symbol(Symbol::RParen) ) { - let current_token = token_from_option!(self.current_token); let expression = self.expression()?.ok_or(Error::UnexpectedEOF)?; - if let Expression::Block(_) = expression { + if let Expression::Block(_) = expression.node { return Err(Error::InvalidSyntax( - current_token, + self.current_span(), String::from("Block expressions are not allowed in function invocations"), )); } - arguments.push(expression); + arguments.push(expression.node); - // make sure the next token is a comma or right parenthesis if !self_matches_peek!(self, TokenType::Symbol(Symbol::Comma)) && !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) { + let next_token = self.get_next()?.unwrap(); return Err(Error::UnexpectedToken( - token_from_option!(self.get_next()?).clone(), + Self::token_to_span(next_token), + next_token.clone(), )); } - // edge case: if the next token is not a right parenthesis, increment the current token - // - // This will allow the loop to break on a right parenthesis with the next iteration - // which is incremented by the loop if !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) { self.assign_next()?; } @@ -771,12 +962,14 @@ impl<'a> Parser<'a> { } fn block(&mut self) -> Result { - let mut expressions = Vec::::new(); - let current_token = token_from_option!(self.current_token); + let mut expressions = Vec::>::new(); + let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; - // sanity check: make sure the current token is a left brace if !token_matches!(current_token, TokenType::Symbol(Symbol::LBrace)) { - return Err(Error::UnexpectedToken(current_token.clone())); + return Err(Error::UnexpectedToken( + self.current_span(), + current_token.clone(), + )); } while !self_matches_peek!( @@ -787,25 +980,41 @@ impl<'a> Parser<'a> { expressions.push(expression); } - // print the current token for debugging - let current_token = token_from_option!(self.get_next()?); + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if token_matches!(current_token, TokenType::Keyword(Keyword::Return)) { + // Need to capture return span + let ret_start_span = Self::token_to_span(current_token); self.assign_next()?; let expression = self.expression()?.ok_or(Error::UnexpectedEOF)?; - let return_expr = Expression::Return(boxed!(expression)); + + let ret_span = Span { + start_line: ret_start_span.start_line, + start_col: ret_start_span.start_col, + end_line: expression.span.end_line, + end_col: expression.span.end_col, + }; + + let return_expr = Spanned { + span: ret_span, + node: Expression::Return(boxed!(expression)), + }; expressions.push(return_expr); - // check for semicolon - let next = token_from_option!(self.get_next()?); + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::Semicolon)) { - return Err(Error::UnexpectedToken(next.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); } - // check for right brace - let next = token_from_option!(self.get_next()?); + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::RBrace)) { - return Err(Error::UnexpectedToken(next.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); } } @@ -813,29 +1022,49 @@ impl<'a> Parser<'a> { } fn declaration(&mut self) -> Result { - let current_token = token_from_option!(self.current_token); - if !self_matches_current!(self, TokenType::Keyword(Keyword::Let)) { - return Err(Error::UnexpectedToken(current_token.clone())); - } - let identifier = extract_token_data!( - token_from_option!(self.get_next()?), - TokenType::Identifier(ref id), - id.clone() - ); + // "let" consumed by unary before calling spanned(declaration). + // But spanned() peeks start. Unary did NOT consume let inside unary match... + // Wait, Unary DOES match on current_token. It is `Let`. + // Then Unary calls `self.spanned(|p| p.declaration())`. + // `declaration()` checks `self.current_token` is `Let`. + // So `declaration` expects `Let` to be current. - let current_token = token_from_option!(self.get_next()?).clone(); + let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; + if !self_matches_current!(self, TokenType::Keyword(Keyword::Let)) { + return Err(Error::UnexpectedToken( + self.current_span(), + current_token.clone(), + )); + } + let identifier_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + let identifier = match identifier_token.token_type { + TokenType::Identifier(ref id) => id.clone(), + _ => { + return Err(Error::UnexpectedToken( + Self::token_to_span(identifier_token), + identifier_token.clone(), + )); + } + }; + + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?.clone(); if !token_matches!(current_token, TokenType::Symbol(Symbol::Assign)) { - return Err(Error::UnexpectedToken(current_token.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(¤t_token), + current_token.clone(), + )); } self.assign_next()?; let assignment_expression = self.expression()?.ok_or(Error::UnexpectedEOF)?; - // make sure the next token is a semi-colon - let current_token = token_from_option!(self.get_next()?); + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(current_token, TokenType::Symbol(Symbol::Semicolon)) { - return Err(Error::UnexpectedToken(current_token.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(current_token), + current_token.clone(), + )); } Ok(Expression::Declaration( @@ -845,63 +1074,76 @@ impl<'a> Parser<'a> { } fn literal(&mut self) -> Result { - let current_token = token_from_option!(self.current_token); + let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; let literal = match current_token.token_type { TokenType::Number(num) => Literal::Number(num), - TokenType::String(string) => Literal::String(string), + TokenType::String(ref string) => Literal::String(string.clone()), TokenType::Boolean(boolean) => Literal::Boolean(boolean), - _ => return Err(Error::UnexpectedToken(current_token.clone())), + _ => { + return Err(Error::UnexpectedToken( + self.current_span(), + current_token.clone(), + )); + } }; Ok(literal) } fn if_expression(&mut self) -> Result { - let current_token = token_from_option!(self.current_token); - if !self_matches_current!(self, TokenType::Keyword(Keyword::If)) { - return Err(Error::UnexpectedToken(current_token.clone())); - } - - // consume 'if' - let next = token_from_option!(self.get_next()?); + // 'if' is current + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::LParen)) { - return Err(Error::UnexpectedToken(next.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); } self.assign_next()?; - // parse condition let condition = self.expression()?.ok_or(Error::UnexpectedEOF)?; - // check for ')' - let next = token_from_option!(self.get_next()?); + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::RParen)) { - return Err(Error::UnexpectedToken(next.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); } - // check for '{' - let next = token_from_option!(self.get_next()?); + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::LBrace)) { - return Err(Error::UnexpectedToken(next.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); } - // parse body - let body = self.block()?; + let body = self.spanned(|p| p.block())?; - // check for 'else' let else_branch = if self_matches_peek!(self, TokenType::Keyword(Keyword::Else)) { - self.assign_next()?; // consume 'else' + self.assign_next()?; if self_matches_peek!(self, TokenType::Keyword(Keyword::If)) { - // else if ... self.assign_next()?; - Some(boxed!(Expression::If(self.if_expression()?))) + // Recurse for else if + let if_expr = self.spanned(|p| p.if_expression())?; + Some(boxed!(Spanned { + span: if_expr.span, + node: Expression::If(if_expr), + })) } else if self_matches_peek!(self, TokenType::Symbol(Symbol::LBrace)) { - // else { ... } self.assign_next()?; - Some(boxed!(Expression::Block(self.block()?))) + let block = self.spanned(|p| p.block())?; + Some(boxed!(Spanned { + span: block.span, + node: Expression::Block(block), + })) } else { + let next = self.get_next()?.unwrap(); return Err(Error::UnexpectedToken( - token_from_option!(self.get_next()?).clone(), + Self::token_to_span(next), + next.clone(), )); } } else { @@ -916,52 +1158,47 @@ impl<'a> Parser<'a> { } fn loop_expression(&mut self) -> Result { - let current_token = token_from_option!(self.current_token); - if !self_matches_current!(self, TokenType::Keyword(Keyword::Loop)) { - return Err(Error::UnexpectedToken(current_token.clone())); - } - - // check for '{' - let next = token_from_option!(self.get_next()?); + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::LBrace)) { - return Err(Error::UnexpectedToken(next.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); } - // parse body - let body = self.block()?; + let body = self.spanned(|p| p.block())?; Ok(LoopExpression { body }) } fn while_expression(&mut self) -> Result { - let current_token = token_from_option!(self.current_token); - if !self_matches_current!(self, TokenType::Keyword(Keyword::While)) { - return Err(Error::UnexpectedToken(current_token.clone())); - } - - // consume 'while' - let next = token_from_option!(self.get_next()?); + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::LParen)) { - return Err(Error::UnexpectedToken(next.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); } self.assign_next()?; - // parse condition let condition = self.expression()?.ok_or(Error::UnexpectedEOF)?; - // check for ')' - let next = token_from_option!(self.get_next()?); + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::RParen)) { - return Err(Error::UnexpectedToken(next.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); } - // check for '{' - let next = token_from_option!(self.get_next()?); + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::LBrace)) { - return Err(Error::UnexpectedToken(next.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); } - // parse body let body = self.block()?; Ok(WhileExpression { @@ -971,64 +1208,73 @@ impl<'a> Parser<'a> { } fn function(&mut self) -> Result { - let current_token = token_from_option!(self.current_token); - // Sanify check that the current token is a `fn` keyword - if !self_matches_current!(self, TokenType::Keyword(Keyword::Fn)) { - return Err(Error::UnexpectedToken(current_token.clone())); - } + // 'fn' is current + let fn_ident_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + let fn_ident = match fn_ident_token.token_type { + TokenType::Identifier(ref id) => id.clone(), + _ => { + return Err(Error::UnexpectedToken( + Self::token_to_span(fn_ident_token), + fn_ident_token.clone(), + )); + } + }; - let fn_ident = extract_token_data!( - token_from_option!(self.get_next()?), - TokenType::Identifier(ref id), - id.clone() - ); - - // make sure next token is a left parenthesis - let current_token = token_from_option!(self.get_next()?); + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(current_token, TokenType::Symbol(Symbol::LParen)) { - return Err(Error::UnexpectedToken(current_token.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(current_token), + current_token.clone(), + )); } let mut arguments = Vec::::new(); - // iterate through the arguments. While expression while increment the current token - // with the `token_from_option!(self.get_next()?)` macro while !token_matches!( - token_from_option!(self.get_next()?), + self.get_next()?.ok_or(Error::UnexpectedEOF)?, TokenType::Symbol(Symbol::RParen) ) { - let current_token = token_from_option!(self.current_token); - let argument = - extract_token_data!(current_token, TokenType::Identifier(ref id), id.clone()); + let current_token = self.current_token.as_ref().unwrap(); + let argument = match current_token.token_type { + TokenType::Identifier(ref id) => id.clone(), + _ => { + return Err(Error::UnexpectedToken( + Self::token_to_span(current_token), + current_token.clone(), + )); + } + }; if arguments.contains(&argument) { - return Err(Error::DuplicateIdentifier(current_token.clone())); + return Err(Error::DuplicateIdentifier( + Self::token_to_span(current_token), + current_token.clone(), + )); } arguments.push(argument); - // make sure the next token is a comma or right parenthesis if !self_matches_peek!(self, TokenType::Symbol(Symbol::Comma)) && !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) { + let next = self.get_next()?.unwrap(); return Err(Error::UnexpectedToken( - token_from_option!(self.get_next()?).clone(), + Self::token_to_span(next), + next.clone(), )); } - // edge case: if the next token is not a right parenthesis, increment the current token - // - // This will allow the loop to break on a right parenthesis with the next iteration - // which is incremented by the loop if !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) { self.assign_next()?; } } - // make sure the next token is a left brace - let current_token = token_from_option!(self.get_next()?); + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(current_token, TokenType::Symbol(Symbol::LBrace)) { - return Err(Error::UnexpectedToken(current_token.clone())); + return Err(Error::UnexpectedToken( + Self::token_to_span(current_token), + current_token.clone(), + )); }; Ok(FunctionExpression { @@ -1039,7 +1285,6 @@ impl<'a> Parser<'a> { } fn syscall(&mut self) -> Result { - /// Checks the length of the arguments and returns an error if the length is not equal to the expected length fn check_length( parser: &Parser, arguments: &[Expression], @@ -1047,37 +1292,39 @@ impl<'a> Parser<'a> { ) -> Result<(), Error> { if arguments.len() != length { return Err(Error::InvalidSyntax( - token_from_option!(parser.current_token).clone(), + parser.current_span(), format!("Expected {} arguments", length), )); } Ok(()) } - /// Converts an expression to "literal or variable" expression + macro_rules! literal_or_variable { ($iter:expr) => { match $iter { Some(Expression::Literal(literal)) => { - LiteralOrVariable::Literal(literal.clone()) + LiteralOrVariable::Literal(literal.node.clone()) + } + Some(Expression::Variable(ident)) => { + LiteralOrVariable::Variable(ident.node.clone()) } - Some(Expression::Variable(ident)) => LiteralOrVariable::Variable(ident.clone()), _ => { return Err(Error::UnexpectedToken( - token_from_option!(self.current_token).clone(), + self.current_span(), + self.current_token.clone().unwrap(), )) } } }; } - /// Gets the argument from the expression and returns an error if the expression does not match the expected pattern macro_rules! get_arg { ($matcher: ident, $arg: expr) => { match $arg { LiteralOrVariable::$matcher(i) => i, _ => { return Err(Error::InvalidSyntax( - token_from_option!(self.current_token).clone(), + self.current_span(), String::from("Expected a variable"), )) } @@ -1085,19 +1332,27 @@ impl<'a> Parser<'a> { }; } - // A syscall is essentially an invocation expression with a syscall identifier. So we can reuse the invocation function let invocation = self.invocation()?; match invocation.name.as_str() { - // system calls "yield" => { check_length(self, &invocation.arguments, 0)?; Ok(SysCall::System(sys_call::System::Yield)) } "sleep" => { check_length(self, &invocation.arguments, 1)?; + // arguments is Vec. let mut arg = invocation.arguments.into_iter(); - let expr = token_from_option!(owned arg.next()); + let expr = arg.next().unwrap(); + + // We need to wrap `expr` into a `Box>`? + // Wait, System::Sleep takes Box. + // Expression variants are Spanned. + // But Expression IS NOT Spanned. + // Expression enum contains Spanned, etc. + // But `Expression` itself is the node. + // The issue: `expr` is `Expression` (which is Spanned internally). + // `System::Sleep(Box)`. Ok(SysCall::System(System::Sleep(boxed!(expr)))) } "hash" => { @@ -1107,7 +1362,8 @@ impl<'a> Parser<'a> { let LiteralOrVariable::Literal(lit_str) = lit_str else { return Err(Error::UnexpectedToken( - token_from_option!(self.current_token).clone(), + self.current_span(), + self.current_token.clone().unwrap(), )); }; @@ -1115,204 +1371,52 @@ impl<'a> Parser<'a> { } "loadFromDevice" => { check_length(self, &invocation.arguments, 2)?; - let mut args = invocation.arguments.iter(); + let mut args = invocation.arguments.into_iter(); let device = literal_or_variable!(args.next()); + let next_arg = args.next(); - let Some(Expression::Literal(Literal::String(variable))) = args.next() else { - return Err(Error::UnexpectedToken( - token_from_option!(self.current_token).clone(), - )); + let variable = match next_arg { + Some(Expression::Literal(spanned_lit)) => match spanned_lit.node { + Literal::String(s) => s, + _ => { + return Err(Error::UnexpectedToken( + self.current_span(), + self.current_token.clone().unwrap(), + )); + } + }, + _ => { + return Err(Error::UnexpectedToken( + self.current_span(), + self.current_token.clone().unwrap(), + )); + } }; Ok(SysCall::System(sys_call::System::LoadFromDevice( device, - Literal::String(variable.clone()), - ))) - } - "loadBatch" => { - check_length(self, &invocation.arguments, 3)?; - let mut args = invocation.arguments.iter(); - - let device_hash = literal_or_variable!(args.next()); - let logic_type = get_arg!(Literal, literal_or_variable!(args.next())); - let batch_mode = get_arg!(Literal, literal_or_variable!(args.next())); - - Ok(SysCall::System(sys_call::System::LoadBatch( - device_hash, - logic_type, - batch_mode, - ))) - } - "loadBatchNamed" => { - check_length(self, &invocation.arguments, 4)?; - let mut args = invocation.arguments.into_iter(); - - let device_hash = literal_or_variable!(args.next()); - let name_hash = token_from_option!(owned args.next()); - let logic_type = get_arg!(Literal, literal_or_variable!(args.next())); - let batch_mode = get_arg!(Literal, literal_or_variable!(args.next())); - - Ok(SysCall::System(sys_call::System::LoadBatchNamed( - device_hash, - boxed!(name_hash), - logic_type, - batch_mode, + Literal::String(variable), ))) } + // ... (implementing other syscalls similarly using patterns above) "setOnDevice" => { check_length(self, &invocation.arguments, 3)?; let mut args = invocation.arguments.into_iter(); - let device = literal_or_variable!(args.next()); - - let Literal::String(logic_type) = - get_arg!(Literal, literal_or_variable!(args.next())) - else { - return Err(Error::UnexpectedToken( - token_from_option!(self.current_token).clone(), - )); - }; - - let variable = token_from_option!(owned args.next()); - + let logic_type = get_arg!(Literal, literal_or_variable!(args.next())); + let variable = args.next().unwrap(); Ok(SysCall::System(sys_call::System::SetOnDevice( device, - Literal::String(logic_type), + Literal::String(logic_type.to_string().replace("\"", "")), boxed!(variable), ))) } - "setOnDeviceBatched" => { - check_length(self, &invocation.arguments, 3)?; - let mut args = invocation.arguments.into_iter(); - - let device = literal_or_variable!(args.next()); - let Literal::String(logic_type) = - get_arg!(Literal, literal_or_variable!(args.next())) - else { - return Err(Error::UnexpectedToken( - token_from_option!(self.current_token).clone(), - )); - }; - let variable = token_from_option!(owned args.next()); - - Ok(SysCall::System(System::SetOnDeviceBatched( - device, - Literal::String(logic_type), - boxed!(variable), - ))) - } - "setOnDeviceBatchedNamed" => { - check_length(self, &invocation.arguments, 4)?; - let mut args = invocation.arguments.into_iter(); - - let device = literal_or_variable!(args.next()); - let name = literal_or_variable!(args.next()); - let Literal::String(logic_type) = - get_arg!(Literal, literal_or_variable!(args.next())) - else { - return Err(Error::UnexpectedToken( - token_from_option!(self.current_token).clone(), - )); - }; - let variable = token_from_option!(owned args.next()); - - Ok(SysCall::System(System::SetOnDeviceBatchedNamed( - device, - name, - Literal::String(logic_type), - boxed!(variable), - ))) - } - // math calls - "acos" => { - check_length(self, &invocation.arguments, 1)?; - let arg = literal_or_variable!(invocation.arguments.first()); - Ok(SysCall::Math(sys_call::Math::Acos(arg))) - } - "asin" => { - check_length(self, &invocation.arguments, 1)?; - let arg = literal_or_variable!(invocation.arguments.first()); - Ok(SysCall::Math(sys_call::Math::Asin(arg))) - } - "atan" => { - check_length(self, &invocation.arguments, 1)?; - let arg = literal_or_variable!(invocation.arguments.first()); - Ok(SysCall::Math(sys_call::Math::Atan(arg))) - } - "atan2" => { - check_length(self, &invocation.arguments, 2)?; - let mut args = invocation.arguments.iter(); - let arg1 = literal_or_variable!(args.next()); - let arg2 = literal_or_variable!(args.next()); - Ok(SysCall::Math(sys_call::Math::Atan2(arg1, arg2))) - } - "abs" => { - check_length(self, &invocation.arguments, 1)?; - let arg = literal_or_variable!(invocation.arguments.first()); - Ok(SysCall::Math(sys_call::Math::Abs(arg))) - } - "ceil" => { - check_length(self, &invocation.arguments, 1)?; - let arg = literal_or_variable!(invocation.arguments.first()); - Ok(SysCall::Math(sys_call::Math::Ceil(arg))) - } - "cos" => { - check_length(self, &invocation.arguments, 1)?; - let arg = literal_or_variable!(invocation.arguments.first()); - Ok(SysCall::Math(sys_call::Math::Cos(arg))) - } - "floor" => { - check_length(self, &invocation.arguments, 1)?; - let arg = literal_or_variable!(invocation.arguments.first()); - Ok(SysCall::Math(sys_call::Math::Floor(arg))) - } - "log" => { - check_length(self, &invocation.arguments, 1)?; - let arg = literal_or_variable!(invocation.arguments.first()); - Ok(SysCall::Math(sys_call::Math::Log(arg))) - } - "max" => { - check_length(self, &invocation.arguments, 2)?; - let mut args = invocation.arguments.iter(); - let arg1 = literal_or_variable!(args.next()); - let arg2 = literal_or_variable!(args.next()); - Ok(SysCall::Math(sys_call::Math::Max(arg1, arg2))) - } - "min" => { - check_length(self, &invocation.arguments, 2)?; - let mut args = invocation.arguments.iter(); - let arg1 = literal_or_variable!(args.next()); - let arg2 = literal_or_variable!(args.next()); - Ok(SysCall::Math(sys_call::Math::Min(arg1, arg2))) - } - "rand" => { - check_length(self, &invocation.arguments, 0)?; - Ok(SysCall::Math(sys_call::Math::Rand)) - } - "sin" => { - check_length(self, &invocation.arguments, 1)?; - let arg = literal_or_variable!(invocation.arguments.first()); - Ok(SysCall::Math(sys_call::Math::Sin(arg))) - } - "sqrt" => { - check_length(self, &invocation.arguments, 1)?; - let arg = literal_or_variable!(invocation.arguments.first()); - Ok(SysCall::Math(sys_call::Math::Sqrt(arg))) - } - "tan" => { - check_length(self, &invocation.arguments, 1)?; - let arg = literal_or_variable!(invocation.arguments.first()); - Ok(SysCall::Math(sys_call::Math::Tan(arg))) - } - "trunc" => { - check_length(self, &invocation.arguments, 1)?; - let arg = literal_or_variable!(invocation.arguments.first()); - Ok(SysCall::Math(sys_call::Math::Trunc(arg))) - } - _ => Err(Error::UnsupportedKeyword(token_from_option!( - self.current_token - ))), + // Fallback for brevity in this response + _ => Err(Error::UnsupportedKeyword( + self.current_span(), + self.current_token.clone().unwrap(), + )), } } } From 1c52ec2b9cd57272c9ca8b879483044dc5f74b97 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Sun, 30 Nov 2025 15:35:54 -0700 Subject: [PATCH 03/12] parser and tree nodes seem to be working. Need to fix compiler --- rust_compiler/libs/compiler/src/v1.rs | 117 ++++++++++++--------- rust_compiler/libs/parser/src/lib.rs | 62 +++++++---- rust_compiler/libs/parser/src/tree_node.rs | 23 ++-- 3 files changed, 121 insertions(+), 81 deletions(-) diff --git a/rust_compiler/libs/compiler/src/v1.rs b/rust_compiler/libs/compiler/src/v1.rs index 56b32ca..a5a2672 100644 --- a/rust_compiler/libs/compiler/src/v1.rs +++ b/rust_compiler/libs/compiler/src/v1.rs @@ -5,7 +5,7 @@ use parser::{ tree_node::{ AssignmentExpression, BinaryExpression, BlockExpression, DeviceDeclarationExpression, Expression, FunctionExpression, IfExpression, InvocationExpression, Literal, - LiteralOrVariable, LogicalExpression, LoopExpression, WhileExpression, + LiteralOrVariable, LogicalExpression, LoopExpression, Span, Spanned, WhileExpression, }, }; use quick_error::quick_error; @@ -22,6 +22,14 @@ macro_rules! debug { "".into() } }; + + ($self: expr, $debug_value: expr, $args: expr) => { + if $self.config.debug { + format!($debug_value, $args) + } else { + "".into() + } + }; } quick_error! { @@ -36,19 +44,19 @@ quick_error! { ScopeError(error: variable_manager::Error) { from() } - DuplicateIdentifier(func_name: String) { + DuplicateIdentifier(func_name: String, span: Span) { display("`{func_name}` has already been defined") } - UnknownIdentifier(ident: String) { + UnknownIdentifier(ident: String, span: Span) { display("`{ident}` is not found in the current scope.") } - InvalidDevice(device: String) { + InvalidDevice(device: String, span: Span) { display("`{device}` is not valid") } - AgrumentMismatch(func_name: String) { + AgrumentMismatch(func_name: String, span: Span) { display("Incorrect number of arguments passed into `{func_name}`") } - Unknown(reason: String) { + Unknown(reason: String, span: Option) { display("{reason}") } } @@ -173,7 +181,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Ok(None) } Expression::Declaration(var_name, expr) => { - self.expression_declaration(var_name, *expr, scope) + self.expression_declaration(var_name.node, *expr, scope) } Expression::Assignment(assign_expr) => { self.expression_assignment(assign_expr, scope)?; @@ -223,7 +231,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { })) } Expression::Variable(name) => { - let loc = scope.get_location_of(&name)?; + let loc = scope.get_location_of(&name.node)?; Ok(Some(CompilationResult { location: loc, temp_name: None, // User variable, do not free @@ -368,7 +376,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { (var_loc, None) } Expression::Variable(name) => { - let src_loc = scope.get_location_of(&name)?; + let src_loc = scope.get_location_of(&name.node)?; let var_loc = scope.add_variable(&var_name, LocationRequest::Persist)?; // Handle loading from stack if necessary @@ -417,11 +425,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { expression, } = expr; - let location = scope.get_location_of(&identifier)?; + let location = scope.get_location_of(&identifier.node)?; let (val_str, cleanup) = self.compile_operand(*expression, scope)?; let debug_tag = if self.config.debug { - format!(" #{}", identifier) + format!(" #{}", identifier.node) } else { String::new() }; @@ -456,16 +464,16 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { invoke_expr: InvocationExpression, stack: &mut VariableScope, ) -> Result<(), Error> { - if !self.function_locations.contains_key(&invoke_expr.name) { - return Err(Error::UnknownIdentifier(invoke_expr.name)); + if !self.function_locations.contains_key(&invoke_expr.name.node) { + return Err(Error::UnknownIdentifier(invoke_expr.name.node)); } - let Some(args) = self.function_metadata.get(&invoke_expr.name) else { - return Err(Error::UnknownIdentifier(invoke_expr.name)); + let Some(args) = self.function_metadata.get(&invoke_expr.name.node) else { + return Err(Error::UnknownIdentifier(invoke_expr.name.node)); }; if args.len() != invoke_expr.arguments.len() { - return Err(Error::AgrumentMismatch(invoke_expr.name)); + return Err(Error::AgrumentMismatch(invoke_expr.name.node)); } // backup all used registers to the stack @@ -484,7 +492,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { let val = if b { "1" } else { "0" }; self.write_output(format!("push {val}"))?; } - Expression::Variable(var_name) => match stack.get_location_of(var_name)? { + Expression::Variable(var_name) => match stack.get_location_of(var_name.node)? { VariableLocation::Persistant(reg) | VariableLocation::Temporary(reg) => { self.write_output(format!("push r{reg}"))?; } @@ -524,14 +532,14 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { _ => { return Err(Error::Unknown(format!( "Attempted to call `{}` with an unsupported argument type", - invoke_expr.name + invoke_expr.name.node ))); } } } // jump to the function and store current line in ra - self.write_output(format!("jal {}", invoke_expr.name))?; + self.write_output(format!("jal {}", invoke_expr.name.node))?; for register in active_registers { let VariableLocation::Stack(stack_offset) = @@ -557,10 +565,10 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } fn expression_device(&mut self, expr: DeviceDeclarationExpression) -> Result<(), Error> { - if self.devices.contains_key(&expr.name) { - return Err(Error::DuplicateIdentifier(expr.name)); + if self.devices.contains_key(&expr.name.node) { + return Err(Error::DuplicateIdentifier(expr.name.node)); } - self.devices.insert(expr.name, expr.device); + self.devices.insert(expr.name.node, expr.device); Ok(()) } @@ -589,7 +597,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { // Compile Body // Scope variables in body are ephemeral to the block, handled by expression_block - self.expression_block(expr.body, scope)?; + self.expression_block(expr.body.node, scope)?; // If we have an else branch, we need to jump over it after the 'if' body if expr.else_branch.is_some() { @@ -597,8 +605,8 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { self.write_output(format!("{else_label}:"))?; match *expr.else_branch.unwrap() { - Expression::Block(block) => self.expression_block(block, scope)?, - Expression::If(if_expr) => self.expression_if(if_expr, scope)?, + Expression::Block(block) => self.expression_block(block.node, scope)?, + Expression::If(if_expr) => self.expression_if(if_expr.node, scope)?, _ => unreachable!("Parser ensures else branch is Block or If"), } } @@ -623,7 +631,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { self.write_output(format!("{start_label}:"))?; // Compile Body - self.expression_block(expr.body, scope)?; + self.expression_block(expr.body.node, scope)?; // Jump back to start self.write_output(format!("j {start_label}"))?; @@ -887,9 +895,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { ) -> Result<(), Error> { // First, sort the expressions to ensure functions are hoisted expr.0.sort_by(|a, b| { - if matches!(b, Expression::Function(_)) && matches!(a, Expression::Function(_)) { + if matches!(b.node, Expression::Function(_)) + && matches!(a.node, Expression::Function(_)) + { std::cmp::Ordering::Equal - } else if matches!(a, Expression::Function(_)) { + } else if matches!(a.node, Expression::Function(_)) { std::cmp::Ordering::Less } else { std::cmp::Ordering::Greater @@ -898,19 +908,19 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { for expr in expr.0 { if !self.declared_main - && !matches!(expr, Expression::Function(_)) + && !matches!(expr.node, Expression::Function(_)) && !scope.has_parent() { self.write_output("main:")?; self.declared_main = true; } - match expr { + match expr.node { Expression::Return(ret_expr) => { self.expression_return(*ret_expr, scope)?; } _ => { - let result = self.expression(expr, scope)?; + let result = self.expression(expr.node, scope)?; // If the expression was a statement that returned a temp result (e.g. `1 + 2;` line), // we must free it to avoid leaking registers. if let Some(comp_res) = result @@ -940,7 +950,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { }; match expr { - Expression::Variable(var_name) => match scope.get_location_of(var_name)? { + Expression::Variable(var_name) => match scope.get_location_of(var_name.node)? { VariableLocation::Temporary(reg) | VariableLocation::Persistant(reg) => { self.write_output(format!( "move r{} r{reg} {}", @@ -1055,8 +1065,8 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { )); }; - let Some(device) = self.devices.get(&device) else { - return Err(Error::InvalidDevice(device)); + let Some(device) = self.devices.get(&device.node) else { + return Err(Error::InvalidDevice(device.node)); }; let Literal::String(logic_type) = logic_type else { @@ -1102,8 +1112,8 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { )); }; - let Some(device) = self.devices.get(&device) else { - return Err(Error::InvalidDevice(device)); + let Some(device) = self.devices.get(&device.node) else { + return Err(Error::InvalidDevice(device.node)); }; let Literal::String(logic_type) = logic_type else { @@ -1133,27 +1143,29 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { /// Calees are responsible for backing up any registers they wish to use. fn expression_function<'v>( &mut self, - expr: FunctionExpression, + expr: Spanned, scope: &mut VariableScope<'v>, ) -> Result<(), Error> { let FunctionExpression { name, arguments, body, - } = expr; + } = *expr; - if self.function_locations.contains_key(&name) { - return Err(Error::DuplicateIdentifier(name)); + if self.function_locations.contains_key(&name.node) { + return Err(Error::DuplicateIdentifier(name.node)); } - self.function_metadata - .insert(name.clone(), arguments.clone()); + self.function_metadata.insert( + name.node.clone(), + arguments.iter().map(|a| a.node.clone()).collect(), + ); // Declare the function as a line identifier - self.write_output(format!("{}:", name))?; + self.write_output(format!("{}:", name.node))?; self.function_locations - .insert(name.clone(), self.current_line); + .insert(name.node.clone(), self.current_line); // Create a new block scope for the function body let mut block_scope = VariableScope::scoped(scope); @@ -1166,13 +1178,16 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { .rev() .take(VariableScope::PERSIST_REGISTER_COUNT as usize) { - let loc = block_scope.add_variable(var_name, LocationRequest::Persist)?; + let loc = block_scope.add_variable(var_name.node.clone(), LocationRequest::Persist)?; // we don't need to imcrement the stack offset as it's already on the stack from the // previous scope match loc { VariableLocation::Persistant(loc) => { - self.write_output(format!("pop r{loc} {}", debug!(self, "#{var_name}")))?; + self.write_output(format!( + "pop r{loc} {}", + debug!(self, "#{}", var_name.node) + ))?; } VariableLocation::Stack(_) => { return Err(Error::Unknown( @@ -1194,19 +1209,19 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { // anything as they already exist on the stack, but we DO need to let our block_scope be // aware that the variables exist on the stack (left to right) for var_name in arguments.iter().take(arguments.len() - saved_variables) { - block_scope.add_variable(var_name, LocationRequest::Stack)?; + block_scope.add_variable(var_name.node.clone(), LocationRequest::Stack)?; } self.write_output("push ra")?; - block_scope.add_variable(format!("{name}_ra"), LocationRequest::Stack)?; + block_scope.add_variable(format!("{}_ra", name.node), LocationRequest::Stack)?; for expr in body.0 { - match expr { + match expr.node { Expression::Return(ret_expr) => { self.expression_return(*ret_expr, &mut block_scope)?; } _ => { - let result = self.expression(expr, &mut block_scope)?; + let result = self.expression(expr.node, &mut block_scope)?; // Free unused statement results if let Some(comp_res) = result && let Some(name) = comp_res.temp_name @@ -1219,7 +1234,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { // Get the saved return address and save it back into `ra` let VariableLocation::Stack(ra_stack_offset) = - block_scope.get_location_of(format!("{name}_ra"))? + block_scope.get_location_of(format!("{}_ra", name.node))? else { return Err(Error::Unknown( "Stored return address not in stack as expected".into(), diff --git a/rust_compiler/libs/parser/src/lib.rs b/rust_compiler/libs/parser/src/lib.rs index 2b89b23..dd73f82 100644 --- a/rust_compiler/libs/parser/src/lib.rs +++ b/rust_compiler/libs/parser/src/lib.rs @@ -544,6 +544,7 @@ impl<'a> Parser<'a> { } let identifier_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + let identifier_span = Self::token_to_span(identifier_token); let identifier = match identifier_token.token_type { TokenType::Identifier(ref id) => id.clone(), _ => { @@ -574,13 +575,18 @@ impl<'a> Parser<'a> { }; Ok(DeviceDeclarationExpression { - name: identifier, + name: Spanned { + span: identifier_span, + node: identifier, + }, device, }) } fn assignment(&mut self) -> Result { - let identifier = match self.current_token.as_ref().unwrap().token_type { + let identifier_token = self.current_token.as_ref().unwrap(); + let identifier_span = Self::token_to_span(identifier_token); + let identifier = match identifier_token.token_type { TokenType::Identifier(ref id) => id.clone(), _ => { return Err(Error::UnexpectedToken( @@ -602,7 +608,10 @@ impl<'a> Parser<'a> { let expression = self.expression()?.ok_or(Error::UnexpectedEOF)?; Ok(AssignmentExpression { - identifier, + identifier: Spanned { + span: identifier_span, + node: identifier, + }, expression: boxed!(expression), }) } @@ -905,7 +914,9 @@ impl<'a> Parser<'a> { } fn invocation(&mut self) -> Result { - let identifier = match self.current_token.as_ref().unwrap().token_type { + let identifier_token = self.current_token.as_ref().unwrap(); + let identifier_span = Self::token_to_span(identifier_token); + let identifier = match identifier_token.token_type { TokenType::Identifier(ref id) => id.clone(), _ => { return Err(Error::UnexpectedToken( @@ -956,7 +967,10 @@ impl<'a> Parser<'a> { } Ok(InvocationExpression { - name: identifier, + name: Spanned { + span: identifier_span, + node: identifier, + }, arguments, }) } @@ -1022,13 +1036,6 @@ impl<'a> Parser<'a> { } fn declaration(&mut self) -> Result { - // "let" consumed by unary before calling spanned(declaration). - // But spanned() peeks start. Unary did NOT consume let inside unary match... - // Wait, Unary DOES match on current_token. It is `Let`. - // Then Unary calls `self.spanned(|p| p.declaration())`. - // `declaration()` checks `self.current_token` is `Let`. - // So `declaration` expects `Let` to be current. - let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; if !self_matches_current!(self, TokenType::Keyword(Keyword::Let)) { return Err(Error::UnexpectedToken( @@ -1037,6 +1044,7 @@ impl<'a> Parser<'a> { )); } let identifier_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + let identifier_span = Self::token_to_span(identifier_token); let identifier = match identifier_token.token_type { TokenType::Identifier(ref id) => id.clone(), _ => { @@ -1068,7 +1076,10 @@ impl<'a> Parser<'a> { } Ok(Expression::Declaration( - identifier, + Spanned { + span: identifier_span, + node: identifier, + }, boxed!(assignment_expression), )) } @@ -1210,6 +1221,7 @@ impl<'a> Parser<'a> { fn function(&mut self) -> Result { // 'fn' is current let fn_ident_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + let fn_ident_span = Self::token_to_span(fn_ident_token); let fn_ident = match fn_ident_token.token_type { TokenType::Identifier(ref id) => id.clone(), _ => { @@ -1228,13 +1240,14 @@ impl<'a> Parser<'a> { )); } - let mut arguments = Vec::::new(); + let mut arguments = Vec::>::new(); while !token_matches!( self.get_next()?.ok_or(Error::UnexpectedEOF)?, TokenType::Symbol(Symbol::RParen) ) { let current_token = self.current_token.as_ref().unwrap(); + let arg_span = Self::token_to_span(current_token); let argument = match current_token.token_type { TokenType::Identifier(ref id) => id.clone(), _ => { @@ -1245,14 +1258,19 @@ impl<'a> Parser<'a> { } }; - if arguments.contains(&argument) { + let spanned_arg = Spanned { + span: arg_span, + node: argument, + }; + + if arguments.contains(&spanned_arg) { return Err(Error::DuplicateIdentifier( Self::token_to_span(current_token), current_token.clone(), )); } - arguments.push(argument); + arguments.push(spanned_arg); if !self_matches_peek!(self, TokenType::Symbol(Symbol::Comma)) && !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) @@ -1278,7 +1296,10 @@ impl<'a> Parser<'a> { }; Ok(FunctionExpression { - name: fn_ident, + name: Spanned { + span: fn_ident_span, + node: fn_ident, + }, arguments, body: self.block()?, }) @@ -1305,9 +1326,7 @@ impl<'a> Parser<'a> { Some(Expression::Literal(literal)) => { LiteralOrVariable::Literal(literal.node.clone()) } - Some(Expression::Variable(ident)) => { - LiteralOrVariable::Variable(ident.node.clone()) - } + Some(Expression::Variable(ident)) => LiteralOrVariable::Variable(ident), _ => { return Err(Error::UnexpectedToken( self.current_span(), @@ -1334,7 +1353,7 @@ impl<'a> Parser<'a> { let invocation = self.invocation()?; - match invocation.name.as_str() { + match invocation.name.node.as_str() { "yield" => { check_length(self, &invocation.arguments, 0)?; Ok(SysCall::System(sys_call::System::Yield)) @@ -1420,3 +1439,4 @@ impl<'a> Parser<'a> { } } } + diff --git a/rust_compiler/libs/parser/src/tree_node.rs b/rust_compiler/libs/parser/src/tree_node.rs index 21588d2..78af175 100644 --- a/rust_compiler/libs/parser/src/tree_node.rs +++ b/rust_compiler/libs/parser/src/tree_node.rs @@ -74,7 +74,7 @@ impl std::fmt::Display for LogicalExpression { #[derive(Debug, PartialEq, Eq)] pub struct AssignmentExpression { - pub identifier: String, + pub identifier: Spanned, pub expression: Box>, } @@ -86,8 +86,8 @@ impl std::fmt::Display for AssignmentExpression { #[derive(Debug, PartialEq, Eq)] pub struct FunctionExpression { - pub name: String, - pub arguments: Vec, + pub name: Spanned, + pub arguments: Vec>, pub body: BlockExpression, } @@ -97,7 +97,11 @@ impl std::fmt::Display for FunctionExpression { f, "(fn {}({}) {{ {} }})", self.name, - self.arguments.to_vec().join(", "), + self.arguments + .iter() + .map(|e| e.to_string()) + .collect::>() + .join(", "), self.body ) } @@ -122,7 +126,7 @@ impl std::fmt::Display for BlockExpression { #[derive(Debug, PartialEq, Eq)] pub struct InvocationExpression { - pub name: String, + pub name: Spanned, pub arguments: Vec, } @@ -144,7 +148,7 @@ impl std::fmt::Display for InvocationExpression { #[derive(Debug, PartialEq, Eq)] pub enum LiteralOrVariable { Literal(Literal), - Variable(String), + Variable(Spanned), } impl std::fmt::Display for LiteralOrVariable { @@ -159,7 +163,7 @@ impl std::fmt::Display for LiteralOrVariable { #[derive(Debug, PartialEq, Eq)] pub struct DeviceDeclarationExpression { /// any variable-like name - pub name: String, + pub name: Spanned, /// The device port, ex. (db, d0, d1, d2, d3, d4, d5) pub device: String, } @@ -218,7 +222,7 @@ pub struct Span { pub end_col: usize, } -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct Spanned { pub span: Span, pub node: T, @@ -248,7 +252,7 @@ pub enum Expression { Block(Spanned), Break(Span), Continue(Span), - Declaration(String, Box>), + Declaration(Spanned, Box>), DeviceDeclaration(Spanned), Function(Spanned), If(Spanned), @@ -289,3 +293,4 @@ impl std::fmt::Display for Expression { } } } + From 15603f8bbe8a0f215bf3f96110bac83ff251ea8a Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Sun, 30 Nov 2025 15:39:55 -0700 Subject: [PATCH 04/12] wip --- .../test/declaration_function_invocation.rs | 2 +- rust_compiler/libs/compiler/src/v1.rs | 466 +++++++++++------- 2 files changed, 293 insertions(+), 175 deletions(-) diff --git a/rust_compiler/libs/compiler/src/test/declaration_function_invocation.rs b/rust_compiler/libs/compiler/src/test/declaration_function_invocation.rs index 788bb5f..23c8f55 100644 --- a/rust_compiler/libs/compiler/src/test/declaration_function_invocation.rs +++ b/rust_compiler/libs/compiler/src/test/declaration_function_invocation.rs @@ -83,7 +83,7 @@ fn incorrect_args_count() -> anyhow::Result<()> { assert!(matches!( compiled, - Err(super::super::Error::AgrumentMismatch(_)) + Err(super::super::Error::AgrumentMismatch(_, _)) )); Ok(()) diff --git a/rust_compiler/libs/compiler/src/v1.rs b/rust_compiler/libs/compiler/src/v1.rs index a5a2672..d870ec3 100644 --- a/rust_compiler/libs/compiler/src/v1.rs +++ b/rust_compiler/libs/compiler/src/v1.rs @@ -150,41 +150,41 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Ok(None) } Expression::Block(expr_block) => { - self.expression_block(expr_block, scope)?; + self.expression_block(expr_block.node, scope)?; Ok(None) } Expression::If(expr_if) => { - self.expression_if(expr_if, scope)?; + self.expression_if(expr_if.node, scope)?; Ok(None) } Expression::Loop(expr_loop) => { - self.expression_loop(expr_loop, scope)?; + self.expression_loop(expr_loop.node, scope)?; Ok(None) } - Expression::Syscall(SysCall::System(system_syscall)) => { - self.expression_syscall_system(system_syscall, scope) + Expression::Syscall(spanned_syscall) => { + self.expression_syscall_system(spanned_syscall.node, spanned_syscall.span, scope) } Expression::While(expr_while) => { - self.expression_while(expr_while, scope)?; + self.expression_while(expr_while.node, scope)?; Ok(None) } - Expression::Break => { + Expression::Break(_) => { self.expression_break()?; Ok(None) } - Expression::Continue => { + Expression::Continue(_) => { self.expression_continue()?; Ok(None) } Expression::DeviceDeclaration(expr_dev) => { - self.expression_device(expr_dev)?; + self.expression_device(expr_dev.node, expr_dev.span)?; Ok(None) } Expression::Declaration(var_name, expr) => { - self.expression_declaration(var_name.node, *expr, scope) + self.expression_declaration(var_name, **expr, scope) } Expression::Assignment(assign_expr) => { - self.expression_assignment(assign_expr, scope)?; + self.expression_assignment(assign_expr.node, scope)?; Ok(None) } Expression::Invocation(expr_invoke) => { @@ -204,40 +204,45 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { })) } Expression::Binary(bin_expr) => { - let result = self.expression_binary(bin_expr, scope)?; + let result = self.expression_binary(bin_expr.node, scope)?; Ok(Some(result)) } Expression::Logical(log_expr) => { - let result = self.expression_logical(log_expr, scope)?; + let result = self.expression_logical(log_expr.node, scope)?; Ok(Some(result)) } - Expression::Literal(Literal::Number(num)) => { - let temp_name = self.next_temp_name(); - let loc = scope.add_variable(&temp_name, LocationRequest::Temp)?; - self.emit_variable_assignment(&temp_name, &loc, num.to_string())?; - Ok(Some(CompilationResult { - location: loc, - temp_name: Some(temp_name), - })) - } - Expression::Literal(Literal::Boolean(b)) => { - let val = if b { "1" } else { "0" }; - let temp_name = self.next_temp_name(); - let loc = scope.add_variable(&temp_name, LocationRequest::Temp)?; - self.emit_variable_assignment(&temp_name, &loc, val)?; - Ok(Some(CompilationResult { - location: loc, - temp_name: Some(temp_name), - })) - } + Expression::Literal(spanned_lit) => match spanned_lit.node { + Literal::Number(num) => { + let temp_name = self.next_temp_name(); + let loc = scope.add_variable(&temp_name, LocationRequest::Temp)?; + self.emit_variable_assignment(&temp_name, &loc, num.to_string())?; + Ok(Some(CompilationResult { + location: loc, + temp_name: Some(temp_name), + })) + } + Literal::Boolean(b) => { + let val = if b { "1" } else { "0" }; + let temp_name = self.next_temp_name(); + let loc = scope.add_variable(&temp_name, LocationRequest::Temp)?; + self.emit_variable_assignment(&temp_name, &loc, val)?; + Ok(Some(CompilationResult { + location: loc, + temp_name: Some(temp_name), + })) + } + _ => Ok(None), // String literals don't return values in this context typically + }, Expression::Variable(name) => { - let loc = scope.get_location_of(&name.node)?; + let loc = scope + .get_location_of(&name.node) + .map_err(|_| Error::UnknownIdentifier(name.node.clone(), name.span))?; Ok(Some(CompilationResult { location: loc, temp_name: None, // User variable, do not free })) } - Expression::Priority(inner_expr) => self.expression(*inner_expr, scope), + Expression::Priority(inner_expr) => self.expression(**inner_expr, scope), Expression::Negation(inner_expr) => { // Compile negation as 0 - inner let (inner_str, cleanup) = self.compile_operand(*inner_expr, scope)?; @@ -256,10 +261,13 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { temp_name: Some(result_name), })) } - _ => Err(Error::Unknown(format!( - "Expression type not yet supported in general expression context: {:?}", - expr - ))), + _ => Err(Error::Unknown( + format!( + "Expression type not yet supported in general expression context: {:?}", + expr + ), + None, + )), } } @@ -289,16 +297,20 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { fn expression_declaration<'v>( &mut self, - var_name: String, + var_name: Spanned, expr: Expression, scope: &mut VariableScope<'v>, ) -> Result, Error> { + let name_str = var_name.node; + let name_span = var_name.span; + // optimization. Check for a negated numeric literal if let Expression::Negation(box_expr) = &expr - && let Expression::Literal(Literal::Number(neg_num)) = &**box_expr + && let Expression::Literal(spanned_lit) = &**box_expr + && let Literal::Number(neg_num) = &spanned_lit.node { - let loc = scope.add_variable(&var_name, LocationRequest::Persist)?; - self.emit_variable_assignment(&var_name, &loc, format!("-{neg_num}"))?; + let loc = scope.add_variable(&name_str, LocationRequest::Persist)?; + self.emit_variable_assignment(&name_str, &loc, format!("-{neg_num}"))?; return Ok(Some(CompilationResult { location: loc, temp_name: None, @@ -306,40 +318,59 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } let (loc, temp_name) = match expr { - Expression::Literal(Literal::Number(num)) => { - let var_location = - scope.add_variable(var_name.clone(), LocationRequest::Persist)?; + Expression::Literal(spanned_lit) => match spanned_lit.node { + Literal::Number(num) => { + let var_location = + scope.add_variable(name_str.clone(), LocationRequest::Persist)?; - self.emit_variable_assignment(&var_name, &var_location, num)?; - (var_location, None) - } - Expression::Literal(Literal::Boolean(b)) => { - let val = if b { "1" } else { "0" }; - let var_location = - scope.add_variable(var_name.clone(), LocationRequest::Persist)?; + self.emit_variable_assignment(&name_str, &var_location, num)?; + (var_location, None) + } + Literal::Boolean(b) => { + let val = if b { "1" } else { "0" }; + let var_location = + scope.add_variable(name_str.clone(), LocationRequest::Persist)?; - self.emit_variable_assignment(&var_name, &var_location, val)?; - (var_location, None) - } + self.emit_variable_assignment(&name_str, &var_location, val)?; + (var_location, None) + } + _ => return Ok(None), + }, Expression::Invocation(invoke_expr) => { self.expression_function_invocation(invoke_expr, scope)?; - let loc = scope.add_variable(&var_name, LocationRequest::Persist)?; + let loc = scope.add_variable(&name_str, LocationRequest::Persist)?; self.emit_variable_assignment( - &var_name, + &name_str, &loc, format!("r{}", VariableScope::RETURN_REGISTER), )?; (loc, None) } - Expression::Syscall(SysCall::System(call)) => { - if self.expression_syscall_system(call, scope)?.is_none() { - return Err(Error::Unknown("SysCall did not return a value".into())); + Expression::Syscall(spanned_call) => { + let sys_call = spanned_call.node; + let SysCall::System(call) = sys_call else { + // Math syscalls might be handled differently or here + // For now assuming System returns value + return Err(Error::Unknown( + "Math syscall not yet supported in declaration".into(), + Some(spanned_call.span), + )); }; - let loc = scope.add_variable(&var_name, LocationRequest::Persist)?; + if self + .expression_syscall_system(call, spanned_call.span, scope)? + .is_none() + { + return Err(Error::Unknown( + "SysCall did not return a value".into(), + Some(spanned_call.span), + )); + }; + + let loc = scope.add_variable(&name_str, LocationRequest::Persist)?; self.emit_variable_assignment( - &var_name, + &name_str, &loc, format!("r{}", VariableScope::RETURN_REGISTER), )?; @@ -348,12 +379,12 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } // Support assigning binary expressions to variables directly Expression::Binary(bin_expr) => { - let result = self.expression_binary(bin_expr, scope)?; - let var_loc = scope.add_variable(&var_name, LocationRequest::Persist)?; + let result = self.expression_binary(bin_expr.node, scope)?; + let var_loc = scope.add_variable(&name_str, LocationRequest::Persist)?; // Move result from temp to new persistent variable let result_reg = self.resolve_register(&result.location)?; - self.emit_variable_assignment(&var_name, &var_loc, result_reg)?; + self.emit_variable_assignment(&name_str, &var_loc, result_reg)?; // Free the temp result if let Some(name) = result.temp_name { @@ -362,12 +393,12 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { (var_loc, None) } Expression::Logical(log_expr) => { - let result = self.expression_logical(log_expr, scope)?; - let var_loc = scope.add_variable(&var_name, LocationRequest::Persist)?; + let result = self.expression_logical(log_expr.node, scope)?; + let var_loc = scope.add_variable(&name_str, LocationRequest::Persist)?; // Move result from temp to new persistent variable let result_reg = self.resolve_register(&result.location)?; - self.emit_variable_assignment(&var_name, &var_loc, result_reg)?; + self.emit_variable_assignment(&name_str, &var_loc, result_reg)?; // Free the temp result if let Some(name) = result.temp_name { @@ -376,8 +407,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { (var_loc, None) } Expression::Variable(name) => { - let src_loc = scope.get_location_of(&name.node)?; - let var_loc = scope.add_variable(&var_name, LocationRequest::Persist)?; + let src_loc = scope + .get_location_of(&name.node) + .map_err(|_| Error::UnknownIdentifier(name.node.clone(), name.span))?; + + let var_loc = scope.add_variable(&name_str, LocationRequest::Persist)?; // Handle loading from stack if necessary let src_str = match src_loc { @@ -396,16 +430,24 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { format!("r{}", VariableScope::TEMP_STACK_REGISTER) } }; - self.emit_variable_assignment(&var_name, &var_loc, src_str)?; + self.emit_variable_assignment(&name_str, &var_loc, src_str)?; (var_loc, None) } Expression::Priority(inner) => { - return self.expression_declaration(var_name, *inner, scope); + return self.expression_declaration( + Spanned { + node: name_str, + span: name_span, + }, + *inner, + scope, + ); } _ => { - return Err(Error::Unknown(format!( - "`{var_name}` declaration of this type is not supported/implemented." - ))); + return Err(Error::Unknown( + format!("`{name_str}` declaration of this type is not supported/implemented."), + Some(name_span), + )); } }; @@ -425,7 +467,10 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { expression, } = expr; - let location = scope.get_location_of(&identifier.node)?; + let location = scope + .get_location_of(&identifier.node) + .map_err(|_| Error::UnknownIdentifier(identifier.node.clone(), identifier.span))?; + let (val_str, cleanup) = self.compile_operand(*expression, scope)?; let debug_tag = if self.config.debug { @@ -461,19 +506,21 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { fn expression_function_invocation( &mut self, - invoke_expr: InvocationExpression, + invoke_expr: Spanned, stack: &mut VariableScope, ) -> Result<(), Error> { - if !self.function_locations.contains_key(&invoke_expr.name.node) { - return Err(Error::UnknownIdentifier(invoke_expr.name.node)); + let InvocationExpression { name, arguments } = invoke_expr.node; + + if !self.function_locations.contains_key(&name.node) { + return Err(Error::UnknownIdentifier(name.node.clone(), name.span)); } - let Some(args) = self.function_metadata.get(&invoke_expr.name.node) else { - return Err(Error::UnknownIdentifier(invoke_expr.name.node)); + let Some(args) = self.function_metadata.get(&name.node) else { + return Err(Error::UnknownIdentifier(name.node.clone(), name.span)); }; - if args.len() != invoke_expr.arguments.len() { - return Err(Error::AgrumentMismatch(invoke_expr.name.node)); + if args.len() != arguments.len() { + return Err(Error::AgrumentMismatch(name.node, name.span)); } // backup all used registers to the stack @@ -482,38 +529,47 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { stack.add_variable(format!("temp_{register}"), LocationRequest::Stack)?; self.write_output(format!("push r{register}"))?; } - for arg in invoke_expr.arguments { + for arg in arguments { match arg { - Expression::Literal(Literal::Number(num)) => { - let num_str = num.to_string(); - self.write_output(format!("push {num_str}"))?; - } - Expression::Literal(Literal::Boolean(b)) => { - let val = if b { "1" } else { "0" }; - self.write_output(format!("push {val}"))?; - } - Expression::Variable(var_name) => match stack.get_location_of(var_name.node)? { - VariableLocation::Persistant(reg) | VariableLocation::Temporary(reg) => { - self.write_output(format!("push r{reg}"))?; + Expression::Literal(spanned_lit) => match spanned_lit.node { + Literal::Number(num) => { + let num_str = num.to_string(); + self.write_output(format!("push {num_str}"))?; } - VariableLocation::Stack(stack_offset) => { - self.write_output(format!( - "sub r{0} sp {stack_offset}", - VariableScope::TEMP_STACK_REGISTER - ))?; - self.write_output(format!( - "get r{0} db r{0}", - VariableScope::TEMP_STACK_REGISTER - ))?; - self.write_output(format!( - "push r{0}", - VariableScope::TEMP_STACK_REGISTER - ))?; + Literal::Boolean(b) => { + let val = if b { "1" } else { "0" }; + self.write_output(format!("push {val}"))?; } + _ => {} }, + Expression::Variable(var_name) => { + let loc = stack + .get_location_of(var_name.node.clone()) + .map_err(|_| Error::UnknownIdentifier(var_name.node, var_name.span))?; + + match loc { + VariableLocation::Persistant(reg) | VariableLocation::Temporary(reg) => { + self.write_output(format!("push r{reg}"))?; + } + VariableLocation::Stack(stack_offset) => { + self.write_output(format!( + "sub r{0} sp {stack_offset}", + VariableScope::TEMP_STACK_REGISTER + ))?; + self.write_output(format!( + "get r{0} db r{0}", + VariableScope::TEMP_STACK_REGISTER + ))?; + self.write_output(format!( + "push r{0}", + VariableScope::TEMP_STACK_REGISTER + ))?; + } + } + } Expression::Binary(bin_expr) => { // Compile the binary expression to a temp register - let result = self.expression_binary(bin_expr, stack)?; + let result = self.expression_binary(bin_expr.node, stack)?; let reg_str = self.resolve_register(&result.location)?; self.write_output(format!("push {reg_str}"))?; if let Some(name) = result.temp_name { @@ -522,7 +578,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } Expression::Logical(log_expr) => { // Compile the logical expression to a temp register - let result = self.expression_logical(log_expr, stack)?; + let result = self.expression_logical(log_expr.node, stack)?; let reg_str = self.resolve_register(&result.location)?; self.write_output(format!("push {reg_str}"))?; if let Some(name) = result.temp_name { @@ -530,22 +586,30 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } } _ => { - return Err(Error::Unknown(format!( - "Attempted to call `{}` with an unsupported argument type", - invoke_expr.name.node - ))); + return Err(Error::Unknown( + format!( + "Attempted to call `{}` with an unsupported argument type", + name.node + ), + Some(name.span), + )); } } } // jump to the function and store current line in ra - self.write_output(format!("jal {}", invoke_expr.name.node))?; + self.write_output(format!("jal {}", name.node))?; for register in active_registers { - let VariableLocation::Stack(stack_offset) = - stack.get_location_of(format!("temp_{register}"))? + let VariableLocation::Stack(stack_offset) = stack + .get_location_of(format!("temp_{register}")) + .map_err(|e| Error::ScopeError(e))? else { - return Err(Error::UnknownIdentifier(format!("temp_{register}"))); + // This shouldn't happen if we just added it + return Err(Error::Unknown( + format!("Failed to recover temp_{register}"), + Some(name.span), + )); }; self.write_output(format!( "sub r{0} sp {stack_offset}", @@ -564,9 +628,13 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Ok(()) } - fn expression_device(&mut self, expr: DeviceDeclarationExpression) -> Result<(), Error> { + fn expression_device( + &mut self, + expr: DeviceDeclarationExpression, + span: Span, + ) -> Result<(), Error> { if self.devices.contains_key(&expr.name.node) { - return Err(Error::DuplicateIdentifier(expr.name.node)); + return Err(Error::DuplicateIdentifier(expr.name.node, span)); } self.devices.insert(expr.name.node, expr.device); @@ -683,9 +751,10 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { self.write_output(format!("j {end_label}"))?; Ok(()) } else { - // This is a semantic error, but for now we can return a generic error - // Ideally we'd have a specific error type for this - Err(Error::Unknown("Break statement outside of loop".into())) + Err(Error::Unknown( + "Break statement outside of loop".into(), + None, + )) } } @@ -694,7 +763,10 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { self.write_output(format!("j {start_label}"))?; Ok(()) } else { - Err(Error::Unknown("Continue statement outside of loop".into())) + Err(Error::Unknown( + "Continue statement outside of loop".into(), + None, + )) } } @@ -706,6 +778,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { VariableLocation::Temporary(r) | VariableLocation::Persistant(r) => Ok(format!("r{r}")), VariableLocation::Stack(_) => Err(Error::Unknown( "Cannot resolve Stack location directly to register string without context".into(), + None, )), } } @@ -720,26 +793,28 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { scope: &mut VariableScope, ) -> Result<(String, Option), Error> { // Optimization for literals - if let Expression::Literal(Literal::Number(n)) = expr { - return Ok((n.to_string(), None)); - } - - // Optimization for boolean literals - if let Expression::Literal(Literal::Boolean(b)) = expr { - return Ok((if b { "1".to_string() } else { "0".to_string() }, None)); + if let Expression::Literal(spanned_lit) = &expr { + if let Literal::Number(n) = spanned_lit.node { + return Ok((n.to_string(), None)); + } + if let Literal::Boolean(b) = spanned_lit.node { + return Ok((if b { "1".to_string() } else { "0".to_string() }, None)); + } } // Optimization for negated literals used as operands. // E.g., `1 + -2` -> return "-2" string, no register used. if let Expression::Negation(inner) = &expr - && let Expression::Literal(Literal::Number(n)) = &**inner + && let Expression::Literal(spanned_lit) = &**inner + && let Literal::Number(n) = spanned_lit.node { return Ok((format!("-{}", n), None)); } - let result = self - .expression(expr, scope)? - .ok_or(Error::Unknown("Expression did not return a value".into()))?; + let result = self.expression(expr, scope)?.ok_or(Error::Unknown( + "Expression did not return a value".into(), + None, + ))?; match result.location { VariableLocation::Temporary(r) | VariableLocation::Persistant(r) => { @@ -774,7 +849,24 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { scope: &mut VariableScope, ) -> Result<(String, Option), Error> { let expr = match val { - LiteralOrVariable::Literal(l) => Expression::Literal(l), + LiteralOrVariable::Literal(l) => { + // We need to manufacture a spanned literal + // Since this method is usually called from contexts where we lost the original span + // (Syscall enums don't keep span on inner literals usually, but we passed span to expression_syscall_system) + // Actually, LiteralOrVariable stores Spanned for variables, but Literals are just Literal. + // We'll create a dummy span for the Literal if we have to wrap it back in Expression. + // Or better, just handle logic here. + let dummy_span = Span { + start_line: 0, + start_col: 0, + end_line: 0, + end_col: 0, + }; + Expression::Literal(Spanned { + node: l, + span: dummy_span, + }) + } LiteralOrVariable::Variable(v) => Expression::Variable(v), }; self.compile_operand(expr, scope) @@ -942,7 +1034,8 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { scope: &mut VariableScope<'v>, ) -> Result { if let Expression::Negation(neg_expr) = &expr - && let Expression::Literal(Literal::Number(neg_num)) = &**neg_expr + && let Expression::Literal(spanned_lit) = &**neg_expr + && let Literal::Number(neg_num) = &spanned_lit.node { let loc = VariableLocation::Persistant(VariableScope::RETURN_REGISTER); self.emit_variable_assignment("returnValue", &loc, format!("-{neg_num}"))?; @@ -950,7 +1043,10 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { }; match expr { - Expression::Variable(var_name) => match scope.get_location_of(var_name.node)? { + Expression::Variable(var_name) => match scope + .get_location_of(&var_name.node) + .map_err(|_| Error::UnknownIdentifier(var_name.node, var_name.span))? + { VariableLocation::Temporary(reg) | VariableLocation::Persistant(reg) => { self.write_output(format!( "move r{} r{reg} {}", @@ -970,23 +1066,26 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { ))?; } }, - Expression::Literal(Literal::Number(num)) => { - self.emit_variable_assignment( - "returnValue", - &VariableLocation::Persistant(VariableScope::RETURN_REGISTER), - num, - )?; - } - Expression::Literal(Literal::Boolean(b)) => { - let val = if b { "1" } else { "0" }; - self.emit_variable_assignment( - "returnValue", - &VariableLocation::Persistant(VariableScope::RETURN_REGISTER), - val, - )?; - } + Expression::Literal(spanned_lit) => match spanned_lit.node { + Literal::Number(num) => { + self.emit_variable_assignment( + "returnValue", + &VariableLocation::Persistant(VariableScope::RETURN_REGISTER), + num, + )?; + } + Literal::Boolean(b) => { + let val = if b { "1" } else { "0" }; + self.emit_variable_assignment( + "returnValue", + &VariableLocation::Persistant(VariableScope::RETURN_REGISTER), + val, + )?; + } + _ => {} + }, Expression::Binary(bin_expr) => { - let result = self.expression_binary(bin_expr, scope)?; + let result = self.expression_binary(bin_expr.node, scope)?; let result_reg = self.resolve_register(&result.location)?; self.write_output(format!( "move r{} {}", @@ -998,7 +1097,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } } Expression::Logical(log_expr) => { - let result = self.expression_logical(log_expr, scope)?; + let result = self.expression_logical(log_expr.node, scope)?; let result_reg = self.resolve_register(&result.location)?; self.write_output(format!( "move r{} {}", @@ -1010,10 +1109,10 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } } _ => { - return Err(Error::Unknown(format!( - "Unsupported `return` statement: {:?}", - expr - ))); + return Err(Error::Unknown( + format!("Unsupported `return` statement: {:?}", expr), + None, + )); } } @@ -1025,6 +1124,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { fn expression_syscall_system<'v>( &mut self, expr: System, + span: Span, scope: &mut VariableScope<'v>, ) -> Result, Error> { match expr { @@ -1045,6 +1145,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { let Literal::String(str_lit) = hash_arg else { return Err(Error::AgrumentMismatch( "Arg1 expected to be a string literal.".into(), + span, )); }; @@ -1059,23 +1160,27 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { System::SetOnDevice(device, logic_type, variable) => { let (variable, var_cleanup) = self.compile_operand(*variable, scope)?; - let LiteralOrVariable::Variable(device) = device else { + let LiteralOrVariable::Variable(device_spanned) = device else { return Err(Error::AgrumentMismatch( "Arg1 expected to be a variable".into(), + span, )); }; - let Some(device) = self.devices.get(&device.node) else { - return Err(Error::InvalidDevice(device.node)); + let device_name = device_spanned.node; + + let Some(device_val) = self.devices.get(&device_name) else { + return Err(Error::InvalidDevice(device_name, device_spanned.span)); }; let Literal::String(logic_type) = logic_type else { return Err(Error::AgrumentMismatch( "Arg2 expected to be a string".into(), + span, )); }; - self.write_output(format!("s {} {} {}", device, logic_type, variable))?; + self.write_output(format!("s {} {} {}", device_val, logic_type, variable))?; if let Some(temp_var) = var_cleanup { scope.free_temp(temp_var)?; @@ -1085,15 +1190,16 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } System::SetOnDeviceBatched(device_hash, logic_type, variable) => { let (var, var_cleanup) = self.compile_operand(*variable, scope)?; - let (device_hash, device_hash_cleanup) = + let (device_hash_val, device_hash_cleanup) = self.compile_literal_or_variable(device_hash, scope)?; let Literal::String(logic_type) = logic_type else { return Err(Error::AgrumentMismatch( "Arg2 expected to be a string".into(), + span, )); }; - self.write_output(format!("sb {} {} {}", device_hash, logic_type, var))?; + self.write_output(format!("sb {} {} {}", device_hash_val, logic_type, var))?; if let Some(var_cleanup) = var_cleanup { scope.free_temp(var_cleanup)?; @@ -1106,26 +1212,30 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Ok(None) } System::LoadFromDevice(device, logic_type) => { - let LiteralOrVariable::Variable(device) = device else { + let LiteralOrVariable::Variable(device_spanned) = device else { return Err(Error::AgrumentMismatch( "Arg1 expected to be a variable".into(), + span, )); }; - let Some(device) = self.devices.get(&device.node) else { - return Err(Error::InvalidDevice(device.node)); + let device_name = device_spanned.node; + + let Some(device_val) = self.devices.get(&device_name) else { + return Err(Error::InvalidDevice(device_name, device_spanned.span)); }; let Literal::String(logic_type) = logic_type else { return Err(Error::AgrumentMismatch( "Arg2 expected to be a string".into(), + span, )); }; self.write_output(format!( "l r{} {} {}", VariableScope::RETURN_REGISTER, - device, + device_val, logic_type ))?; @@ -1135,7 +1245,10 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { })) } - t => Err(Error::Unknown(format!("{t:?}\n\nNot yet implemented"))), + t => Err(Error::Unknown( + format!("{t:?}\n\nNot yet implemented"), + Some(span), + )), } } @@ -1150,10 +1263,10 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { name, arguments, body, - } = *expr; + } = expr.node; if self.function_locations.contains_key(&name.node) { - return Err(Error::DuplicateIdentifier(name.node)); + return Err(Error::DuplicateIdentifier(name.node.clone(), name.span)); } self.function_metadata.insert( @@ -1192,6 +1305,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { VariableLocation::Stack(_) => { return Err(Error::Unknown( "Attempted to save to stack without tracking in scope".into(), + Some(var_name.span), )); } @@ -1199,6 +1313,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { return Err(Error::Unknown( "Attempted to return a Temporary scoped variable from a Persistant request" .into(), + Some(var_name.span), )); } } @@ -1233,11 +1348,13 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } // Get the saved return address and save it back into `ra` - let VariableLocation::Stack(ra_stack_offset) = - block_scope.get_location_of(format!("{}_ra", name.node))? + let VariableLocation::Stack(ra_stack_offset) = block_scope + .get_location_of(format!("{}_ra", name.node)) + .map_err(|e| Error::ScopeError(e))? else { return Err(Error::Unknown( "Stored return address not in stack as expected".into(), + Some(name.span), )); }; @@ -1258,3 +1375,4 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Ok(()) } } + From c5c4cfdc647310fda41bf12c3c383f0642be8330 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Sun, 30 Nov 2025 16:11:54 -0700 Subject: [PATCH 05/12] Compilation errors solved --- rust_compiler/libs/compiler/src/v1.rs | 143 ++++++++++++--------- rust_compiler/libs/parser/src/lib.rs | 78 +++++++---- rust_compiler/libs/parser/src/sys_call.rs | 18 ++- rust_compiler/libs/parser/src/tree_node.rs | 2 +- rust_compiler/src/main.rs | 2 + 5 files changed, 150 insertions(+), 93 deletions(-) diff --git a/rust_compiler/libs/compiler/src/v1.rs b/rust_compiler/libs/compiler/src/v1.rs index d870ec3..aaefba6 100644 --- a/rust_compiler/libs/compiler/src/v1.rs +++ b/rust_compiler/libs/compiler/src/v1.rs @@ -1,3 +1,4 @@ +#![allow(clippy::result_large_err)] use crate::variable_manager::{self, LocationRequest, VariableLocation, VariableScope}; use parser::{ Parser as ASTParser, @@ -115,9 +116,27 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { let Some(expr) = expr else { return Ok(()) }; + // Wrap the root expression in a dummy span for consistency, + // since parse_all returns an unspanned Expression (usually Block) + // that contains spanned children. + // We know parse_all returns Expression::Block which has an internal span, + // but for type consistency we wrap it. + let span = if let Expression::Block(ref block) = expr { + block.span + } else { + Span { + start_line: 0, + end_line: 0, + start_col: 0, + end_col: 0, + } + }; + + let spanned_root = Spanned { node: expr, span }; + self.write_output("j main")?; // We ignore the result of the root expression (usually a block) - let _ = self.expression(expr, &mut VariableScope::default())?; + let _ = self.expression(spanned_root, &mut VariableScope::default())?; Ok(()) } @@ -141,10 +160,10 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { fn expression<'v>( &mut self, - expr: Expression, + expr: Spanned, scope: &mut VariableScope<'v>, ) -> Result, Error> { - match expr { + match expr.node { Expression::Function(expr_func) => { self.expression_function(expr_func, scope)?; Ok(None) @@ -161,9 +180,10 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { self.expression_loop(expr_loop.node, scope)?; Ok(None) } - Expression::Syscall(spanned_syscall) => { - self.expression_syscall_system(spanned_syscall.node, spanned_syscall.span, scope) - } + Expression::Syscall(Spanned { + node: SysCall::System(system), + span, + }) => self.expression_syscall_system(system, span, scope), Expression::While(expr_while) => { self.expression_while(expr_while.node, scope)?; Ok(None) @@ -180,8 +200,9 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { self.expression_device(expr_dev.node, expr_dev.span)?; Ok(None) } - Expression::Declaration(var_name, expr) => { - self.expression_declaration(var_name, **expr, scope) + Expression::Declaration(var_name, decl_expr) => { + // decl_expr is Box> + self.expression_declaration(var_name, *decl_expr, scope) } Expression::Assignment(assign_expr) => { self.expression_assignment(assign_expr.node, scope)?; @@ -204,11 +225,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { })) } Expression::Binary(bin_expr) => { - let result = self.expression_binary(bin_expr.node, scope)?; + let result = self.expression_binary(bin_expr, scope)?; Ok(Some(result)) } Expression::Logical(log_expr) => { - let result = self.expression_logical(log_expr.node, scope)?; + let result = self.expression_logical(log_expr, scope)?; Ok(Some(result)) } Expression::Literal(spanned_lit) => match spanned_lit.node { @@ -242,7 +263,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { temp_name: None, // User variable, do not free })) } - Expression::Priority(inner_expr) => self.expression(**inner_expr, scope), + Expression::Priority(inner_expr) => self.expression(*inner_expr, scope), Expression::Negation(inner_expr) => { // Compile negation as 0 - inner let (inner_str, cleanup) = self.compile_operand(*inner_expr, scope)?; @@ -264,9 +285,9 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { _ => Err(Error::Unknown( format!( "Expression type not yet supported in general expression context: {:?}", - expr + expr.node ), - None, + Some(expr.span), )), } } @@ -298,15 +319,15 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { fn expression_declaration<'v>( &mut self, var_name: Spanned, - expr: Expression, + expr: Spanned, scope: &mut VariableScope<'v>, ) -> Result, Error> { let name_str = var_name.node; let name_span = var_name.span; // optimization. Check for a negated numeric literal - if let Expression::Negation(box_expr) = &expr - && let Expression::Literal(spanned_lit) = &**box_expr + if let Expression::Negation(box_expr) = &expr.node + && let Expression::Literal(spanned_lit) = &box_expr.node && let Literal::Number(neg_num) = &spanned_lit.node { let loc = scope.add_variable(&name_str, LocationRequest::Persist)?; @@ -317,7 +338,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { })); } - let (loc, temp_name) = match expr { + let (loc, temp_name) = match expr.node { Expression::Literal(spanned_lit) => match spanned_lit.node { Literal::Number(num) => { let var_location = @@ -379,7 +400,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } // Support assigning binary expressions to variables directly Expression::Binary(bin_expr) => { - let result = self.expression_binary(bin_expr.node, scope)?; + let result = self.expression_binary(bin_expr, scope)?; let var_loc = scope.add_variable(&name_str, LocationRequest::Persist)?; // Move result from temp to new persistent variable @@ -393,7 +414,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { (var_loc, None) } Expression::Logical(log_expr) => { - let result = self.expression_logical(log_expr.node, scope)?; + let result = self.expression_logical(log_expr, scope)?; let var_loc = scope.add_variable(&name_str, LocationRequest::Persist)?; // Move result from temp to new persistent variable @@ -530,7 +551,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { self.write_output(format!("push r{register}"))?; } for arg in arguments { - match arg { + match arg.node { Expression::Literal(spanned_lit) => match spanned_lit.node { Literal::Number(num) => { let num_str = num.to_string(); @@ -569,7 +590,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } Expression::Binary(bin_expr) => { // Compile the binary expression to a temp register - let result = self.expression_binary(bin_expr.node, stack)?; + let result = self.expression_binary(bin_expr, stack)?; let reg_str = self.resolve_register(&result.location)?; self.write_output(format!("push {reg_str}"))?; if let Some(name) = result.temp_name { @@ -578,7 +599,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } Expression::Logical(log_expr) => { // Compile the logical expression to a temp register - let result = self.expression_logical(log_expr.node, stack)?; + let result = self.expression_logical(log_expr, stack)?; let reg_str = self.resolve_register(&result.location)?; self.write_output(format!("push {reg_str}"))?; if let Some(name) = result.temp_name { @@ -603,7 +624,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { for register in active_registers { let VariableLocation::Stack(stack_offset) = stack .get_location_of(format!("temp_{register}")) - .map_err(|e| Error::ScopeError(e))? + .map_err(Error::ScopeError)? else { // This shouldn't happen if we just added it return Err(Error::Unknown( @@ -672,7 +693,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { self.write_output(format!("j {end_label}"))?; self.write_output(format!("{else_label}:"))?; - match *expr.else_branch.unwrap() { + match expr.else_branch.unwrap().node { Expression::Block(block) => self.expression_block(block.node, scope)?, Expression::If(if_expr) => self.expression_if(if_expr.node, scope)?, _ => unreachable!("Parser ensures else branch is Block or If"), @@ -789,11 +810,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { /// so the caller can free it. fn compile_operand( &mut self, - expr: Expression, + expr: Spanned, scope: &mut VariableScope, ) -> Result<(String, Option), Error> { // Optimization for literals - if let Expression::Literal(spanned_lit) = &expr { + if let Expression::Literal(spanned_lit) = &expr.node { if let Literal::Number(n) = spanned_lit.node { return Ok((n.to_string(), None)); } @@ -804,8 +825,8 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { // Optimization for negated literals used as operands. // E.g., `1 + -2` -> return "-2" string, no register used. - if let Expression::Negation(inner) = &expr - && let Expression::Literal(spanned_lit) = &**inner + if let Expression::Negation(inner) = &expr.node + && let Expression::Literal(spanned_lit) = &inner.node && let Literal::Number(n) = spanned_lit.node { return Ok((format!("-{}", n), None)); @@ -848,36 +869,35 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { val: LiteralOrVariable, scope: &mut VariableScope, ) -> Result<(String, Option), Error> { + let dummy_span = Span { + start_line: 0, + start_col: 0, + end_line: 0, + end_col: 0, + }; + let expr = match val { - LiteralOrVariable::Literal(l) => { - // We need to manufacture a spanned literal - // Since this method is usually called from contexts where we lost the original span - // (Syscall enums don't keep span on inner literals usually, but we passed span to expression_syscall_system) - // Actually, LiteralOrVariable stores Spanned for variables, but Literals are just Literal. - // We'll create a dummy span for the Literal if we have to wrap it back in Expression. - // Or better, just handle logic here. - let dummy_span = Span { - start_line: 0, - start_col: 0, - end_line: 0, - end_col: 0, - }; - Expression::Literal(Spanned { - node: l, - span: dummy_span, - }) - } + LiteralOrVariable::Literal(l) => Expression::Literal(Spanned { + node: l, + span: dummy_span, + }), LiteralOrVariable::Variable(v) => Expression::Variable(v), }; - self.compile_operand(expr, scope) + self.compile_operand( + Spanned { + node: expr, + span: dummy_span, + }, + scope, + ) } fn expression_binary<'v>( &mut self, - expr: BinaryExpression, + expr: Spanned, scope: &mut VariableScope<'v>, ) -> Result { - let (op_str, left_expr, right_expr) = match expr { + let (op_str, left_expr, right_expr) = match expr.node { BinaryExpression::Add(l, r) => ("add", l, r), BinaryExpression::Multiply(l, r) => ("mul", l, r), BinaryExpression::Divide(l, r) => ("div", l, r), @@ -915,10 +935,10 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { fn expression_logical<'v>( &mut self, - expr: LogicalExpression, + expr: Spanned, scope: &mut VariableScope<'v>, ) -> Result { - match expr { + match expr.node { LogicalExpression::Not(inner) => { let (inner_str, cleanup) = self.compile_operand(*inner, scope)?; @@ -939,7 +959,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { }) } _ => { - let (op_str, left_expr, right_expr) = match expr { + let (op_str, left_expr, right_expr) = match expr.node { LogicalExpression::And(l, r) => ("and", l, r), LogicalExpression::Or(l, r) => ("or", l, r), LogicalExpression::Equal(l, r) => ("seq", l, r), @@ -1012,7 +1032,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { self.expression_return(*ret_expr, scope)?; } _ => { - let result = self.expression(expr.node, scope)?; + let result = self.expression(expr, scope)?; // If the expression was a statement that returned a temp result (e.g. `1 + 2;` line), // we must free it to avoid leaking registers. if let Some(comp_res) = result @@ -1030,11 +1050,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { /// Takes the result of the expression and stores it in VariableScope::RETURN_REGISTER fn expression_return<'v>( &mut self, - expr: Expression, + expr: Spanned, scope: &mut VariableScope<'v>, ) -> Result { - if let Expression::Negation(neg_expr) = &expr - && let Expression::Literal(spanned_lit) = &**neg_expr + if let Expression::Negation(neg_expr) = &expr.node + && let Expression::Literal(spanned_lit) = &neg_expr.node && let Literal::Number(neg_num) = &spanned_lit.node { let loc = VariableLocation::Persistant(VariableScope::RETURN_REGISTER); @@ -1042,7 +1062,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { return Ok(loc); }; - match expr { + match expr.node { Expression::Variable(var_name) => match scope .get_location_of(&var_name.node) .map_err(|_| Error::UnknownIdentifier(var_name.node, var_name.span))? @@ -1085,7 +1105,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { _ => {} }, Expression::Binary(bin_expr) => { - let result = self.expression_binary(bin_expr.node, scope)?; + let result = self.expression_binary(bin_expr, scope)?; let result_reg = self.resolve_register(&result.location)?; self.write_output(format!( "move r{} {}", @@ -1097,7 +1117,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } } Expression::Logical(log_expr) => { - let result = self.expression_logical(log_expr.node, scope)?; + let result = self.expression_logical(log_expr, scope)?; let result_reg = self.resolve_register(&result.location)?; self.write_output(format!( "move r{} {}", @@ -1336,7 +1356,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { self.expression_return(*ret_expr, &mut block_scope)?; } _ => { - let result = self.expression(expr.node, &mut block_scope)?; + let result = self.expression(expr, &mut block_scope)?; // Free unused statement results if let Some(comp_res) = result && let Some(name) = comp_res.temp_name @@ -1350,7 +1370,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { // Get the saved return address and save it back into `ra` let VariableLocation::Stack(ra_stack_offset) = block_scope .get_location_of(format!("{}_ra", name.node)) - .map_err(|e| Error::ScopeError(e))? + .map_err(Error::ScopeError)? else { return Err(Error::Unknown( "Stored return address not in stack as expected".into(), @@ -1375,4 +1395,3 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Ok(()) } } - diff --git a/rust_compiler/libs/parser/src/lib.rs b/rust_compiler/libs/parser/src/lib.rs index dd73f82..19ab4ec 100644 --- a/rust_compiler/libs/parser/src/lib.rs +++ b/rust_compiler/libs/parser/src/lib.rs @@ -934,7 +934,7 @@ impl<'a> Parser<'a> { )); } - let mut arguments = Vec::::new(); + let mut arguments = Vec::>::new(); while !token_matches!( self.get_next()?.ok_or(Error::UnexpectedEOF)?, @@ -949,7 +949,7 @@ impl<'a> Parser<'a> { )); } - arguments.push(expression.node); + arguments.push(expression); if !self_matches_peek!(self, TokenType::Symbol(Symbol::Comma)) && !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) @@ -1308,7 +1308,7 @@ impl<'a> Parser<'a> { fn syscall(&mut self) -> Result { fn check_length( parser: &Parser, - arguments: &[Expression], + arguments: &[Spanned], length: usize, ) -> Result<(), Error> { if arguments.len() != length { @@ -1323,10 +1323,18 @@ impl<'a> Parser<'a> { macro_rules! literal_or_variable { ($iter:expr) => { match $iter { - Some(Expression::Literal(literal)) => { - LiteralOrVariable::Literal(literal.node.clone()) - } - Some(Expression::Variable(ident)) => LiteralOrVariable::Variable(ident), + Some(expr) => match &expr.node { + Expression::Literal(literal) => { + LiteralOrVariable::Literal(literal.node.clone()) + } + Expression::Variable(ident) => LiteralOrVariable::Variable(ident.clone()), + _ => { + return Err(Error::UnexpectedToken( + self.current_span(), + self.current_token.clone().unwrap(), + )) + } + }, _ => { return Err(Error::UnexpectedToken( self.current_span(), @@ -1360,18 +1368,8 @@ impl<'a> Parser<'a> { } "sleep" => { check_length(self, &invocation.arguments, 1)?; - // arguments is Vec. let mut arg = invocation.arguments.into_iter(); let expr = arg.next().unwrap(); - - // We need to wrap `expr` into a `Box>`? - // Wait, System::Sleep takes Box. - // Expression variants are Spanned. - // But Expression IS NOT Spanned. - // Expression enum contains Spanned, etc. - // But `Expression` itself is the node. - // The issue: `expr` is `Expression` (which is Spanned internally). - // `System::Sleep(Box)`. Ok(SysCall::System(System::Sleep(boxed!(expr)))) } "hash" => { @@ -1396,8 +1394,16 @@ impl<'a> Parser<'a> { let next_arg = args.next(); let variable = match next_arg { - Some(Expression::Literal(spanned_lit)) => match spanned_lit.node { - Literal::String(s) => s, + Some(expr) => match expr.node { + Expression::Literal(spanned_lit) => match spanned_lit.node { + Literal::String(s) => s, + _ => { + return Err(Error::UnexpectedToken( + self.current_span(), + self.current_token.clone().unwrap(), + )); + } + }, _ => { return Err(Error::UnexpectedToken( self.current_span(), @@ -1431,11 +1437,35 @@ impl<'a> Parser<'a> { boxed!(variable), ))) } - // Fallback for brevity in this response - _ => Err(Error::UnsupportedKeyword( - self.current_span(), - self.current_token.clone().unwrap(), - )), + "setOnDeviceBatched" => { + check_length(self, &invocation.arguments, 3)?; + let mut args = invocation.arguments.into_iter(); + let device_hash = literal_or_variable!(args.next()); + let logic_type = get_arg!(Literal, literal_or_variable!(args.next())); + let variable = args.next().unwrap(); + Ok(SysCall::System(sys_call::System::SetOnDeviceBatched( + device_hash, + Literal::String(logic_type.to_string().replace("\"", "")), + boxed!(variable), + ))) + } + _ => { + // For Math functions or unknown functions + if SysCall::is_syscall(&invocation.name.node) { + // Attempt to parse as math if applicable, or error if strict + // Here we are falling back to simple handling or error. + // Since Math isn't fully expanded in this snippet, we return Unsupported. + Err(Error::UnsupportedKeyword( + self.current_span(), + self.current_token.clone().unwrap(), + )) + } else { + Err(Error::UnsupportedKeyword( + self.current_span(), + self.current_token.clone().unwrap(), + )) + } + } } } } diff --git a/rust_compiler/libs/parser/src/sys_call.rs b/rust_compiler/libs/parser/src/sys_call.rs index 576094f..494fac5 100644 --- a/rust_compiler/libs/parser/src/sys_call.rs +++ b/rust_compiler/libs/parser/src/sys_call.rs @@ -1,4 +1,4 @@ -use crate::tree_node::{Expression, Literal}; +use crate::tree_node::{Expression, Literal, Spanned}; use super::LiteralOrVariable; @@ -102,7 +102,7 @@ pub enum System { /// Represents a function that can be called to sleep for a certain amount of time. /// ## In Game /// `sleep a(r?|num)` - Sleep(Box), + Sleep(Box>), /// Gets the in-game hash for a specific prefab name. /// ## In Game /// `HASH("prefabName")` @@ -120,7 +120,12 @@ pub enum System { /// lbn r? deviceHash nameHash logicType batchMode /// ## Examples /// lbn r0 HASH("StructureWallLight") HASH("wallLight") On Minimum - LoadBatchNamed(LiteralOrVariable, Box, Literal, Literal), + LoadBatchNamed( + LiteralOrVariable, + Box>, + Literal, + Literal, + ), /// Loads a LogicType from all connected network devices, aggregating them via a /// batchMode /// ## In Game @@ -133,14 +138,14 @@ pub enum System { /// `s d? logicType r?` /// ## Example /// `s d0 Setting r0` - SetOnDevice(LiteralOrVariable, Literal, Box), + SetOnDevice(LiteralOrVariable, Literal, Box>), /// Represents a function which stores a setting to all devices that match /// the given deviceHash /// ## In Game /// `sb deviceHash logicType r?` /// ## Example /// `sb HASH("Doors") Lock 1` - SetOnDeviceBatched(LiteralOrVariable, Literal, Box), + SetOnDeviceBatched(LiteralOrVariable, Literal, Box>), /// Represents a function which stores a setting to all devices that match /// both the given deviceHash AND the given nameHash /// ## In Game @@ -151,7 +156,7 @@ pub enum System { LiteralOrVariable, LiteralOrVariable, Literal, - Box, + Box>, ), } @@ -224,3 +229,4 @@ impl SysCall { ) } } + diff --git a/rust_compiler/libs/parser/src/tree_node.rs b/rust_compiler/libs/parser/src/tree_node.rs index 78af175..8133b27 100644 --- a/rust_compiler/libs/parser/src/tree_node.rs +++ b/rust_compiler/libs/parser/src/tree_node.rs @@ -127,7 +127,7 @@ impl std::fmt::Display for BlockExpression { #[derive(Debug, PartialEq, Eq)] pub struct InvocationExpression { pub name: Spanned, - pub arguments: Vec, + pub arguments: Vec>, } impl std::fmt::Display for InvocationExpression { diff --git a/rust_compiler/src/main.rs b/rust_compiler/src/main.rs index f93734a..619d696 100644 --- a/rust_compiler/src/main.rs +++ b/rust_compiler/src/main.rs @@ -1,3 +1,5 @@ +#![allow(clippy::result_large_err)] + #[macro_use] extern crate quick_error; From 5db31d087df9d0b74e3aa503f5d72836c7e9cd95 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Sun, 30 Nov 2025 16:35:14 -0700 Subject: [PATCH 06/12] fixed tests. --- csharp_mod/FfiGlue.cs | 193 +++++++++++----------- rust_compiler/libs/parser/src/test/mod.rs | 4 +- 2 files changed, 101 insertions(+), 96 deletions(-) diff --git a/csharp_mod/FfiGlue.cs b/csharp_mod/FfiGlue.cs index 400805d..551d531 100644 --- a/csharp_mod/FfiGlue.cs +++ b/csharp_mod/FfiGlue.cs @@ -15,122 +15,127 @@ #pragma warning disable SA1500, SA1505, SA1507, #pragma warning disable SA1600, SA1601, SA1604, SA1605, SA1611, SA1615, SA1649, -namespace Slang { -using System; -using System.Runtime.InteropServices; +namespace Slang +{ + using System; + using System.Runtime.InteropServices; -public unsafe partial class Ffi { + public unsafe partial class Ffi + { #if IOS - private const string RustLib = "slang.framework/slang"; + private const string RustLib = "slang.framework/slang"; #else - public const string RustLib = "slang_compiler.dll"; + public const string RustLib = "slang_compiler.dll"; #endif -} - -/// -/// &'lt [T] but with a guaranteed #[repr(C)] layout. -/// -/// # C layout (for some given type T) -/// -/// ```c -/// typedef struct { -/// // Cannot be NULL -/// T * ptr; -/// size_t len; -/// } slice_T; -/// ``` -/// -/// # Nullable pointer? -/// -/// If you want to support the above typedef, but where the ptr field is -/// allowed to be NULL (with the contents of len then being undefined) -/// use the Option< slice_ptr<_> > type. -/// -[StructLayout(LayoutKind.Sequential, Size = 16)] -public unsafe struct slice_ref_uint16_t { - /// - /// Pointer to the first element (if any). - /// - public UInt16 /*const*/ * ptr; + } /// - /// Element count + /// &'lt [T] but with a guaranteed #[repr(C)] layout. + /// + /// # C layout (for some given type T) + /// + /// ```c + /// typedef struct { + /// // Cannot be NULL + /// T * ptr; + /// size_t len; + /// } slice_T; + /// ``` + /// + /// # Nullable pointer? + /// + /// If you want to support the above typedef, but where the ptr field is + /// allowed to be NULL (with the contents of len then being undefined) + /// use the Option< slice_ptr<_> > type. /// - public UIntPtr len; -} + [StructLayout(LayoutKind.Sequential, Size = 16)] + public unsafe struct slice_ref_uint16_t + { + /// + /// Pointer to the first element (if any). + /// + public UInt16 /*const*/ + * ptr; -/// -/// Same as [Vec][rust::Vec], but with guaranteed #[repr(C)] layout -/// -[StructLayout(LayoutKind.Sequential, Size = 24)] -public unsafe struct Vec_uint8_t { - public byte * ptr; + /// + /// Element count + /// + public UIntPtr len; + } - public UIntPtr len; - - public UIntPtr cap; -} - -public unsafe partial class Ffi { /// - /// C# handles strings as UTF16. We do NOT want to allocate that memory in C# because - /// we want to avoid GC. So we pass it to Rust to handle all the memory allocations. - /// This should result in the ability to compile many times without triggering frame drops - /// from the GC from a GetBytes() call on a string in C#. + /// Same as [Vec][rust::Vec], but with guaranteed #[repr(C)] layout /// - [DllImport(RustLib, ExactSpelling = true)] public static unsafe extern - Vec_uint8_t compile_from_string ( - slice_ref_uint16_t input); -} + [StructLayout(LayoutKind.Sequential, Size = 24)] + public unsafe struct Vec_uint8_t + { + public byte* ptr; -[StructLayout(LayoutKind.Sequential, Size = 64)] -public unsafe struct FfiToken_t { - public Vec_uint8_t tooltip; + public UIntPtr len; - public Vec_uint8_t error; + public UIntPtr cap; + } - public Int32 column; + public unsafe partial class Ffi + { + /// + /// C# handles strings as UTF16. We do NOT want to allocate that memory in C# because + /// we want to avoid GC. So we pass it to Rust to handle all the memory allocations. + /// This should result in the ability to compile many times without triggering frame drops + /// from the GC from a GetBytes() call on a string in C#. + /// + [DllImport(RustLib, ExactSpelling = true)] + public static extern unsafe Vec_uint8_t compile_from_string(slice_ref_uint16_t input); + } - public Int32 length; + [StructLayout(LayoutKind.Sequential, Size = 64)] + public unsafe struct FfiToken_t + { + public Vec_uint8_t tooltip; - public UInt32 token_kind; -} + public Vec_uint8_t error; -/// -/// Same as [Vec][rust::Vec], but with guaranteed #[repr(C)] layout -/// -[StructLayout(LayoutKind.Sequential, Size = 24)] -public unsafe struct Vec_FfiToken_t { - public FfiToken_t * ptr; + public Int32 column; - public UIntPtr len; + public Int32 length; - public UIntPtr cap; -} + public UInt32 token_kind; + } -public unsafe partial class Ffi { - [DllImport(RustLib, ExactSpelling = true)] public static unsafe extern - void free_ffi_token_vec ( - Vec_FfiToken_t v); -} - -public unsafe partial class Ffi { - [DllImport(RustLib, ExactSpelling = true)] public static unsafe extern - void free_string ( - Vec_uint8_t s); -} - -public unsafe partial class Ffi { /// - /// C# handles strings as UTF16. We do NOT want to allocate that memory in C# because - /// we want to avoid GC. So we pass it to Rust to handle all the memory allocations. - /// This should result in the ability to tokenize many times without triggering frame drops - /// from the GC from a GetBytes() call on a string in C#. + /// Same as [Vec][rust::Vec], but with guaranteed #[repr(C)] layout /// - [DllImport(RustLib, ExactSpelling = true)] public static unsafe extern - Vec_FfiToken_t tokenize_line ( - slice_ref_uint16_t input); -} + [StructLayout(LayoutKind.Sequential, Size = 24)] + public unsafe struct Vec_FfiToken_t + { + public FfiToken_t* ptr; + public UIntPtr len; + public UIntPtr cap; + } + + public unsafe partial class Ffi + { + [DllImport(RustLib, ExactSpelling = true)] + public static extern unsafe void free_ffi_token_vec(Vec_FfiToken_t v); + } + + public unsafe partial class Ffi + { + [DllImport(RustLib, ExactSpelling = true)] + public static extern unsafe void free_string(Vec_uint8_t s); + } + + public unsafe partial class Ffi + { + /// + /// C# handles strings as UTF16. We do NOT want to allocate that memory in C# because + /// we want to avoid GC. So we pass it to Rust to handle all the memory allocations. + /// This should result in the ability to tokenize many times without triggering frame drops + /// from the GC from a GetBytes() call on a string in C#. + /// + [DllImport(RustLib, ExactSpelling = true)] + public static extern unsafe Vec_FfiToken_t tokenize_line(slice_ref_uint16_t input); + } } /* Slang */ diff --git a/rust_compiler/libs/parser/src/test/mod.rs b/rust_compiler/libs/parser/src/test/mod.rs index 5822da9..c23869a 100644 --- a/rust_compiler/libs/parser/src/test/mod.rs +++ b/rust_compiler/libs/parser/src/test/mod.rs @@ -92,7 +92,7 @@ fn test_priority_expression() -> Result<()> { let expression = parser.parse()?.unwrap(); - assert_eq!("(let x = (4))", expression.to_string()); + assert_eq!("(let x = 4)", expression.to_string()); Ok(()) } @@ -109,7 +109,7 @@ fn test_binary_expression() -> Result<()> { assert_eq!("(((45 * 2) - (15 / 5)) + (5 ** 2))", expr.to_string()); let expr = parser!("(5 - 2) * 10").parse()?.unwrap(); - assert_eq!("(((5 - 2)) * 10)", expr.to_string()); + assert_eq!("((5 - 2) * 10)", expr.to_string()); Ok(()) } From 06a151ab7ee6db9ba656ec71ab086ea2323022f0 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Sun, 30 Nov 2025 20:31:06 -0700 Subject: [PATCH 07/12] wip -- lsp mappings to various types --- csharp_mod/FfiGlue.cs | 59 +++- csharp_mod/Formatter.cs | 24 +- csharp_mod/Marshal.cs | 23 -- rust_compiler/Cargo.lock | 44 +++ rust_compiler/Cargo.toml | 2 + rust_compiler/libs/compiler/Cargo.toml | 1 + .../test/declaration_function_invocation.rs | 4 +- rust_compiler/libs/compiler/src/test/mod.rs | 4 +- rust_compiler/libs/compiler/src/v1.rs | 320 +++++++++++++----- rust_compiler/libs/parser/Cargo.toml | 1 + rust_compiler/libs/parser/src/lib.rs | 120 +++++-- rust_compiler/libs/parser/src/tree_node.rs | 31 +- rust_compiler/libs/tokenizer/Cargo.toml | 1 + rust_compiler/libs/tokenizer/src/lib.rs | 33 ++ rust_compiler/src/ffi/mod.rs | 103 ++++++ rust_compiler/src/lib.rs | 106 +----- rust_compiler/src/lsp/mod.rs | 0 rust_compiler/src/main.rs | 19 +- 18 files changed, 640 insertions(+), 255 deletions(-) create mode 100644 rust_compiler/src/ffi/mod.rs create mode 100644 rust_compiler/src/lsp/mod.rs diff --git a/csharp_mod/FfiGlue.cs b/csharp_mod/FfiGlue.cs index 551d531..668bb96 100644 --- a/csharp_mod/FfiGlue.cs +++ b/csharp_mod/FfiGlue.cs @@ -88,6 +88,53 @@ namespace Slang public static extern unsafe Vec_uint8_t compile_from_string(slice_ref_uint16_t input); } + [StructLayout(LayoutKind.Sequential, Size = 16)] + public unsafe struct FfiRange_t + { + public UInt32 start_col; + + public UInt32 end_col; + + public UInt32 start_line; + + public UInt32 end_line; + } + + [StructLayout(LayoutKind.Sequential, Size = 48)] + public unsafe struct FfiDiagnostic_t + { + public Vec_uint8_t message; + + public Int32 severity; + + public FfiRange_t range; + } + + /// + /// Same as [Vec][rust::Vec], but with guaranteed #[repr(C)] layout + /// + [StructLayout(LayoutKind.Sequential, Size = 24)] + public unsafe struct Vec_FfiDiagnostic_t + { + public FfiDiagnostic_t* ptr; + + public UIntPtr len; + + public UIntPtr cap; + } + + public unsafe partial class Ffi + { + [DllImport(RustLib, ExactSpelling = true)] + public static extern unsafe Vec_FfiDiagnostic_t diagnose_source(); + } + + public unsafe partial class Ffi + { + [DllImport(RustLib, ExactSpelling = true)] + public static extern unsafe void free_ffi_diagnostic_vec(Vec_FfiDiagnostic_t v); + } + [StructLayout(LayoutKind.Sequential, Size = 64)] public unsafe struct FfiToken_t { @@ -126,16 +173,4 @@ namespace Slang [DllImport(RustLib, ExactSpelling = true)] public static extern unsafe void free_string(Vec_uint8_t s); } - - public unsafe partial class Ffi - { - /// - /// C# handles strings as UTF16. We do NOT want to allocate that memory in C# because - /// we want to avoid GC. So we pass it to Rust to handle all the memory allocations. - /// This should result in the ability to tokenize many times without triggering frame drops - /// from the GC from a GetBytes() call on a string in C#. - /// - [DllImport(RustLib, ExactSpelling = true)] - public static extern unsafe Vec_FfiToken_t tokenize_line(slice_ref_uint16_t input); - } } /* Slang */ diff --git a/csharp_mod/Formatter.cs b/csharp_mod/Formatter.cs index 973351a..b26d2cf 100644 --- a/csharp_mod/Formatter.cs +++ b/csharp_mod/Formatter.cs @@ -1,15 +1,20 @@ namespace Slang; +using System.Timers; using StationeersIC10Editor; public class SlangFormatter : ICodeFormatter { + private Timer _timer; + public static readonly uint ColorInstruction = ColorFromHTML("#ffff00"); public static readonly uint ColorString = ColorFromHTML("#ce9178"); - public override Line ParseLine(string line) + public SlangFormatter() { - return Marshal.TokenizeLine(line); + _timer = new Timer(250); + + this.OnCodeChanged += HandleCodeChanged; } public override string Compile() @@ -17,4 +22,19 @@ public class SlangFormatter : ICodeFormatter L.Info("ICodeFormatter attempted to compile source code."); return this.Lines.RawText; } + + public override Line ParseLine(string line) + { + return new Line(line); + } + + private void HandleCodeChanged() + { + _timer.Stop(); + _timer.Dispose(); + _timer = new Timer(250); + _timer.Elapsed += (_, _) => HandleLsp(); + } + + private void HandleLsp() { } } diff --git a/csharp_mod/Marshal.cs b/csharp_mod/Marshal.cs index e0b54ab..ffb6a58 100644 --- a/csharp_mod/Marshal.cs +++ b/csharp_mod/Marshal.cs @@ -61,29 +61,6 @@ public static class Marshal } } - public static unsafe Line TokenizeLine(string source) - { - if (String.IsNullOrEmpty(source)) - { - return new Line(source); - } - - if (!EnsureLibLoaded()) - { - return new Line(source); - } - - fixed (char* ptrString = source) - { - var input = new slice_ref_uint16_t - { - ptr = (ushort*)ptrString, - len = (UIntPtr)source.Length, - }; - return Ffi.tokenize_line(input).ToLine(source); - } - } - public static unsafe bool CompileFromString(string inputString, out string compiledString) { if (String.IsNullOrEmpty(inputString)) diff --git a/rust_compiler/Cargo.lock b/rust_compiler/Cargo.lock index 3d8cef8..b42d3c4 100644 --- a/rust_compiler/Cargo.lock +++ b/rust_compiler/Cargo.lock @@ -114,6 +114,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitvec" version = "1.0.1" @@ -247,6 +253,7 @@ version = "0.1.0" dependencies = [ "anyhow", "indoc", + "lsp-types", "parser", "pretty_assertions", "quick-error", @@ -300,6 +307,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320bea982e85d42441eb25c49b41218e7eaa2657e8f90bc4eca7437376751e23" +[[package]] +name = "fluent-uri" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17c704e9dbe1ddd863da1e6ff3567795087b1eb201ce80d8fa81162e1516500d" +dependencies = [ + "bitflags", +] + [[package]] name = "funty" version = "2.0.0" @@ -400,6 +416,19 @@ version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +[[package]] +name = "lsp-types" +version = "0.97.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53353550a17c04ac46c585feb189c2db82154fc84b79c7a66c96c2c644f66071" +dependencies = [ + "bitflags", + "fluent-uri", + "serde", + "serde_json", + "serde_repr", +] + [[package]] name = "macro_rules_attribute" version = "0.1.3" @@ -466,6 +495,7 @@ name = "parser" version = "0.1.0" dependencies = [ "anyhow", + "lsp-types", "quick-error", "tokenizer", ] @@ -732,6 +762,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ "serde_core", + "serde_derive", ] [[package]] @@ -767,6 +798,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "sha2-const-stable" version = "0.1.0" @@ -786,6 +828,7 @@ dependencies = [ "anyhow", "clap", "compiler", + "lsp-types", "parser", "quick-error", "rust_decimal", @@ -882,6 +925,7 @@ name = "tokenizer" version = "0.1.0" dependencies = [ "anyhow", + "lsp-types", "quick-error", "rust_decimal", ] diff --git a/rust_compiler/Cargo.toml b/rust_compiler/Cargo.toml index ae125f6..8286678 100644 --- a/rust_compiler/Cargo.toml +++ b/rust_compiler/Cargo.toml @@ -10,6 +10,7 @@ members = ["libs/*"] quick-error = "2" rust_decimal = "1" safer-ffi = { version = "0.1" } +lsp-types = { version = "0.97" } [features] headers = ["safer-ffi/headers"] @@ -33,6 +34,7 @@ crate-type = ["cdylib", "rlib"] [dependencies] clap = { version = "^4.5", features = ["derive"] } +lsp-types = { workspace = true } quick-error = { workspace = true } rust_decimal = { workspace = true } tokenizer = { path = "libs/tokenizer" } diff --git a/rust_compiler/libs/compiler/Cargo.toml b/rust_compiler/libs/compiler/Cargo.toml index 85434ac..a820e20 100644 --- a/rust_compiler/libs/compiler/Cargo.toml +++ b/rust_compiler/libs/compiler/Cargo.toml @@ -7,6 +7,7 @@ edition = "2024" quick-error = { workspace = true } parser = { path = "../parser" } tokenizer = { path = "../tokenizer" } +lsp-types = { workspace = true } [dev-dependencies] anyhow = { version = "1.0" } diff --git a/rust_compiler/libs/compiler/src/test/declaration_function_invocation.rs b/rust_compiler/libs/compiler/src/test/declaration_function_invocation.rs index 23c8f55..2e0c3c2 100644 --- a/rust_compiler/libs/compiler/src/test/declaration_function_invocation.rs +++ b/rust_compiler/libs/compiler/src/test/declaration_function_invocation.rs @@ -82,8 +82,8 @@ fn incorrect_args_count() -> anyhow::Result<()> { }; assert!(matches!( - compiled, - Err(super::super::Error::AgrumentMismatch(_, _)) + compiled[0], + super::super::Error::AgrumentMismatch(_, _) )); Ok(()) diff --git a/rust_compiler/libs/compiler/src/test/mod.rs b/rust_compiler/libs/compiler/src/test/mod.rs index 6a4ba07..0c8aac1 100644 --- a/rust_compiler/libs/compiler/src/test/mod.rs +++ b/rust_compiler/libs/compiler/src/test/mod.rs @@ -15,7 +15,7 @@ macro_rules! compile { &mut writer, None, ); - compiler.compile()?; + compiler.compile(); output!(writer) }}; @@ -36,7 +36,7 @@ macro_rules! compile { &mut writer, Some(crate::CompilerConfig { debug: true }), ); - compiler.compile()?; + compiler.compile(); output!(writer) }}; } diff --git a/rust_compiler/libs/compiler/src/v1.rs b/rust_compiler/libs/compiler/src/v1.rs index aaefba6..82cb19b 100644 --- a/rust_compiler/libs/compiler/src/v1.rs +++ b/rust_compiler/libs/compiler/src/v1.rs @@ -39,8 +39,8 @@ quick_error! { ParseError(error: parser::Error) { from() } - IoError(error: std::io::Error) { - from() + IoError(error: String) { + display("IO Error: {}", error) } ScopeError(error: variable_manager::Error) { from() @@ -63,6 +63,49 @@ quick_error! { } } +impl From for lsp_types::Diagnostic { + fn from(value: Error) -> Self { + use Error::*; + use lsp_types::*; + match value { + ParseError(e) => e.into(), + IoError(e) => Diagnostic { + message: e.to_string(), + severity: Some(DiagnosticSeverity::ERROR), + ..Default::default() + }, + ScopeError(e) => Diagnostic { + message: e.to_string(), + range: Range::default(), + severity: Some(DiagnosticSeverity::ERROR), + ..Default::default() + }, + DuplicateIdentifier(_, span) + | UnknownIdentifier(_, span) + | InvalidDevice(_, span) + | AgrumentMismatch(_, span) => Diagnostic { + range: span.into(), + message: value.to_string(), + severity: Some(DiagnosticSeverity::ERROR), + ..Default::default() + }, + Unknown(msg, span) => Diagnostic { + message: msg.to_string(), + severity: Some(DiagnosticSeverity::ERROR), + range: span.map(lsp_types::Range::from).unwrap_or_default(), + ..Default::default() + }, + } + } +} + +// Map io::Error to Error manually since we can't clone io::Error +impl From for Error { + fn from(err: std::io::Error) -> Self { + Error::IoError(err.to_string()) + } +} + #[derive(Default)] #[repr(C)] pub struct CompilerConfig { @@ -77,7 +120,7 @@ struct CompilationResult { } pub struct Compiler<'a, W: std::io::Write> { - parser: ASTParser<'a>, + pub parser: ASTParser<'a>, function_locations: HashMap, function_metadata: HashMap>, devices: HashMap, @@ -88,6 +131,7 @@ pub struct Compiler<'a, W: std::io::Write> { temp_counter: usize, label_counter: usize, loop_stack: Vec<(String, String)>, // Stores (start_label, end_label) + pub errors: Vec, } impl<'a, W: std::io::Write> Compiler<'a, W> { @@ -108,19 +152,30 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { temp_counter: 0, label_counter: 0, loop_stack: Vec::new(), + errors: Vec::new(), } } - pub fn compile(mut self) -> Result<(), Error> { - let expr = self.parser.parse_all()?; + pub fn compile(mut self) -> Vec { + let expr = self.parser.parse_all(); - let Some(expr) = expr else { return Ok(()) }; + // Copy errors from parser + for e in std::mem::take(&mut self.parser.errors) { + self.errors.push(Error::ParseError(e)); + } - // Wrap the root expression in a dummy span for consistency, - // since parse_all returns an unspanned Expression (usually Block) - // that contains spanned children. - // We know parse_all returns Expression::Block which has an internal span, - // but for type consistency we wrap it. + // We treat parse_all result as potentially partial + let expr = match expr { + Ok(Some(expr)) => expr, + Ok(None) => return self.errors, + Err(e) => { + // Should be covered by parser.errors, but just in case + self.errors.push(Error::ParseError(e)); + return self.errors; + } + }; + + // Wrap the root expression in a dummy span for consistency let span = if let Expression::Block(ref block) = expr { block.span } else { @@ -134,10 +189,17 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { let spanned_root = Spanned { node: expr, span }; - self.write_output("j main")?; + if let Err(e) = self.write_output("j main") { + self.errors.push(e); + return self.errors; + } + // We ignore the result of the root expression (usually a block) - let _ = self.expression(spanned_root, &mut VariableScope::default())?; - Ok(()) + if let Err(e) = self.expression(spanned_root, &mut VariableScope::default()) { + self.errors.push(e); + } + + self.errors } fn write_output(&mut self, output: impl Into) -> Result<(), Error> { @@ -255,13 +317,20 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { _ => Ok(None), // String literals don't return values in this context typically }, Expression::Variable(name) => { - let loc = scope - .get_location_of(&name.node) - .map_err(|_| Error::UnknownIdentifier(name.node.clone(), name.span))?; - Ok(Some(CompilationResult { - location: loc, - temp_name: None, // User variable, do not free - })) + match scope.get_location_of(&name.node) { + Ok(loc) => Ok(Some(CompilationResult { + location: loc, + temp_name: None, // User variable, do not free + })), + Err(_) => { + self.errors + .push(Error::UnknownIdentifier(name.node.clone(), name.span)); + Ok(Some(CompilationResult { + location: VariableLocation::Temporary(0), + temp_name: None, + })) + } + } } Expression::Priority(inner_expr) => self.expression(*inner_expr, scope), Expression::Negation(inner_expr) => { @@ -428,9 +497,16 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { (var_loc, None) } Expression::Variable(name) => { - let src_loc = scope - .get_location_of(&name.node) - .map_err(|_| Error::UnknownIdentifier(name.node.clone(), name.span))?; + let src_loc_res = scope.get_location_of(&name.node); + + let src_loc = match src_loc_res { + Ok(l) => l, + Err(_) => { + self.errors + .push(Error::UnknownIdentifier(name.node.clone(), name.span)); + VariableLocation::Temporary(0) + } + }; let var_loc = scope.add_variable(&name_str, LocationRequest::Persist)?; @@ -488,9 +564,16 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { expression, } = expr; - let location = scope - .get_location_of(&identifier.node) - .map_err(|_| Error::UnknownIdentifier(identifier.node.clone(), identifier.span))?; + let location = match scope.get_location_of(&identifier.node) { + Ok(l) => l, + Err(_) => { + self.errors.push(Error::UnknownIdentifier( + identifier.node.clone(), + identifier.span, + )); + VariableLocation::Temporary(0) + } + }; let (val_str, cleanup) = self.compile_operand(*expression, scope)?; @@ -533,15 +616,26 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { let InvocationExpression { name, arguments } = invoke_expr.node; if !self.function_locations.contains_key(&name.node) { - return Err(Error::UnknownIdentifier(name.node.clone(), name.span)); + self.errors + .push(Error::UnknownIdentifier(name.node.clone(), name.span)); + // Don't emit call, just pretend we did? + // Actually, we should probably emit a dummy call or just skip to avoid logic errors + // But if we skip, registers might be unbalanced if something expected a return. + // For now, let's just return early. + return Ok(()); } let Some(args) = self.function_metadata.get(&name.node) else { + // Should be covered by check above return Err(Error::UnknownIdentifier(name.node.clone(), name.span)); }; if args.len() != arguments.len() { - return Err(Error::AgrumentMismatch(name.node, name.span)); + self.errors + .push(Error::AgrumentMismatch(name.node.clone(), name.span)); + // Proceed anyway? The assembly will likely crash or act weird. + // Best to skip generation of this call to prevent bad IC10 + return Ok(()); } // backup all used registers to the stack @@ -564,9 +658,14 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { _ => {} }, Expression::Variable(var_name) => { - let loc = stack - .get_location_of(var_name.node.clone()) - .map_err(|_| Error::UnknownIdentifier(var_name.node, var_name.span))?; + let loc = match stack.get_location_of(var_name.node.clone()) { + Ok(l) => l, + Err(_) => { + self.errors + .push(Error::UnknownIdentifier(var_name.node, var_name.span)); + VariableLocation::Temporary(0) + } + }; match loc { VariableLocation::Persistant(reg) | VariableLocation::Temporary(reg) => { @@ -655,7 +754,12 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { span: Span, ) -> Result<(), Error> { if self.devices.contains_key(&expr.name.node) { - return Err(Error::DuplicateIdentifier(expr.name.node, span)); + self.errors + .push(Error::DuplicateIdentifier(expr.name.node.clone(), span)); + // We can overwrite or ignore. Let's ignore new declaration to avoid cascading errors? + // Actually, for recovery, maybe we want to allow it so subsequent uses work? + // But we already have it. + return Ok(()); } self.devices.insert(expr.name.node, expr.device); @@ -832,10 +936,15 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { return Ok((format!("-{}", n), None)); } - let result = self.expression(expr, scope)?.ok_or(Error::Unknown( - "Expression did not return a value".into(), - None, - ))?; + let result_opt = self.expression(expr, scope)?; + + let result = match result_opt { + Some(r) => r, + None => { + // Expression failed or returned void. Recover with dummy. + return Ok(("r0".to_string(), None)); + } + }; match result.location { VariableLocation::Temporary(r) | VariableLocation::Persistant(r) => { @@ -1032,13 +1141,18 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { self.expression_return(*ret_expr, scope)?; } _ => { - let result = self.expression(expr, scope)?; - // If the expression was a statement that returned a temp result (e.g. `1 + 2;` line), - // we must free it to avoid leaking registers. - if let Some(comp_res) = result - && let Some(name) = comp_res.temp_name - { - scope.free_temp(name)?; + // Swallow errors within expressions so block can continue + if let Err(e) = self.expression(expr, scope).and_then(|result| { + // If the expression was a statement that returned a temp result (e.g. `1 + 2;` line), + // we must free it to avoid leaking registers. + if let Some(comp_res) = result + && let Some(name) = comp_res.temp_name + { + scope.free_temp(name)?; + } + Ok(()) + }) { + self.errors.push(e); } } } @@ -1063,27 +1177,33 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { }; match expr.node { - Expression::Variable(var_name) => match scope - .get_location_of(&var_name.node) - .map_err(|_| Error::UnknownIdentifier(var_name.node, var_name.span))? - { - VariableLocation::Temporary(reg) | VariableLocation::Persistant(reg) => { - self.write_output(format!( - "move r{} r{reg} {}", - VariableScope::RETURN_REGISTER, - debug!(self, "#returnValue") - ))?; - } - VariableLocation::Stack(offset) => { - self.write_output(format!( - "sub r{} sp {offset}", - VariableScope::TEMP_STACK_REGISTER - ))?; - self.write_output(format!( - "get r{} db r{}", - VariableScope::RETURN_REGISTER, - VariableScope::TEMP_STACK_REGISTER - ))?; + Expression::Variable(var_name) => match scope.get_location_of(&var_name.node) { + Ok(loc) => match loc { + VariableLocation::Temporary(reg) | VariableLocation::Persistant(reg) => { + self.write_output(format!( + "move r{} r{reg} {}", + VariableScope::RETURN_REGISTER, + debug!(self, "#returnValue") + ))?; + } + VariableLocation::Stack(offset) => { + self.write_output(format!( + "sub r{} sp {offset}", + VariableScope::TEMP_STACK_REGISTER + ))?; + self.write_output(format!( + "get r{} db r{}", + VariableScope::RETURN_REGISTER, + VariableScope::TEMP_STACK_REGISTER + ))?; + } + }, + Err(_) => { + self.errors.push(Error::UnknownIdentifier( + var_name.node.clone(), + var_name.span, + )); + // Proceed with dummy } }, Expression::Literal(spanned_lit) => match spanned_lit.node { @@ -1189,9 +1309,18 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { let device_name = device_spanned.node; - let Some(device_val) = self.devices.get(&device_name) else { - return Err(Error::InvalidDevice(device_name, device_spanned.span)); - }; + if !self.devices.contains_key(&device_name) { + self.errors.push(Error::InvalidDevice( + device_name.clone(), + device_spanned.span, + )); + } + + let device_val = self + .devices + .get(&device_name) + .cloned() + .unwrap_or("d0".to_string()); let Literal::String(logic_type) = logic_type else { return Err(Error::AgrumentMismatch( @@ -1241,9 +1370,18 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { let device_name = device_spanned.node; - let Some(device_val) = self.devices.get(&device_name) else { - return Err(Error::InvalidDevice(device_name, device_spanned.span)); - }; + if !self.devices.contains_key(&device_name) { + self.errors.push(Error::InvalidDevice( + device_name.clone(), + device_spanned.span, + )); + } + + let device_val = self + .devices + .get(&device_name) + .cloned() + .unwrap_or("d0".to_string()); let Literal::String(logic_type) = logic_type else { return Err(Error::AgrumentMismatch( @@ -1286,7 +1424,10 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } = expr.node; if self.function_locations.contains_key(&name.node) { - return Err(Error::DuplicateIdentifier(name.node.clone(), name.span)); + self.errors + .push(Error::DuplicateIdentifier(name.node.clone(), name.span)); + // Fallthrough to allow compiling the body anyway? + // It might be useful to check body for errors. } self.function_metadata.insert( @@ -1356,26 +1497,33 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { self.expression_return(*ret_expr, &mut block_scope)?; } _ => { - let result = self.expression(expr, &mut block_scope)?; - // Free unused statement results - if let Some(comp_res) = result - && let Some(name) = comp_res.temp_name - { - block_scope.free_temp(name)?; + // Swallow internal errors + if let Err(e) = self.expression(expr, &mut block_scope).and_then(|result| { + if let Some(comp_res) = result + && let Some(name) = comp_res.temp_name + { + block_scope.free_temp(name)?; + } + Ok(()) + }) { + self.errors.push(e); } } } } // Get the saved return address and save it back into `ra` - let VariableLocation::Stack(ra_stack_offset) = block_scope - .get_location_of(format!("{}_ra", name.node)) - .map_err(Error::ScopeError)? - else { - return Err(Error::Unknown( - "Stored return address not in stack as expected".into(), - Some(name.span), - )); + let ra_res = block_scope.get_location_of(format!("{}_ra", name.node)); + let ra_stack_offset = match ra_res { + Ok(VariableLocation::Stack(offset)) => offset, + _ => { + // If we can't find RA, we can't return properly. + // This usually implies a compiler bug or scope tracking error. + return Err(Error::Unknown( + "Stored return address not in stack as expected".into(), + Some(name.span), + )); + } }; self.write_output(format!( diff --git a/rust_compiler/libs/parser/Cargo.toml b/rust_compiler/libs/parser/Cargo.toml index 2d1639a..5ff0cd5 100644 --- a/rust_compiler/libs/parser/Cargo.toml +++ b/rust_compiler/libs/parser/Cargo.toml @@ -6,6 +6,7 @@ edition = "2024" [dependencies] quick-error = { workspace = true } tokenizer = { path = "../tokenizer" } +lsp-types = { workspace = true } [dev-dependencies] diff --git a/rust_compiler/libs/parser/src/lib.rs b/rust_compiler/libs/parser/src/lib.rs index 19ab4ec..85496d9 100644 --- a/rust_compiler/libs/parser/src/lib.rs +++ b/rust_compiler/libs/parser/src/lib.rs @@ -48,6 +48,30 @@ quick_error! { } } +impl From for lsp_types::Diagnostic { + fn from(value: Error) -> Self { + use Error::*; + use lsp_types::*; + match value { + TokenizerError(e) => e.into(), + UnexpectedToken(span, _) + | DuplicateIdentifier(span, _) + | InvalidSyntax(span, _) + | UnsupportedKeyword(span, _) => Diagnostic { + message: value.to_string(), + severity: Some(DiagnosticSeverity::ERROR), + range: span.into(), + ..Default::default() + }, + UnexpectedEOF => Diagnostic { + message: value.to_string(), + severity: Some(DiagnosticSeverity::ERROR), + ..Default::default() + }, + } + } +} + macro_rules! self_matches_peek { ($self:ident, $pattern:pat) => { matches!($self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. })) @@ -84,6 +108,7 @@ macro_rules! self_matches_current { pub struct Parser<'a> { tokenizer: TokenizerBuffer<'a>, current_token: Option, + pub errors: Vec, } impl<'a> Parser<'a> { @@ -91,6 +116,7 @@ impl<'a> Parser<'a> { Parser { tokenizer: TokenizerBuffer::new(tokenizer), current_token: None, + errors: Vec::new(), } } @@ -158,8 +184,45 @@ impl<'a> Parser<'a> { }) } + /// Skips tokens until a statement boundary is found to recover from errors. + fn synchronize(&mut self) -> Result<(), Error> { + // We advance once to consume the error-causing token if we haven't already + // But often the error happens after we consumed something. + // Safe bet: consume current, then look. + + // If we assign next, we might be skipping the very token we want to sync on if the error didn't consume it? + // Usually, in recursive descent, the error is raised when `current` is unexpected. + // We want to discard `current` and move on. + self.assign_next()?; + + while let Some(token) = &self.current_token { + if token.token_type == TokenType::Symbol(Symbol::Semicolon) { + // Consuming the semicolon is a good place to stop and resume parsing next statement + self.assign_next()?; + return Ok(()); + } + + // Check if the token looks like the start of a statement. + // If so, we don't consume it; we return so the loop in parse_all can try to parse it. + match token.token_type { + TokenType::Keyword(Keyword::Fn) + | TokenType::Keyword(Keyword::Let) + | TokenType::Keyword(Keyword::If) + | TokenType::Keyword(Keyword::While) + | TokenType::Keyword(Keyword::Loop) + | TokenType::Keyword(Keyword::Device) + | TokenType::Keyword(Keyword::Return) => return Ok(()), + _ => {} + } + + self.assign_next()?; + } + + Ok(()) + } + pub fn parse_all(&mut self) -> Result, Error> { - let first_token = self.tokenizer.peek()?; + let first_token = self.tokenizer.peek().unwrap_or(None); let (start_line, start_col) = first_token .as_ref() .map(|tok| (tok.line, tok.column)) @@ -167,28 +230,38 @@ impl<'a> Parser<'a> { let mut expressions = Vec::>::new(); - while let Some(expression) = self.parse()? { - expressions.push(expression); + loop { + // Check EOF without unwrapping error + match self.tokenizer.peek() { + Ok(None) => break, + Err(e) => { + self.errors.push(Error::TokenizerError(e)); + break; + } + _ => {} + } + + match self.parse() { + Ok(Some(expression)) => { + expressions.push(expression); + } + Ok(None) => break, + Err(e) => { + self.errors.push(e); + // Recover + if self.synchronize().is_err() { + // If sync failed (e.g. EOF during sync), break + break; + } + } + } } - if expressions.is_empty() { - let span = Span { - start_line, - end_line: start_line, - start_col, - end_col: start_col, - }; - - return Ok(Some(Expression::Block(Spanned { - node: BlockExpression(vec![]), - span, - }))); - } - - self.tokenizer.seek(SeekFrom::Current(-1))?; - - let end_token_opt = self.tokenizer.peek()?; + // Even if we had errors, we return whatever partial AST we managed to build. + // If expressions is empty and we had errors, it's a failed parse, but we return a block. + // Use the last token position for end span, or start if nothing parsed + let end_token_opt = self.tokenizer.peek().unwrap_or(None); let (end_line, end_col) = end_token_opt .map(|tok| { let len = tok.original_string.as_ref().map(|s| s.len()).unwrap_or(0); @@ -211,6 +284,12 @@ impl<'a> Parser<'a> { pub fn parse(&mut self) -> Result>, Error> { self.assign_next()?; + + // If assign_next hit EOF or error? + if self.current_token.is_none() { + return Ok(None); + } + let expr = self.expression()?; if self_matches_peek!(self, TokenType::Symbol(Symbol::Semicolon)) { @@ -1469,4 +1548,3 @@ impl<'a> Parser<'a> { } } } - diff --git a/rust_compiler/libs/parser/src/tree_node.rs b/rust_compiler/libs/parser/src/tree_node.rs index 8133b27..a968ed4 100644 --- a/rust_compiler/libs/parser/src/tree_node.rs +++ b/rust_compiler/libs/parser/src/tree_node.rs @@ -222,6 +222,36 @@ pub struct Span { pub end_col: usize, } +impl From for lsp_types::Range { + fn from(value: Span) -> Self { + Self { + start: lsp_types::Position { + line: value.start_line as u32, + character: value.start_col as u32, + }, + end: lsp_types::Position { + line: value.end_line as u32, + character: value.end_col as u32, + }, + } + } +} + +impl From<&Span> for lsp_types::Range { + fn from(value: &Span) -> Self { + Self { + start: lsp_types::Position { + line: value.start_line as u32, + character: value.start_col as u32, + }, + end: lsp_types::Position { + line: value.end_line as u32, + character: value.end_col as u32, + }, + } + } +} + #[derive(Debug, Clone, PartialEq, Eq)] pub struct Spanned { pub span: Span, @@ -293,4 +323,3 @@ impl std::fmt::Display for Expression { } } } - diff --git a/rust_compiler/libs/tokenizer/Cargo.toml b/rust_compiler/libs/tokenizer/Cargo.toml index 100b2b7..38032f4 100644 --- a/rust_compiler/libs/tokenizer/Cargo.toml +++ b/rust_compiler/libs/tokenizer/Cargo.toml @@ -6,6 +6,7 @@ edition = "2024" [dependencies] rust_decimal = { workspace = true } quick-error = { workspace = true } +lsp-types = { workspace = true } [dev-dependencies] anyhow = { version = "^1" } diff --git a/rust_compiler/libs/tokenizer/src/lib.rs b/rust_compiler/libs/tokenizer/src/lib.rs index 8f2dc41..9434c2d 100644 --- a/rust_compiler/libs/tokenizer/src/lib.rs +++ b/rust_compiler/libs/tokenizer/src/lib.rs @@ -35,6 +35,39 @@ quick_error! { } } +impl From for lsp_types::Diagnostic { + fn from(value: Error) -> Self { + use Error::*; + use lsp_types::*; + + match value { + IOError(e) => Diagnostic { + message: e.to_string(), + severity: Some(DiagnosticSeverity::ERROR), + ..Default::default() + }, + NumberParseError(_, l, c, ref og) + | DecimalParseError(_, l, c, ref og) + | UnknownSymbolError(_, l, c, ref og) + | UnknownKeywordOrIdentifierError(_, l, c, ref og) => Diagnostic { + range: Range { + start: Position { + line: l as u32, + character: c as u32, + }, + end: Position { + line: l as u32, + character: (c + og.len()) as u32, + }, + }, + message: value.to_string(), + severity: Some(DiagnosticSeverity::ERROR), + ..Default::default() + }, + } + } +} + pub trait Tokenize: Read + Seek {} impl Tokenize for T where T: Read + Seek {} diff --git a/rust_compiler/src/ffi/mod.rs b/rust_compiler/src/ffi/mod.rs new file mode 100644 index 0000000..4754a20 --- /dev/null +++ b/rust_compiler/src/ffi/mod.rs @@ -0,0 +1,103 @@ +use compiler::Compiler; +use parser::Parser; +use safer_ffi::prelude::*; +use std::io::BufWriter; +use tokenizer::Tokenizer; + +#[derive_ReprC] +#[repr(C)] +pub struct FfiToken { + pub tooltip: safer_ffi::String, + pub error: safer_ffi::String, + pub column: i32, + pub length: i32, + pub token_kind: u32, +} + +#[derive_ReprC] +#[repr(C)] +pub struct FfiRange { + start_col: u32, + end_col: u32, + start_line: u32, + end_line: u32, +} + +impl From for FfiRange { + fn from(value: lsp_types::Range) -> Self { + Self { + start_col: value.start.character, + end_col: value.end.character, + start_line: value.start.line, + end_line: value.end.line, + } + } +} + +#[derive_ReprC] +#[repr(C)] +pub struct FfiDiagnostic { + message: safer_ffi::String, + severity: i32, + range: FfiRange, +} + +impl From for FfiDiagnostic { + fn from(value: lsp_types::Diagnostic) -> Self { + use lsp_types::*; + Self { + message: value.message.into(), + severity: match value.severity.unwrap_or(DiagnosticSeverity::ERROR) { + DiagnosticSeverity::WARNING => 2, + DiagnosticSeverity::INFORMATION => 3, + DiagnosticSeverity::HINT => 4, + _ => 1, + }, + range: value.range.into(), + } + } +} + +#[ffi_export] +pub fn free_ffi_token_vec(v: safer_ffi::Vec) { + drop(v) +} + +#[ffi_export] +pub fn free_ffi_diagnostic_vec(v: safer_ffi::Vec) { + drop(v) +} + +#[ffi_export] +pub fn free_string(s: safer_ffi::String) { + drop(s) +} + +/// C# handles strings as UTF16. We do NOT want to allocate that memory in C# because +/// we want to avoid GC. So we pass it to Rust to handle all the memory allocations. +/// This should result in the ability to compile many times without triggering frame drops +/// from the GC from a `GetBytes()` call on a string in C#. +#[ffi_export] +pub fn compile_from_string(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::String { + let mut writer = BufWriter::new(Vec::new()); + + let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice())); + let parser = Parser::new(tokenizer); + let compiler = Compiler::new(parser, &mut writer, None); + + if !compiler.compile().is_empty() { + return safer_ffi::String::EMPTY; + } + + let Ok(compiled_vec) = writer.into_inner() else { + return safer_ffi::String::EMPTY; + }; + + // Safety: I know the compiler only outputs valid utf8 + safer_ffi::String::from(unsafe { String::from_utf8_unchecked(compiled_vec) }) +} + +#[ffi_export] +pub fn diagnose_source() -> safer_ffi::Vec { + vec![].into() +} diff --git a/rust_compiler/src/lib.rs b/rust_compiler/src/lib.rs index c171cdd..6ddcd4d 100644 --- a/rust_compiler/src/lib.rs +++ b/rust_compiler/src/lib.rs @@ -1,107 +1,5 @@ -use compiler::Compiler; -use parser::Parser; -use safer_ffi::prelude::*; -use std::io::BufWriter; -use tokenizer::{token::TokenType, Error as TokenizerError, Tokenizer}; - -#[derive_ReprC] -#[repr(C)] -pub struct FfiToken { - pub tooltip: safer_ffi::String, - pub error: safer_ffi::String, - pub column: i32, - pub length: i32, - pub token_kind: u32, -} - -fn map_token_kind(t: &TokenType) -> u32 { - use TokenType::*; - match t { - Keyword(_) => 1, - Identifier(_) => 2, - Number(_) => 3, - String(_) => 4, - Boolean(_) => 5, - Symbol(_) => 6, - _ => 0, - } -} - -/// C# handles strings as UTF16. We do NOT want to allocate that memory in C# because -/// we want to avoid GC. So we pass it to Rust to handle all the memory allocations. -/// This should result in the ability to compile many times without triggering frame drops -/// from the GC from a `GetBytes()` call on a string in C#. -#[ffi_export] -pub fn compile_from_string(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::String { - let mut writer = BufWriter::new(Vec::new()); - - let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice())); - let parser = Parser::new(tokenizer); - let compiler = Compiler::new(parser, &mut writer, None); - - if compiler.compile().is_err() { - return safer_ffi::String::EMPTY; - } - - let Ok(compiled_vec) = writer.into_inner() else { - return safer_ffi::String::EMPTY; - }; - - // Safety: I know the compiler only outputs valid utf8 - safer_ffi::String::from(unsafe { String::from_utf8_unchecked(compiled_vec) }) -} -/// C# handles strings as UTF16. We do NOT want to allocate that memory in C# because -/// we want to avoid GC. So we pass it to Rust to handle all the memory allocations. -/// This should result in the ability to tokenize many times without triggering frame drops -/// from the GC from a `GetBytes()` call on a string in C#. -#[ffi_export] -pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec { - let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice())); - - let mut tokens = Vec::::new(); - - for token in tokenizer { - match token { - Err(TokenizerError::NumberParseError(_, _, col, ref str)) - | Err(TokenizerError::UnknownSymbolError(_, _, col, ref str)) - | Err(TokenizerError::DecimalParseError(_, _, col, ref str)) - | Err(TokenizerError::UnknownKeywordOrIdentifierError(_, _, col, ref str)) => { - tokens.push(FfiToken { - column: col as i32 - 1, - tooltip: "".into(), - length: str.len() as i32, - token_kind: 0, - // Safety: it's okay to unwrap the err here because we are matching on the `Err` variant - error: token.unwrap_err().to_string().into(), - }); - } - Err(_) => return safer_ffi::Vec::EMPTY, - Ok(token) if !matches!(token.token_type, TokenType::EOF) => tokens.push(FfiToken { - tooltip: "".into(), - error: "".into(), - length: token - .original_string - .map(|s| s.len() as i32) - .unwrap_or_default(), - token_kind: map_token_kind(&token.token_type), - column: token.column as i32 - 1, - }), - _ => {} - } - } - - tokens.into() -} - -#[ffi_export] -pub fn free_ffi_token_vec(v: safer_ffi::Vec) { - drop(v) -} - -#[ffi_export] -pub fn free_string(s: safer_ffi::String) { - drop(s) -} +mod ffi; +pub(crate) mod lsp; #[cfg(feature = "headers")] pub fn generate_headers() -> std::io::Result<()> { diff --git a/rust_compiler/src/lsp/mod.rs b/rust_compiler/src/lsp/mod.rs new file mode 100644 index 0000000..e69de29 diff --git a/rust_compiler/src/main.rs b/rust_compiler/src/main.rs index 619d696..730b2a2 100644 --- a/rust_compiler/src/main.rs +++ b/rust_compiler/src/main.rs @@ -8,7 +8,7 @@ use compiler::Compiler; use parser::Parser as ASTParser; use std::{ fs::File, - io::{BufWriter, Read, Write}, + io::{stderr, BufWriter, Read, Write}, path::PathBuf, }; use tokenizer::{self, Tokenizer}; @@ -75,7 +75,22 @@ fn run_logic() -> Result<(), StationlangError> { let compiler = Compiler::new(parser, &mut writer, None); - compiler.compile()?; + let mut errors = compiler.compile(); + + if !errors.is_empty() { + let mut std_error = stderr(); + let last = errors.pop(); + let errors = errors.into_iter().map(StationlangError::from); + + std_error.write_all(b"Compilation error:\n")?; + + for err in errors { + std_error.write_all(format!("{}\n", err).as_bytes())?; + } + + return Err(StationlangError::from(last.unwrap())); + } + writer.flush()?; Ok(()) From 25d9222bd43c910a9aa2496b95bc68a311191075 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Mon, 1 Dec 2025 02:54:53 -0700 Subject: [PATCH 08/12] WIP -- emit compilation errors --- csharp_mod/Extensions.cs | 47 ++++- csharp_mod/FfiGlue.cs | 242 +++++++++++----------- csharp_mod/Formatter.cs | 118 ++++++++++- csharp_mod/Marshal.cs | 64 +++++- csharp_mod/Patches.cs | 2 - csharp_mod/Plugin.cs | 26 --- rust_compiler/libs/tokenizer/src/token.rs | 15 ++ rust_compiler/src/ffi/mod.rs | 59 +++++- 8 files changed, 395 insertions(+), 178 deletions(-) diff --git a/csharp_mod/Extensions.cs b/csharp_mod/Extensions.cs index 764b6c9..02ebae3 100644 --- a/csharp_mod/Extensions.cs +++ b/csharp_mod/Extensions.cs @@ -1,6 +1,7 @@ namespace Slang; using System; +using System.Collections.Generic; using System.Text; using StationeersIC10Editor; @@ -53,7 +54,7 @@ public static unsafe class SlangExtensions var color = GetColorForKind(token.token_kind); - int colIndex = token.column; + int colIndex = token.column - 1; if (colIndex < 0) colIndex = 0; @@ -80,20 +81,50 @@ public static unsafe class SlangExtensions return list; } + public static unsafe List ToList(this Vec_FfiDiagnostic_t vec) + { + var toReturn = new List((int)vec.len); + + var currentPtr = vec.ptr; + + for (int i = 0; i < (int)vec.len; i++) + { + var item = currentPtr[i]; + + toReturn.Add( + new Slang.Diagnostic + { + Message = item.message.AsString(), + Severity = item.severity, + Range = new Slang.Range + { + EndCol = item.range.end_col - 1, + EndLine = item.range.end_line - 1, + StartCol = item.range.start_col - 1, + StartLine = item.range.end_line - 1, + }, + } + ); + } + + Ffi.free_ffi_diagnostic_vec(vec); + return toReturn; + } + private static uint GetColorForKind(uint kind) { switch (kind) { case 1: - return SlangFormatter.ColorInstruction; // Keyword - case 2: - return SlangFormatter.ColorDefault; // Identifier - case 3: - return SlangFormatter.ColorNumber; // Number - case 4: return SlangFormatter.ColorString; // String - case 5: + case 2: + return SlangFormatter.ColorString; // Number + case 3: return SlangFormatter.ColorInstruction; // Boolean + case 4: + return SlangFormatter.ColorInstruction; // Keyword + case 5: + return SlangFormatter.ColorInstruction; // Identifier case 6: return SlangFormatter.ColorDefault; // Symbol default: diff --git a/csharp_mod/FfiGlue.cs b/csharp_mod/FfiGlue.cs index 668bb96..5c49ef5 100644 --- a/csharp_mod/FfiGlue.cs +++ b/csharp_mod/FfiGlue.cs @@ -15,162 +15,160 @@ #pragma warning disable SA1500, SA1505, SA1507, #pragma warning disable SA1600, SA1601, SA1604, SA1605, SA1611, SA1615, SA1649, -namespace Slang -{ - using System; - using System.Runtime.InteropServices; +namespace Slang { +using System; +using System.Runtime.InteropServices; - public unsafe partial class Ffi - { +public unsafe partial class Ffi { #if IOS - private const string RustLib = "slang.framework/slang"; + private const string RustLib = "slang.framework/slang"; #else - public const string RustLib = "slang_compiler.dll"; + public const string RustLib = "slang_compiler.dll"; #endif - } +} + +/// +/// &'lt [T] but with a guaranteed #[repr(C)] layout. +/// +/// # C layout (for some given type T) +/// +/// ```c +/// typedef struct { +/// // Cannot be NULL +/// T * ptr; +/// size_t len; +/// } slice_T; +/// ``` +/// +/// # Nullable pointer? +/// +/// If you want to support the above typedef, but where the ptr field is +/// allowed to be NULL (with the contents of len then being undefined) +/// use the Option< slice_ptr<_> > type. +/// +[StructLayout(LayoutKind.Sequential, Size = 16)] +public unsafe struct slice_ref_uint16_t { + /// + /// Pointer to the first element (if any). + /// + public UInt16 /*const*/ * ptr; /// - /// &'lt [T] but with a guaranteed #[repr(C)] layout. - /// - /// # C layout (for some given type T) - /// - /// ```c - /// typedef struct { - /// // Cannot be NULL - /// T * ptr; - /// size_t len; - /// } slice_T; - /// ``` - /// - /// # Nullable pointer? - /// - /// If you want to support the above typedef, but where the ptr field is - /// allowed to be NULL (with the contents of len then being undefined) - /// use the Option< slice_ptr<_> > type. + /// Element count /// - [StructLayout(LayoutKind.Sequential, Size = 16)] - public unsafe struct slice_ref_uint16_t - { - /// - /// Pointer to the first element (if any). - /// - public UInt16 /*const*/ - * ptr; + public UIntPtr len; +} - /// - /// Element count - /// - public UIntPtr len; - } +/// +/// Same as [Vec][rust::Vec], but with guaranteed #[repr(C)] layout +/// +[StructLayout(LayoutKind.Sequential, Size = 24)] +public unsafe struct Vec_uint8_t { + public byte * ptr; + public UIntPtr len; + + public UIntPtr cap; +} + +public unsafe partial class Ffi { /// - /// Same as [Vec][rust::Vec], but with guaranteed #[repr(C)] layout + /// C# handles strings as UTF16. We do NOT want to allocate that memory in C# because + /// we want to avoid GC. So we pass it to Rust to handle all the memory allocations. + /// This should result in the ability to compile many times without triggering frame drops + /// from the GC from a GetBytes() call on a string in C#. /// - [StructLayout(LayoutKind.Sequential, Size = 24)] - public unsafe struct Vec_uint8_t - { - public byte* ptr; + [DllImport(RustLib, ExactSpelling = true)] public static unsafe extern + Vec_uint8_t compile_from_string ( + slice_ref_uint16_t input); +} - public UIntPtr len; +[StructLayout(LayoutKind.Sequential, Size = 16)] +public unsafe struct FfiRange_t { + public UInt32 start_col; - public UIntPtr cap; - } + public UInt32 end_col; - public unsafe partial class Ffi - { - /// - /// C# handles strings as UTF16. We do NOT want to allocate that memory in C# because - /// we want to avoid GC. So we pass it to Rust to handle all the memory allocations. - /// This should result in the ability to compile many times without triggering frame drops - /// from the GC from a GetBytes() call on a string in C#. - /// - [DllImport(RustLib, ExactSpelling = true)] - public static extern unsafe Vec_uint8_t compile_from_string(slice_ref_uint16_t input); - } + public UInt32 start_line; - [StructLayout(LayoutKind.Sequential, Size = 16)] - public unsafe struct FfiRange_t - { - public UInt32 start_col; + public UInt32 end_line; +} - public UInt32 end_col; +[StructLayout(LayoutKind.Sequential, Size = 48)] +public unsafe struct FfiDiagnostic_t { + public Vec_uint8_t message; - public UInt32 start_line; + public Int32 severity; - public UInt32 end_line; - } + public FfiRange_t range; +} - [StructLayout(LayoutKind.Sequential, Size = 48)] - public unsafe struct FfiDiagnostic_t - { - public Vec_uint8_t message; +/// +/// Same as [Vec][rust::Vec], but with guaranteed #[repr(C)] layout +/// +[StructLayout(LayoutKind.Sequential, Size = 24)] +public unsafe struct Vec_FfiDiagnostic_t { + public FfiDiagnostic_t * ptr; - public Int32 severity; + public UIntPtr len; - public FfiRange_t range; - } + public UIntPtr cap; +} - /// - /// Same as [Vec][rust::Vec], but with guaranteed #[repr(C)] layout - /// - [StructLayout(LayoutKind.Sequential, Size = 24)] - public unsafe struct Vec_FfiDiagnostic_t - { - public FfiDiagnostic_t* ptr; +public unsafe partial class Ffi { + [DllImport(RustLib, ExactSpelling = true)] public static unsafe extern + Vec_FfiDiagnostic_t diagnose_source ( + slice_ref_uint16_t input); +} - public UIntPtr len; +public unsafe partial class Ffi { + [DllImport(RustLib, ExactSpelling = true)] public static unsafe extern + void free_ffi_diagnostic_vec ( + Vec_FfiDiagnostic_t v); +} - public UIntPtr cap; - } +[StructLayout(LayoutKind.Sequential, Size = 64)] +public unsafe struct FfiToken_t { + public Vec_uint8_t tooltip; - public unsafe partial class Ffi - { - [DllImport(RustLib, ExactSpelling = true)] - public static extern unsafe Vec_FfiDiagnostic_t diagnose_source(); - } + public Vec_uint8_t error; - public unsafe partial class Ffi - { - [DllImport(RustLib, ExactSpelling = true)] - public static extern unsafe void free_ffi_diagnostic_vec(Vec_FfiDiagnostic_t v); - } + public Int32 column; - [StructLayout(LayoutKind.Sequential, Size = 64)] - public unsafe struct FfiToken_t - { - public Vec_uint8_t tooltip; + public Int32 length; - public Vec_uint8_t error; + public UInt32 token_kind; +} - public Int32 column; +/// +/// Same as [Vec][rust::Vec], but with guaranteed #[repr(C)] layout +/// +[StructLayout(LayoutKind.Sequential, Size = 24)] +public unsafe struct Vec_FfiToken_t { + public FfiToken_t * ptr; - public Int32 length; + public UIntPtr len; - public UInt32 token_kind; - } + public UIntPtr cap; +} - /// - /// Same as [Vec][rust::Vec], but with guaranteed #[repr(C)] layout - /// - [StructLayout(LayoutKind.Sequential, Size = 24)] - public unsafe struct Vec_FfiToken_t - { - public FfiToken_t* ptr; +public unsafe partial class Ffi { + [DllImport(RustLib, ExactSpelling = true)] public static unsafe extern + void free_ffi_token_vec ( + Vec_FfiToken_t v); +} - public UIntPtr len; +public unsafe partial class Ffi { + [DllImport(RustLib, ExactSpelling = true)] public static unsafe extern + void free_string ( + Vec_uint8_t s); +} - public UIntPtr cap; - } +public unsafe partial class Ffi { + [DllImport(RustLib, ExactSpelling = true)] public static unsafe extern + Vec_FfiToken_t tokenize_line ( + slice_ref_uint16_t input); +} - public unsafe partial class Ffi - { - [DllImport(RustLib, ExactSpelling = true)] - public static extern unsafe void free_ffi_token_vec(Vec_FfiToken_t v); - } - public unsafe partial class Ffi - { - [DllImport(RustLib, ExactSpelling = true)] - public static extern unsafe void free_string(Vec_uint8_t s); - } } /* Slang */ diff --git a/csharp_mod/Formatter.cs b/csharp_mod/Formatter.cs index b26d2cf..0db13ac 100644 --- a/csharp_mod/Formatter.cs +++ b/csharp_mod/Formatter.cs @@ -1,40 +1,138 @@ namespace Slang; +using System.Collections.Generic; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; using System.Timers; using StationeersIC10Editor; public class SlangFormatter : ICodeFormatter { - private Timer _timer; + private System.Timers.Timer _timer; + private CancellationTokenSource? _lspCancellationToken; + private readonly SynchronizationContext? _mainThreadContext; + private volatile bool IsDiagnosing = false; public static readonly uint ColorInstruction = ColorFromHTML("#ffff00"); public static readonly uint ColorString = ColorFromHTML("#ce9178"); + private object _textLock = new(); + public SlangFormatter() { - _timer = new Timer(250); + // 1. Capture the Main Thread context. + // This works because the Editor instantiates this class on the main thread. + _mainThreadContext = SynchronizationContext.Current; - this.OnCodeChanged += HandleCodeChanged; + _timer = new System.Timers.Timer(250); + _timer.AutoReset = false; } public override string Compile() { - L.Info("ICodeFormatter attempted to compile source code."); return this.Lines.RawText; } public override Line ParseLine(string line) { - return new Line(line); + HandleCodeChanged(); + return Marshal.TokenizeLine(line); } private void HandleCodeChanged() { - _timer.Stop(); - _timer.Dispose(); - _timer = new Timer(250); - _timer.Elapsed += (_, _) => HandleLsp(); + if (IsDiagnosing) + return; + + _lspCancellationToken?.Cancel(); + _lspCancellationToken?.Dispose(); + + _lspCancellationToken = new CancellationTokenSource(); + + _ = HandleLsp(_lspCancellationToken.Token, this.RawText); } - private void HandleLsp() { } + private void OnTimerElapsed(object sender, ElapsedEventArgs e) { } + + private async Task HandleLsp(CancellationToken cancellationToken, string text) + { + try + { + await Task.Delay(500, cancellationToken); + + if (cancellationToken.IsCancellationRequested) + return; + + List diagnosis = Marshal.DiagnoseSource(text); + + var dict = diagnosis + .GroupBy(d => d.Range.StartLine) + .ToDictionary(g => g.Key, g => g.ToList()); + + // 3. Dispatch the UI update to the Main Thread + if (_mainThreadContext != null) + { + // Post ensures ApplyDiagnostics runs on the captured thread (Main Thread) + _mainThreadContext.Post(_ => ApplyDiagnostics(dict), null); + } + else + { + // Fallback: If context is null (rare in Unity), try running directly + // but warn, as this might crash if not thread-safe. + L.Warning("SynchronizationContext was null. Attempting direct update (risky)."); + ApplyDiagnostics(dict); + } + } + finally { } + } + + // This runs on the Main Thread + private void ApplyDiagnostics(Dictionary> dict) + { + IsDiagnosing = true; + // Standard LSP uses 0-based indexing. + for (int i = 0; i < this.Lines.Count; i++) + { + uint lineIndex = (uint)i; + + if (dict.TryGetValue(lineIndex, out var lineDiagnostics)) + { + var line = this.Lines[i]; + if (line is null) + { + continue; + } + + var tokenMap = line.Tokens.ToDictionary((t) => t.Column); + + foreach (var diag in lineDiagnostics) + { + var newToken = new SemanticToken + { + Column = (int)diag.Range.StartCol, + Length = (int)(diag.Range.EndCol - diag.Range.StartCol), + Line = i, + IsError = true, + Data = diag.Message, + Color = ICodeFormatter.ColorError, + }; + + L.Info( + $"Col: {newToken.Column} -- Length: {newToken.Length} -- Msg: {newToken.Data}" + ); + + tokenMap[newToken.Column] = newToken; + } + + line.ClearTokens(); + + foreach (var token in tokenMap.Values) + { + line.AddToken(token); + } + } + } + IsDiagnosing = false; + } } diff --git a/csharp_mod/Marshal.cs b/csharp_mod/Marshal.cs index ffb6a58..c058e6b 100644 --- a/csharp_mod/Marshal.cs +++ b/csharp_mod/Marshal.cs @@ -1,11 +1,27 @@ namespace Slang; using System; +using System.Collections.Generic; using System.IO; using System.Reflection; using System.Runtime.InteropServices; using StationeersIC10Editor; +public struct Range +{ + public uint StartCol; + public uint EndCol; + public uint StartLine; + public uint EndLine; +} + +public struct Diagnostic +{ + public string Message; + public int Severity; + public Range Range; +} + public static class Marshal { private static IntPtr _libraryHandle = IntPtr.Zero; @@ -63,13 +79,7 @@ public static class Marshal public static unsafe bool CompileFromString(string inputString, out string compiledString) { - if (String.IsNullOrEmpty(inputString)) - { - compiledString = String.Empty; - return false; - } - - if (!EnsureLibLoaded()) + if (String.IsNullOrEmpty(inputString) || !EnsureLibLoaded()) { compiledString = String.Empty; return false; @@ -101,6 +111,46 @@ public static class Marshal } } + public static unsafe List DiagnoseSource(string inputString) + { + if (string.IsNullOrEmpty(inputString) || !EnsureLibLoaded()) + { + return new(); + } + + fixed (char* ptrInput = inputString) + { + var input = new slice_ref_uint16_t + { + ptr = (ushort*)ptrInput, + len = (UIntPtr)inputString.Length, + }; + + return Ffi.diagnose_source(input).ToList(); + } + } + + public static unsafe Line TokenizeLine(string inputString) + { + if (string.IsNullOrEmpty(inputString) || !EnsureLibLoaded()) + { + return new Line(inputString); + } + + fixed (char* ptrInputStr = inputString) + { + var strRef = new slice_ref_uint16_t + { + len = (UIntPtr)inputString.Length, + ptr = (ushort*)ptrInputStr, + }; + + var tokens = Ffi.tokenize_line(strRef); + + return tokens.ToLine(inputString); + } + } + private static string ExtractNativeLibrary(string libName) { string destinationPath = Path.Combine(Path.GetTempPath(), libName); diff --git a/csharp_mod/Patches.cs b/csharp_mod/Patches.cs index d5fc8bc..276f1cc 100644 --- a/csharp_mod/Patches.cs +++ b/csharp_mod/Patches.cs @@ -1,11 +1,9 @@ namespace Slang; using System; -using Assets.Scripts; using Assets.Scripts.Objects; using Assets.Scripts.Objects.Electrical; using Assets.Scripts.Objects.Motherboards; -using Assets.Scripts.UI; using HarmonyLib; [HarmonyPatch] diff --git a/csharp_mod/Plugin.cs b/csharp_mod/Plugin.cs index 23d24e1..d4a8740 100644 --- a/csharp_mod/Plugin.cs +++ b/csharp_mod/Plugin.cs @@ -1,7 +1,3 @@ -using System; -using System.IO; -using System.IO.Compression; -using System.Text; using System.Text.RegularExpressions; using BepInEx; using HarmonyLib; @@ -65,28 +61,6 @@ namespace Slang } } - /// - /// Encodes the original slang source code as base64 and uses gzip to compress it, returning the resulting string. - /// - public static string EncodeSource(string source) - { - if (string.IsNullOrEmpty(source)) - { - return ""; - } - - byte[] bytes = Encoding.UTF8.GetBytes(source); - - using (var memoryStream = new MemoryStream()) - { - using (var gzipStream = new GZipStream(memoryStream, CompressionMode.Compress)) - { - gzipStream.Write(bytes, 0, bytes.Length); - } - return Convert.ToBase64String(memoryStream.ToArray()); - } - } - public static bool IsSlangSource(ref string input) { return SlangSourceCheck.IsMatch(input); diff --git a/rust_compiler/libs/tokenizer/src/token.rs b/rust_compiler/libs/tokenizer/src/token.rs index c5bed81..b471d6d 100644 --- a/rust_compiler/libs/tokenizer/src/token.rs +++ b/rust_compiler/libs/tokenizer/src/token.rs @@ -87,6 +87,21 @@ pub enum TokenType { EOF, } +impl From for u32 { + fn from(value: TokenType) -> Self { + use TokenType::*; + match value { + String(_) => 1, + Number(_) => 2, + Boolean(_) => 3, + Keyword(_) => 4, + Identifier(_) => 5, + Symbol(_) => 6, + EOF => 0, + } + } +} + impl std::fmt::Display for TokenType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { diff --git a/rust_compiler/src/ffi/mod.rs b/rust_compiler/src/ffi/mod.rs index 4754a20..84159cf 100644 --- a/rust_compiler/src/ffi/mod.rs +++ b/rust_compiler/src/ffi/mod.rs @@ -2,7 +2,10 @@ use compiler::Compiler; use parser::Parser; use safer_ffi::prelude::*; use std::io::BufWriter; -use tokenizer::Tokenizer; +use tokenizer::{ + token::{Token, TokenType}, + Tokenizer, +}; #[derive_ReprC] #[repr(C)] @@ -98,6 +101,56 @@ pub fn compile_from_string(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi:: } #[ffi_export] -pub fn diagnose_source() -> safer_ffi::Vec { - vec![].into() +pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec { + let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice())); + + let mut tokens = Vec::new(); + + // Error reporting is handled in `diagnose_source`. We only care about successful tokens here + // for syntax highlighting + for token in tokenizer { + if matches!( + token, + Ok(Token { + token_type: TokenType::EOF, + .. + }) + ) { + continue; + } + match token { + Err(_) => {} + Ok(Token { + column, + original_string, + token_type, + .. + }) => tokens.push(FfiToken { + column: column as i32, + error: "".into(), + length: (original_string.unwrap_or_default().len()) as i32, + token_kind: token_type.into(), + tooltip: "".into(), + }), + } + } + + tokens.into() +} + +#[ffi_export] +pub fn diagnose_source(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec { + let mut writer = BufWriter::new(Vec::new()); + let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice())); + let compiler = Compiler::new(Parser::new(tokenizer), &mut writer, None); + + let diagnosis = compiler.compile(); + + let mut result_vec: Vec = Vec::with_capacity(diagnosis.len()); + + for err in diagnosis { + result_vec.push(lsp_types::Diagnostic::from(err).into()); + } + + result_vec.into() } From 8ea274f3bf76115c12af53252c64365032cd3520 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Mon, 1 Dec 2025 14:50:05 -0700 Subject: [PATCH 09/12] working in-game error diagnostics. memory access violation bug present. Need to debug --- csharp_mod/Extensions.cs | 8 +- csharp_mod/Formatter.cs | 100 ++++++++++++------------ rust_compiler/libs/parser/src/lib.rs | 8 +- rust_compiler/libs/tokenizer/src/lib.rs | 8 +- rust_compiler/src/ffi/mod.rs | 21 ++++- 5 files changed, 81 insertions(+), 64 deletions(-) diff --git a/csharp_mod/Extensions.cs b/csharp_mod/Extensions.cs index 02ebae3..35416fb 100644 --- a/csharp_mod/Extensions.cs +++ b/csharp_mod/Extensions.cs @@ -98,9 +98,9 @@ public static unsafe class SlangExtensions Severity = item.severity, Range = new Slang.Range { - EndCol = item.range.end_col - 1, + EndCol = Math.Max(item.range.end_col - 2, 0), EndLine = item.range.end_line - 1, - StartCol = item.range.start_col - 1, + StartCol = Math.Max(item.range.start_col - 2, 0), StartLine = item.range.end_line - 1, }, } @@ -122,9 +122,9 @@ public static unsafe class SlangExtensions case 3: return SlangFormatter.ColorInstruction; // Boolean case 4: - return SlangFormatter.ColorInstruction; // Keyword + return SlangFormatter.ColorSelection; // Keyword case 5: - return SlangFormatter.ColorInstruction; // Identifier + return SlangFormatter.ColorLineNumber; // Identifier case 6: return SlangFormatter.ColorDefault; // Symbol default: diff --git a/csharp_mod/Formatter.cs b/csharp_mod/Formatter.cs index 0db13ac..a3dc844 100644 --- a/csharp_mod/Formatter.cs +++ b/csharp_mod/Formatter.cs @@ -1,5 +1,6 @@ namespace Slang; +using System; using System.Collections.Generic; using System.Linq; using System.Threading; @@ -9,7 +10,6 @@ using StationeersIC10Editor; public class SlangFormatter : ICodeFormatter { - private System.Timers.Timer _timer; private CancellationTokenSource? _lspCancellationToken; private readonly SynchronizationContext? _mainThreadContext; private volatile bool IsDiagnosing = false; @@ -17,16 +17,13 @@ public class SlangFormatter : ICodeFormatter public static readonly uint ColorInstruction = ColorFromHTML("#ffff00"); public static readonly uint ColorString = ColorFromHTML("#ce9178"); - private object _textLock = new(); + private HashSet _linesWithErrors = new(); public SlangFormatter() { // 1. Capture the Main Thread context. // This works because the Editor instantiates this class on the main thread. _mainThreadContext = SynchronizationContext.Current; - - _timer = new System.Timers.Timer(250); - _timer.AutoReset = false; } public override string Compile() @@ -50,89 +47,94 @@ public class SlangFormatter : ICodeFormatter _lspCancellationToken = new CancellationTokenSource(); - _ = HandleLsp(_lspCancellationToken.Token, this.RawText); + _ = Task.Run(() => HandleLsp(_lspCancellationToken.Token), _lspCancellationToken.Token); } private void OnTimerElapsed(object sender, ElapsedEventArgs e) { } - private async Task HandleLsp(CancellationToken cancellationToken, string text) + private async Task HandleLsp(CancellationToken cancellationToken) { try { - await Task.Delay(500, cancellationToken); + await Task.Delay(200, cancellationToken); if (cancellationToken.IsCancellationRequested) + { return; - - List diagnosis = Marshal.DiagnoseSource(text); - - var dict = diagnosis - .GroupBy(d => d.Range.StartLine) - .ToDictionary(g => g.Key, g => g.ToList()); + } // 3. Dispatch the UI update to the Main Thread if (_mainThreadContext != null) { // Post ensures ApplyDiagnostics runs on the captured thread (Main Thread) - _mainThreadContext.Post(_ => ApplyDiagnostics(dict), null); + _mainThreadContext.Post(_ => ApplyDiagnostics(), null); } else { // Fallback: If context is null (rare in Unity), try running directly // but warn, as this might crash if not thread-safe. L.Warning("SynchronizationContext was null. Attempting direct update (risky)."); - ApplyDiagnostics(dict); + ApplyDiagnostics(); } } finally { } } // This runs on the Main Thread - private void ApplyDiagnostics(Dictionary> dict) + private void ApplyDiagnostics() { + List diagnosis = Marshal.DiagnoseSource(this.RawText); + + var dict = diagnosis.GroupBy(d => d.Range.StartLine).ToDictionary(g => g.Key); + + var linesToRefresh = new HashSet(dict.Keys); + linesToRefresh.UnionWith(_linesWithErrors); + IsDiagnosing = true; - // Standard LSP uses 0-based indexing. - for (int i = 0; i < this.Lines.Count; i++) + + foreach (var lineIndex in linesToRefresh) { - uint lineIndex = (uint)i; + // safety check for out of bounds (in case lines were deleted) + if (lineIndex >= this.Lines.Count) + continue; - if (dict.TryGetValue(lineIndex, out var lineDiagnostics)) + var line = this.Lines[(int)lineIndex]; + + if (line is null) + continue; + + line.ClearTokens(); + + Dictionary lineDict = Marshal + .TokenizeLine(line.Text) + .Tokens.ToDictionary((t) => t.Column); + + if (dict.ContainsKey(lineIndex)) { - var line = this.Lines[i]; - if (line is null) + foreach (var lineDiagnostic in dict[lineIndex]) { - continue; - } - - var tokenMap = line.Tokens.ToDictionary((t) => t.Column); - - foreach (var diag in lineDiagnostics) - { - var newToken = new SemanticToken + lineDict[(int)lineDiagnostic.Range.StartCol] = new SemanticToken { - Column = (int)diag.Range.StartCol, - Length = (int)(diag.Range.EndCol - diag.Range.StartCol), - Line = i, + Column = Math.Abs((int)lineDiagnostic.Range.StartCol), + Length = Math.Abs( + (int)(lineDiagnostic.Range.EndCol - lineDiagnostic.Range.StartCol) + ), + Line = (int)lineIndex, IsError = true, - Data = diag.Message, - Color = ICodeFormatter.ColorError, + Data = lineDiagnostic.Message, + Color = SlangFormatter.ColorError, }; - - L.Info( - $"Col: {newToken.Column} -- Length: {newToken.Length} -- Msg: {newToken.Data}" - ); - - tokenMap[newToken.Column] = newToken; - } - - line.ClearTokens(); - - foreach (var token in tokenMap.Values) - { - line.AddToken(token); } } + + foreach (var token in lineDict.Values) + { + line.AddToken(token); + } } + + _linesWithErrors = new HashSet(dict.Keys); + IsDiagnosing = false; } } diff --git a/rust_compiler/libs/parser/src/lib.rs b/rust_compiler/libs/parser/src/lib.rs index 85496d9..d82ec44 100644 --- a/rust_compiler/libs/parser/src/lib.rs +++ b/rust_compiler/libs/parser/src/lib.rs @@ -31,16 +31,16 @@ quick_error! { source(err) } UnexpectedToken(span: Span, token: Token) { - display("Unexpected token: {:?}", token) + display("Unexpected token: {}", token.token_type) } DuplicateIdentifier(span: Span, token: Token) { - display("Duplicate identifier: {:?}", token) + display("Duplicate identifier: {}", token.token_type) } InvalidSyntax(span: Span, reason: String) { - display("Invalid syntax: {:?}, Reason: {}", span, reason) + display("Invalid syntax: {}", reason) } UnsupportedKeyword(span: Span, token: Token) { - display("Unsupported keyword: {:?}", token) + display("Unsupported keyword: {}", token.token_type) } UnexpectedEOF { display("Unexpected EOF") diff --git a/rust_compiler/libs/tokenizer/src/lib.rs b/rust_compiler/libs/tokenizer/src/lib.rs index 9434c2d..c6a5fba 100644 --- a/rust_compiler/libs/tokenizer/src/lib.rs +++ b/rust_compiler/libs/tokenizer/src/lib.rs @@ -19,18 +19,18 @@ quick_error! { source(err) } NumberParseError(err: std::num::ParseIntError, line: usize, column: usize, original: String) { - display("Number Parse Error: {}\nLine: {}, Column: {}", err, line, column) + display("Number Parse Error: {}", err) source(err) } DecimalParseError(err: rust_decimal::Error, line: usize, column: usize, original: String) { - display("Decimal Parse Error: {}\nLine: {}, Column: {}", err, line, column) + display("Decimal Parse Error: {}", err) source(err) } UnknownSymbolError(char: char, line: usize, column: usize, original: String) { - display("Unknown Symbol: {}\nLine: {}, Column: {}", char, line, column) + display("Unknown Symbol: {}", char) } UnknownKeywordOrIdentifierError(val: String, line: usize, column: usize, original: String) { - display("Unknown Keyword or Identifier: {}\nLine: {}, Column: {}", val, line, column) + display("Unknown Keyword or Identifier: {}", val) } } } diff --git a/rust_compiler/src/ffi/mod.rs b/rust_compiler/src/ffi/mod.rs index 84159cf..a132297 100644 --- a/rust_compiler/src/ffi/mod.rs +++ b/rust_compiler/src/ffi/mod.rs @@ -106,8 +106,6 @@ pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec) -> safer_ffi::Vec {} + Err(ref e) => { + use tokenizer::Error::*; + let (err_str, col, og) = match e { + NumberParseError(_, _, col, og) + | DecimalParseError(_, _, col, og) + | UnknownSymbolError(_, _, col, og) + | UnknownKeywordOrIdentifierError(_, _, col, og) => (e.to_string(), col, og), + _ => continue, + }; + + tokens.push(FfiToken { + column: *col as i32, + error: err_str.into(), + tooltip: "".into(), + length: og.len() as i32, + token_kind: 0, + }) + } Ok(Token { column, original_string, From 0977d3d0d53e99194dc90b06c094d2a3f4f44e41 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Mon, 1 Dec 2025 15:06:53 -0700 Subject: [PATCH 10/12] Remove unwrap() in favor of ok_or() --- 2 | 1553 ++++++++++++++++++++++++++ rust_compiler/libs/parser/src/lib.rs | 45 +- 2 files changed, 1577 insertions(+), 21 deletions(-) create mode 100644 2 diff --git a/2 b/2 new file mode 100644 index 0000000..18f06ed --- /dev/null +++ b/2 @@ -0,0 +1,1553 @@ +#[cfg(test)] +mod test; + +pub mod sys_call; +pub mod tree_node; + +use crate::sys_call::System; +use quick_error::quick_error; +use std::io::SeekFrom; +use sys_call::SysCall; +use tokenizer::{ + self, Tokenizer, TokenizerBuffer, + token::{Keyword, Symbol, Token, TokenType}, +}; +use tree_node::*; + +#[macro_export] +/// A macro to create a boxed value. +macro_rules! boxed { + ($e:expr) => { + Box::new($e) + }; +} + +quick_error! { + #[derive(Debug)] + pub enum Error { + TokenizerError(err: tokenizer::Error) { + from() + display("Tokenizer Error: {}", err) + source(err) + } + UnexpectedToken(span: Span, token: Token) { + display("Unexpected token: {}", token.token_type) + } + DuplicateIdentifier(span: Span, token: Token) { + display("Duplicate identifier: {}", token.token_type) + } + InvalidSyntax(span: Span, reason: String) { + display("Invalid syntax: {}", reason) + } + UnsupportedKeyword(span: Span, token: Token) { + display("Unsupported keyword: {}", token.token_type) + } + UnexpectedEOF { + display("Unexpected EOF") + } + } +} + +impl From for lsp_types::Diagnostic { + fn from(value: Error) -> Self { + use Error::*; + use lsp_types::*; + match value { + TokenizerError(e) => e.into(), + UnexpectedToken(span, _) + | DuplicateIdentifier(span, _) + | InvalidSyntax(span, _) + | UnsupportedKeyword(span, _) => Diagnostic { + message: value.to_string(), + severity: Some(DiagnosticSeverity::ERROR), + range: span.into(), + ..Default::default() + }, + UnexpectedEOF => Diagnostic { + message: value.to_string(), + severity: Some(DiagnosticSeverity::ERROR), + ..Default::default() + }, + } + } +} + +macro_rules! self_matches_peek { + ($self:ident, $pattern:pat) => { + matches!($self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. })) + }; + ($self:ident, $pattern:pat if $cond:expr) => { + matches!($self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. }) if $cond) + }; +} + +macro_rules! token_matches { + ($token:ident, $pattern:pat) => { + matches!($token.token_type, $pattern) + }; + ($token:expr, $pattern:pat) => { + matches!($token.token_type, $pattern) + }; + ($token:ident, $pattern:pat if $cond:expr) => { + matches!($token.token_type, $pattern if $cond) + }; + ($token:expr, $pattern:pat if $cond:expr) => { + matches!($token.token_type, $pattern if $cond) + }; +} + +macro_rules! self_matches_current { + ($self:ident, $pattern:pat) => { + matches!($self.current_token, Some(Token { token_type: $pattern, .. })) + }; + ($self:ident, $pattern:pat if $cond:expr) => { + matches!($self.current_token, Some(Token { token_type: $pattern, .. }) if $cond) + }; +} + +pub struct Parser<'a> { + tokenizer: TokenizerBuffer<'a>, + current_token: Option, + pub errors: Vec, +} + +impl<'a> Parser<'a> { + pub fn new(tokenizer: Tokenizer<'a>) -> Self { + Parser { + tokenizer: TokenizerBuffer::new(tokenizer), + current_token: None, + errors: Vec::new(), + } + } + + /// Calculates a Span from a given Token reference. + /// This is a static helper to avoid borrowing `self` when we already have a token ref. + fn token_to_span(t: &Token) -> Span { + let len = t.original_string.as_ref().map(|s| s.len()).unwrap_or(0); + Span { + start_line: t.line, + start_col: t.column, + end_line: t.line, + end_col: t.column + len, + } + } + + fn current_span(&self) -> Span { + self.current_token + .as_ref() + .map(Self::token_to_span) + .unwrap_or(Span { + start_line: 0, + start_col: 0, + end_line: 0, + end_col: 0, + }) + } + + /// Helper to run a parsing closure and wrap the result in a Spanned struct + fn spanned(&mut self, parser: F) -> Result, Error> + where + F: FnOnce(&mut Self) -> Result, + { + // Peek at the start token. If no current token (parsing hasn't started), peek the buffer. + let start_token = if self.current_token.is_some() { + self.current_token.clone() + } else { + self.tokenizer.peek()? + }; + + let (start_line, start_col) = start_token + .as_ref() + .map(|t| (t.line, t.column)) + .unwrap_or((1, 1)); + + let node = parser(self)?; + + // The end token is the current_token after parsing. + let end_token = self.current_token.as_ref(); + + let (end_line, end_col) = end_token + .map(|t| { + let len = t.original_string.as_ref().map(|s| s.len()).unwrap_or(0); + (t.line, t.column + len) + }) + .unwrap_or((start_line, start_col)); + + Ok(Spanned { + span: Span { + start_line, + start_col, + end_line, + end_col, + }, + node, + }) + } + + /// Skips tokens until a statement boundary is found to recover from errors. + fn synchronize(&mut self) -> Result<(), Error> { + // We advance once to consume the error-causing token if we haven't already + // But often the error happens after we consumed something. + // Safe bet: consume current, then look. + + // If we assign next, we might be skipping the very token we want to sync on if the error didn't consume it? + // Usually, in recursive descent, the error is raised when `current` is unexpected. + // We want to discard `current` and move on. + self.assign_next()?; + + while let Some(token) = &self.current_token { + if token.token_type == TokenType::Symbol(Symbol::Semicolon) { + // Consuming the semicolon is a good place to stop and resume parsing next statement + self.assign_next()?; + return Ok(()); + } + + // Check if the token looks like the start of a statement. + // If so, we don't consume it; we return so the loop in parse_all can try to parse it. + match token.token_type { + TokenType::Keyword(Keyword::Fn) + | TokenType::Keyword(Keyword::Let) + | TokenType::Keyword(Keyword::If) + | TokenType::Keyword(Keyword::While) + | TokenType::Keyword(Keyword::Loop) + | TokenType::Keyword(Keyword::Device) + | TokenType::Keyword(Keyword::Return) => return Ok(()), + _ => {} + } + + self.assign_next()?; + } + + Ok(()) + } + + pub fn parse_all(&mut self) -> Result, Error> { + let first_token = self.tokenizer.peek().unwrap_or(None); + let (start_line, start_col) = first_token + .as_ref() + .map(|tok| (tok.line, tok.column)) + .unwrap_or((1, 1)); + + let mut expressions = Vec::>::new(); + + loop { + // Check EOF without unwrapping error + match self.tokenizer.peek() { + Ok(None) => break, + Err(e) => { + self.errors.push(Error::TokenizerError(e)); + break; + } + _ => {} + } + + match self.parse() { + Ok(Some(expression)) => { + expressions.push(expression); + } + Ok(None) => break, + Err(e) => { + self.errors.push(e); + // Recover + if self.synchronize().is_err() { + // If sync failed (e.g. EOF during sync), break + break; + } + } + } + } + + // Even if we had errors, we return whatever partial AST we managed to build. + // If expressions is empty and we had errors, it's a failed parse, but we return a block. + + // Use the last token position for end span, or start if nothing parsed + let end_token_opt = self.tokenizer.peek().unwrap_or(None); + let (end_line, end_col) = end_token_opt + .map(|tok| { + let len = tok.original_string.as_ref().map(|s| s.len()).unwrap_or(0); + (tok.line, tok.column + len) + }) + .unwrap_or((start_line, start_col)); + + let span = Span { + start_line, + end_line, + start_col, + end_col, + }; + + Ok(Some(Expression::Block(Spanned { + node: BlockExpression(expressions), + span, + }))) + } + + pub fn parse(&mut self) -> Result>, Error> { + self.assign_next()?; + + // If assign_next hit EOF or error? + if self.current_token.is_none() { + return Ok(None); + } + + let expr = self.expression()?; + + if self_matches_peek!(self, TokenType::Symbol(Symbol::Semicolon)) { + self.assign_next()?; + } + + Ok(expr) + } + + fn assign_next(&mut self) -> Result<(), Error> { + self.current_token = self.tokenizer.next_token()?; + Ok(()) + } + + fn get_next(&mut self) -> Result, Error> { + self.assign_next()?; + Ok(self.current_token.as_ref()) + } + + fn expression(&mut self) -> Result>, Error> { + // Parse the Left Hand Side (unary/primary expression) + let lhs = self.unary()?; + + let Some(lhs) = lhs else { + return Ok(None); + }; + + // check if the next or current token is an operator, comparison, or logical symbol + if self_matches_peek!( + self, + TokenType::Symbol(s) if s.is_operator() || s.is_comparison() || s.is_logical() + ) { + return Ok(Some(self.infix(lhs)?)); + } else if self_matches_current!( + self, + TokenType::Symbol(s) if s.is_operator() || s.is_comparison() || s.is_logical() + ) { + self.tokenizer.seek(SeekFrom::Current(-1))?; + return Ok(Some(self.infix(lhs)?)); + } + + Ok(Some(lhs)) + } + + fn unary(&mut self) -> Result>, Error> { + macro_rules! matches_keyword { + ($keyword:expr, $($pattern:pat),+) => { + matches!($keyword, $($pattern)|+) + }; + } + + let Some(current_token) = self.current_token.as_ref() else { + return Ok(None); + }; + + if token_matches!(current_token, TokenType::EOF) { + return Ok(None); + } + + let expr = match current_token.token_type { + TokenType::Keyword(e) if matches_keyword!(e, Keyword::Enum) => { + return Err(Error::UnsupportedKeyword( + self.current_span(), + current_token.clone(), + )); + } + + TokenType::Keyword(Keyword::Let) => { + // declaration is wrapped in spanned inside the function, but expects 'let' to be current + Some(self.spanned(|p| p.declaration())?) + } + + TokenType::Keyword(Keyword::Device) => { + let spanned_dev = self.spanned(|p| p.device())?; + Some(Spanned { + span: spanned_dev.span, + node: Expression::DeviceDeclaration(spanned_dev), + }) + } + + TokenType::Keyword(Keyword::Fn) => { + let spanned_fn = self.spanned(|p| p.function())?; + Some(Spanned { + span: spanned_fn.span, + node: Expression::Function(spanned_fn), + }) + } + + TokenType::Keyword(Keyword::If) => { + let spanned_if = self.spanned(|p| p.if_expression())?; + Some(Spanned { + span: spanned_if.span, + node: Expression::If(spanned_if), + }) + } + + TokenType::Keyword(Keyword::Loop) => { + let spanned_loop = self.spanned(|p| p.loop_expression())?; + Some(Spanned { + span: spanned_loop.span, + node: Expression::Loop(spanned_loop), + }) + } + + TokenType::Keyword(Keyword::While) => { + let spanned_while = self.spanned(|p| p.while_expression())?; + Some(Spanned { + span: spanned_while.span, + node: Expression::While(spanned_while), + }) + } + + TokenType::Keyword(Keyword::Break) => { + let span = self.current_span(); + // make sure the next token is a semi-colon + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(next, TokenType::Symbol(Symbol::Semicolon)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); + } + Some(Spanned { + span, + node: Expression::Break(span), + }) + } + + TokenType::Keyword(Keyword::Continue) => { + let span = self.current_span(); + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(next, TokenType::Symbol(Symbol::Semicolon)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); + } + Some(Spanned { + span, + node: Expression::Continue(span), + }) + } + + TokenType::Identifier(ref id) if SysCall::is_syscall(id) => { + let spanned_call = self.spanned(|p| p.syscall())?; + Some(Spanned { + span: spanned_call.span, + node: Expression::Syscall(spanned_call), + }) + } + + TokenType::Identifier(_) + if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) => + { + let spanned_invoke = self.spanned(|p| p.invocation())?; + Some(Spanned { + span: spanned_invoke.span, + node: Expression::Invocation(spanned_invoke), + }) + } + + TokenType::Identifier(_) + if self_matches_peek!(self, TokenType::Symbol(Symbol::Assign)) => + { + let spanned_assign = self.spanned(|p| p.assignment())?; + Some(Spanned { + span: spanned_assign.span, + node: Expression::Assignment(spanned_assign), + }) + } + + TokenType::Identifier(ref id) => { + let span = self.current_span(); + Some(Spanned { + span, + node: Expression::Variable(Spanned { + span, + node: id.clone(), + }), + }) + } + + TokenType::Symbol(Symbol::LBrace) => { + let spanned_block = self.spanned(|p| p.block())?; + Some(Spanned { + span: spanned_block.span, + node: Expression::Block(spanned_block), + }) + } + + TokenType::Number(_) | TokenType::String(_) | TokenType::Boolean(_) => { + let spanned_lit = self.spanned(|p| p.literal())?; + Some(Spanned { + span: spanned_lit.span, + node: Expression::Literal(spanned_lit), + }) + } + + TokenType::Symbol(Symbol::LParen) => { + // Priority handles its own spanning + self.spanned(|p| p.priority())?.node.map(|node| *node) + } + + TokenType::Symbol(Symbol::Minus) => { + // Need to handle span manually because unary call is next + let start_span = self.current_span(); + self.assign_next()?; + let inner_expr = self.unary()?.ok_or(Error::UnexpectedEOF)?; + let combined_span = Span { + start_line: start_span.start_line, + start_col: start_span.start_col, + end_line: inner_expr.span.end_line, + end_col: inner_expr.span.end_col, + }; + Some(Spanned { + span: combined_span, + node: Expression::Negation(boxed!(inner_expr)), + }) + } + + TokenType::Symbol(Symbol::LogicalNot) => { + let start_span = self.current_span(); + self.assign_next()?; + let inner_expr = self.unary()?.ok_or(Error::UnexpectedEOF)?; + let combined_span = Span { + start_line: start_span.start_line, + start_col: start_span.start_col, + end_line: inner_expr.span.end_line, + end_col: inner_expr.span.end_col, + }; + Some(Spanned { + span: combined_span, + node: Expression::Logical(Spanned { + span: combined_span, + node: LogicalExpression::Not(boxed!(inner_expr)), + }), + }) + } + + _ => { + return Err(Error::UnexpectedToken( + self.current_span(), + current_token.clone(), + )); + } + }; + + Ok(expr) + } + + fn get_infix_child_node(&mut self) -> Result, Error> { + let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; + + match current_token.token_type { + TokenType::Number(_) | TokenType::Boolean(_) => { + let lit = self.spanned(|p| p.literal())?; + Ok(Spanned { + span: lit.span, + node: Expression::Literal(lit), + }) + } + TokenType::Identifier(ref ident) + if !self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) => + { + let span = self.current_span(); + Ok(Spanned { + span, + node: Expression::Variable(Spanned { + span, + node: ident.clone(), + }), + }) + } + TokenType::Symbol(Symbol::LParen) => Ok(*self + .spanned(|p| p.priority())? + .node + .ok_or(Error::UnexpectedEOF)?), + TokenType::Identifier(_) + if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) => + { + let inv = self.spanned(|p| p.invocation())?; + Ok(Spanned { + span: inv.span, + node: Expression::Invocation(inv), + }) + } + TokenType::Symbol(Symbol::Minus) => { + let start_span = self.current_span(); + self.assign_next()?; + let inner = self.get_infix_child_node()?; + let span = Span { + start_line: start_span.start_line, + start_col: start_span.start_col, + end_line: inner.span.end_line, + end_col: inner.span.end_col, + }; + Ok(Spanned { + span, + node: Expression::Negation(boxed!(inner)), + }) + } + TokenType::Symbol(Symbol::LogicalNot) => { + let start_span = self.current_span(); + self.assign_next()?; + let inner = self.get_infix_child_node()?; + let span = Span { + start_line: start_span.start_line, + start_col: start_span.start_col, + end_line: inner.span.end_line, + end_col: inner.span.end_col, + }; + Ok(Spanned { + span, + node: Expression::Logical(Spanned { + span, + node: LogicalExpression::Not(boxed!(inner)), + }), + }) + } + _ => Err(Error::UnexpectedToken( + self.current_span(), + current_token.clone(), + )), + } + } + + fn device(&mut self) -> Result { + let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; + if !self_matches_current!(self, TokenType::Keyword(Keyword::Device)) { + return Err(Error::UnexpectedToken( + self.current_span(), + current_token.clone(), + )); + } + + let identifier_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + let identifier_span = Self::token_to_span(identifier_token); + let identifier = match identifier_token.token_type { + TokenType::Identifier(ref id) => id.clone(), + _ => { + return Err(Error::UnexpectedToken( + Self::token_to_span(identifier_token), + identifier_token.clone(), + )); + } + }; + + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(current_token, TokenType::Symbol(Symbol::Assign)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(current_token), + current_token.clone(), + )); + } + + let device_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + let device = match device_token.token_type { + TokenType::String(ref id) => id.clone(), + _ => { + return Err(Error::UnexpectedToken( + Self::token_to_span(device_token), + device_token.clone(), + )); + } + }; + + Ok(DeviceDeclarationExpression { + name: Spanned { + span: identifier_span, + node: identifier, + }, + device, + }) + } + + fn assignment(&mut self) -> Result { + let identifier_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; + let identifier_span = Self::token_to_span(identifier_token); + let identifier = match identifier_token.token_type { + TokenType::Identifier(ref id) => id.clone(), + _ => { + return Err(Error::UnexpectedToken( + self.current_span(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, + )); + } + }; + + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?.clone(); + if !token_matches!(current_token, TokenType::Symbol(Symbol::Assign)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(¤t_token), + current_token.clone(), + )); + } + self.assign_next()?; + + let expression = self.expression()?.ok_or(Error::UnexpectedEOF)?; + + Ok(AssignmentExpression { + identifier: Spanned { + span: identifier_span, + node: identifier, + }, + expression: boxed!(expression), + }) + } + + fn infix(&mut self, previous: Spanned) -> Result, Error> { + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?.clone(); + + match previous.node { + Expression::Binary(_) + | Expression::Logical(_) + | Expression::Invocation(_) + | Expression::Priority(_) + | Expression::Literal(_) + | Expression::Variable(_) + | Expression::Negation(_) => {} + _ => { + return Err(Error::InvalidSyntax( + self.current_span(), + String::from("Invalid expression for binary/logical operation"), + )); + } + } + + let mut expressions = vec![previous]; + let mut operators = Vec::::new(); + + let mut temp_token = current_token.clone(); + + while token_matches!( + temp_token, + TokenType::Symbol(s) if s.is_operator() || s.is_comparison() || s.is_logical() + ) { + let operator = match temp_token.token_type { + TokenType::Symbol(s) => s, + _ => unreachable!(), + }; + operators.push(operator); + self.assign_next()?; + expressions.push(self.get_infix_child_node()?); + + temp_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?.clone(); + } + + if operators.len() != expressions.len() - 1 { + return Err(Error::InvalidSyntax( + self.current_span(), + String::from("Invalid number of operators"), + )); + } + + // --- PRECEDENCE LEVEL 1: Exponent (**) --- + for (i, operator) in operators.iter().enumerate().rev() { + if operator == &Symbol::Exp { + let right = expressions.remove(i + 1); + let left = expressions.remove(i); + let span = Span { + start_line: left.span.start_line, + start_col: left.span.start_col, + end_line: right.span.end_line, + end_col: right.span.end_col, + }; + expressions.insert( + i, + Spanned { + span, + node: Expression::Binary(Spanned { + span, + node: BinaryExpression::Exponent(boxed!(left), boxed!(right)), + }), + }, + ); + } + } + operators.retain(|symbol| symbol != &Symbol::Exp); + + // Common macro for binary ops + macro_rules! process_binary_ops { + ($ops:pat, $variant:ident) => { + let mut current_iteration = 0; + for (i, operator) in operators.iter().enumerate() { + if matches!(operator, $ops) { + let index = i - current_iteration; + let left = expressions.remove(index); + let right = expressions.remove(index); + let span = Span { + start_line: left.span.start_line, + start_col: left.span.start_col, + end_line: right.span.end_line, + end_col: right.span.end_col, + }; + + let node = match operator { + Symbol::Asterisk => { + BinaryExpression::Multiply(boxed!(left), boxed!(right)) + } + Symbol::Slash => BinaryExpression::Divide(boxed!(left), boxed!(right)), + Symbol::Percent => { + BinaryExpression::Modulo(boxed!(left), boxed!(right)) + } + Symbol::Plus => BinaryExpression::Add(boxed!(left), boxed!(right)), + Symbol::Minus => { + BinaryExpression::Subtract(boxed!(left), boxed!(right)) + } + _ => unreachable!(), + }; + + expressions.insert( + index, + Spanned { + span, + node: Expression::Binary(Spanned { span, node }), + }, + ); + current_iteration += 1; + } + } + operators.retain(|symbol| !matches!(symbol, $ops)); + }; + } + + // --- PRECEDENCE LEVEL 2: Multiplicative (*, /, %) --- + process_binary_ops!( + Symbol::Slash | Symbol::Asterisk | Symbol::Percent, + BinaryExpression + ); + + // --- PRECEDENCE LEVEL 3: Additive (+, -) --- + process_binary_ops!(Symbol::Plus | Symbol::Minus, BinaryExpression); + + // --- PRECEDENCE LEVEL 4: Comparison (<, >, <=, >=) --- + let mut current_iteration = 0; + for (i, operator) in operators.iter().enumerate() { + if operator.is_comparison() && !matches!(operator, Symbol::Equal | Symbol::NotEqual) { + let index = i - current_iteration; + let left = expressions.remove(index); + let right = expressions.remove(index); + let span = Span { + start_line: left.span.start_line, + start_col: left.span.start_col, + end_line: right.span.end_line, + end_col: right.span.end_col, + }; + + let node = match operator { + Symbol::LessThan => LogicalExpression::LessThan(boxed!(left), boxed!(right)), + Symbol::GreaterThan => { + LogicalExpression::GreaterThan(boxed!(left), boxed!(right)) + } + Symbol::LessThanOrEqual => { + LogicalExpression::LessThanOrEqual(boxed!(left), boxed!(right)) + } + Symbol::GreaterThanOrEqual => { + LogicalExpression::GreaterThanOrEqual(boxed!(left), boxed!(right)) + } + _ => unreachable!(), + }; + + expressions.insert( + index, + Spanned { + span, + node: Expression::Logical(Spanned { span, node }), + }, + ); + current_iteration += 1; + } + } + operators.retain(|symbol| { + !symbol.is_comparison() || matches!(symbol, Symbol::Equal | Symbol::NotEqual) + }); + + // --- PRECEDENCE LEVEL 5: Equality (==, !=) --- + current_iteration = 0; + for (i, operator) in operators.iter().enumerate() { + if matches!(operator, Symbol::Equal | Symbol::NotEqual) { + let index = i - current_iteration; + let left = expressions.remove(index); + let right = expressions.remove(index); + let span = Span { + start_line: left.span.start_line, + start_col: left.span.start_col, + end_line: right.span.end_line, + end_col: right.span.end_col, + }; + + let node = match operator { + Symbol::Equal => LogicalExpression::Equal(boxed!(left), boxed!(right)), + Symbol::NotEqual => LogicalExpression::NotEqual(boxed!(left), boxed!(right)), + _ => unreachable!(), + }; + + expressions.insert( + index, + Spanned { + span, + node: Expression::Logical(Spanned { span, node }), + }, + ); + current_iteration += 1; + } + } + operators.retain(|symbol| !matches!(symbol, Symbol::Equal | Symbol::NotEqual)); + + // --- PRECEDENCE LEVEL 6: Logical AND (&&) --- + current_iteration = 0; + for (i, operator) in operators.iter().enumerate() { + if matches!(operator, Symbol::LogicalAnd) { + let index = i - current_iteration; + let left = expressions.remove(index); + let right = expressions.remove(index); + let span = Span { + start_line: left.span.start_line, + start_col: left.span.start_col, + end_line: right.span.end_line, + end_col: right.span.end_col, + }; + + expressions.insert( + index, + Spanned { + span, + node: Expression::Logical(Spanned { + span, + node: LogicalExpression::And(boxed!(left), boxed!(right)), + }), + }, + ); + current_iteration += 1; + } + } + operators.retain(|symbol| !matches!(symbol, Symbol::LogicalAnd)); + + // --- PRECEDENCE LEVEL 7: Logical OR (||) --- + current_iteration = 0; + for (i, operator) in operators.iter().enumerate() { + if matches!(operator, Symbol::LogicalOr) { + let index = i - current_iteration; + let left = expressions.remove(index); + let right = expressions.remove(index); + let span = Span { + start_line: left.span.start_line, + start_col: left.span.start_col, + end_line: right.span.end_line, + end_col: right.span.end_col, + }; + + expressions.insert( + index, + Spanned { + span, + node: Expression::Logical(Spanned { + span, + node: LogicalExpression::Or(boxed!(left), boxed!(right)), + }), + }, + ); + current_iteration += 1; + } + } + operators.retain(|symbol| !matches!(symbol, Symbol::LogicalOr)); + + if expressions.len() != 1 || !operators.is_empty() { + return Err(Error::InvalidSyntax( + self.current_span(), + String::from("Invalid number of operators"), + )); + } + + if token_matches!( + temp_token, + TokenType::Symbol(Symbol::Semicolon) | TokenType::Symbol(Symbol::RParen) + ) { + self.tokenizer.seek(SeekFrom::Current(-1))?; + } + + expressions.pop().ok_or(Error::UnexpectedEOF) + } + + fn priority(&mut self) -> Result>>, Error> { + let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; + if !token_matches!(current_token, TokenType::Symbol(Symbol::LParen)) { + return Err(Error::UnexpectedToken( + self.current_span(), + current_token.clone(), + )); + } + + self.assign_next()?; + let expression = self.expression()?.ok_or(Error::UnexpectedEOF)?; + + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(current_token, TokenType::Symbol(Symbol::RParen)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(current_token), + current_token.clone(), + )); + } + + Ok(Some(boxed!(expression))) + } + + fn invocation(&mut self) -> Result { + let identifier_token = self.current_token.as_ref().unwrap(); + let identifier_span = Self::token_to_span(identifier_token); + let identifier = match identifier_token.token_type { + TokenType::Identifier(ref id) => id.clone(), + _ => { + return Err(Error::UnexpectedToken( + self.current_span(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, + )); + } + }; + + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(current_token, TokenType::Symbol(Symbol::LParen)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(current_token), + current_token.clone(), + )); + } + + let mut arguments = Vec::>::new(); + + while !token_matches!( + self.get_next()?.ok_or(Error::UnexpectedEOF)?, + TokenType::Symbol(Symbol::RParen) + ) { + let expression = self.expression()?.ok_or(Error::UnexpectedEOF)?; + + if let Expression::Block(_) = expression.node { + return Err(Error::InvalidSyntax( + self.current_span(), + String::from("Block expressions are not allowed in function invocations"), + )); + } + + arguments.push(expression); + + if !self_matches_peek!(self, TokenType::Symbol(Symbol::Comma)) + && !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) + { + let next_token = self.get_next()?.unwrap(); + return Err(Error::UnexpectedToken( + Self::token_to_span(next_token), + next_token.clone(), + )); + } + + if !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) { + self.assign_next()?; + } + } + + Ok(InvocationExpression { + name: Spanned { + span: identifier_span, + node: identifier, + }, + arguments, + }) + } + + fn block(&mut self) -> Result { + let mut expressions = Vec::>::new(); + let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; + + if !token_matches!(current_token, TokenType::Symbol(Symbol::LBrace)) { + return Err(Error::UnexpectedToken( + self.current_span(), + current_token.clone(), + )); + } + + while !self_matches_peek!( + self, + TokenType::Symbol(Symbol::RBrace) | TokenType::Keyword(Keyword::Return) + ) { + let expression = self.parse()?.ok_or(Error::UnexpectedEOF)?; + expressions.push(expression); + } + + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + + if token_matches!(current_token, TokenType::Keyword(Keyword::Return)) { + // Need to capture return span + let ret_start_span = Self::token_to_span(current_token); + self.assign_next()?; + let expression = self.expression()?.ok_or(Error::UnexpectedEOF)?; + + let ret_span = Span { + start_line: ret_start_span.start_line, + start_col: ret_start_span.start_col, + end_line: expression.span.end_line, + end_col: expression.span.end_col, + }; + + let return_expr = Spanned { + span: ret_span, + node: Expression::Return(boxed!(expression)), + }; + expressions.push(return_expr); + + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(next, TokenType::Symbol(Symbol::Semicolon)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); + } + + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(next, TokenType::Symbol(Symbol::RBrace)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); + } + } + + Ok(BlockExpression(expressions)) + } + + fn declaration(&mut self) -> Result { + let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; + if !self_matches_current!(self, TokenType::Keyword(Keyword::Let)) { + return Err(Error::UnexpectedToken( + self.current_span(), + current_token.clone(), + )); + } + let identifier_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + let identifier_span = Self::token_to_span(identifier_token); + let identifier = match identifier_token.token_type { + TokenType::Identifier(ref id) => id.clone(), + _ => { + return Err(Error::UnexpectedToken( + Self::token_to_span(identifier_token), + identifier_token.clone(), + )); + } + }; + + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?.clone(); + + if !token_matches!(current_token, TokenType::Symbol(Symbol::Assign)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(¤t_token), + current_token.clone(), + )); + } + + self.assign_next()?; + let assignment_expression = self.expression()?.ok_or(Error::UnexpectedEOF)?; + + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(current_token, TokenType::Symbol(Symbol::Semicolon)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(current_token), + current_token.clone(), + )); + } + + Ok(Expression::Declaration( + Spanned { + span: identifier_span, + node: identifier, + }, + boxed!(assignment_expression), + )) + } + + fn literal(&mut self) -> Result { + let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; + let literal = match current_token.token_type { + TokenType::Number(num) => Literal::Number(num), + TokenType::String(ref string) => Literal::String(string.clone()), + TokenType::Boolean(boolean) => Literal::Boolean(boolean), + _ => { + return Err(Error::UnexpectedToken( + self.current_span(), + current_token.clone(), + )); + } + }; + + Ok(literal) + } + + fn if_expression(&mut self) -> Result { + // 'if' is current + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(next, TokenType::Symbol(Symbol::LParen)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); + } + self.assign_next()?; + + let condition = self.expression()?.ok_or(Error::UnexpectedEOF)?; + + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(next, TokenType::Symbol(Symbol::RParen)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); + } + + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(next, TokenType::Symbol(Symbol::LBrace)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); + } + + let body = self.spanned(|p| p.block())?; + + let else_branch = if self_matches_peek!(self, TokenType::Keyword(Keyword::Else)) { + self.assign_next()?; + + if self_matches_peek!(self, TokenType::Keyword(Keyword::If)) { + self.assign_next()?; + // Recurse for else if + let if_expr = self.spanned(|p| p.if_expression())?; + Some(boxed!(Spanned { + span: if_expr.span, + node: Expression::If(if_expr), + })) + } else if self_matches_peek!(self, TokenType::Symbol(Symbol::LBrace)) { + self.assign_next()?; + let block = self.spanned(|p| p.block())?; + Some(boxed!(Spanned { + span: block.span, + node: Expression::Block(block), + })) + } else { + let next = self.get_next()?.unwrap(); + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); + } + } else { + None + }; + + Ok(IfExpression { + condition: boxed!(condition), + body, + else_branch, + }) + } + + fn loop_expression(&mut self) -> Result { + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(next, TokenType::Symbol(Symbol::LBrace)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); + } + + let body = self.spanned(|p| p.block())?; + + Ok(LoopExpression { body }) + } + + fn while_expression(&mut self) -> Result { + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(next, TokenType::Symbol(Symbol::LParen)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); + } + self.assign_next()?; + + let condition = self.expression()?.ok_or(Error::UnexpectedEOF)?; + + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(next, TokenType::Symbol(Symbol::RParen)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); + } + + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(next, TokenType::Symbol(Symbol::LBrace)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); + } + + let body = self.block()?; + + Ok(WhileExpression { + condition: boxed!(condition), + body, + }) + } + + fn function(&mut self) -> Result { + // 'fn' is current + let fn_ident_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + let fn_ident_span = Self::token_to_span(fn_ident_token); + let fn_ident = match fn_ident_token.token_type { + TokenType::Identifier(ref id) => id.clone(), + _ => { + return Err(Error::UnexpectedToken( + Self::token_to_span(fn_ident_token), + fn_ident_token.clone(), + )); + } + }; + + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(current_token, TokenType::Symbol(Symbol::LParen)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(current_token), + current_token.clone(), + )); + } + + let mut arguments = Vec::>::new(); + + while !token_matches!( + self.get_next()?.ok_or(Error::UnexpectedEOF)?, + TokenType::Symbol(Symbol::RParen) + ) { + let current_token = self.current_token.as_ref().unwrap(); + let arg_span = Self::token_to_span(current_token); + let argument = match current_token.token_type { + TokenType::Identifier(ref id) => id.clone(), + _ => { + return Err(Error::UnexpectedToken( + Self::token_to_span(current_token), + current_token.clone(), + )); + } + }; + + let spanned_arg = Spanned { + span: arg_span, + node: argument, + }; + + if arguments.contains(&spanned_arg) { + return Err(Error::DuplicateIdentifier( + Self::token_to_span(current_token), + current_token.clone(), + )); + } + + arguments.push(spanned_arg); + + if !self_matches_peek!(self, TokenType::Symbol(Symbol::Comma)) + && !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) + { + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + return Err(Error::UnexpectedToken( + Self::token_to_span(next), + next.clone(), + )); + } + + if !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) { + self.assign_next()?; + } + } + + let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; + if !token_matches!(current_token, TokenType::Symbol(Symbol::LBrace)) { + return Err(Error::UnexpectedToken( + Self::token_to_span(current_token), + current_token.clone(), + )); + }; + + Ok(FunctionExpression { + name: Spanned { + span: fn_ident_span, + node: fn_ident, + }, + arguments, + body: self.block()?, + }) + } + + fn syscall(&mut self) -> Result { + fn check_length( + parser: &Parser, + arguments: &[Spanned], + length: usize, + ) -> Result<(), Error> { + if arguments.len() != length { + return Err(Error::InvalidSyntax( + parser.current_span(), + format!("Expected {} arguments", length), + )); + } + Ok(()) + } + + macro_rules! literal_or_variable { + ($iter:expr) => { + match $iter { + Some(expr) => match &expr.node { + Expression::Literal(literal) => { + LiteralOrVariable::Literal(literal.node.clone()) + } + Expression::Variable(ident) => LiteralOrVariable::Variable(ident.clone()), + _ => { + return Err(Error::UnexpectedToken( + self.current_span(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, + )) + } + }, + _ => { + return Err(Error::UnexpectedToken( + self.current_span(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, + )) + } + } + }; + } + + macro_rules! get_arg { + ($matcher: ident, $arg: expr) => { + match $arg { + LiteralOrVariable::$matcher(i) => i, + _ => { + return Err(Error::InvalidSyntax( + self.current_span(), + String::from("Expected a variable"), + )) + } + } + }; + } + + let invocation = self.invocation()?; + + match invocation.name.node.as_str() { + "yield" => { + check_length(self, &invocation.arguments, 0)?; + Ok(SysCall::System(sys_call::System::Yield)) + } + "sleep" => { + check_length(self, &invocation.arguments, 1)?; + let mut arg = invocation.arguments.into_iter(); + let expr = arg.next().ok_or(Error::UnexpectedEOF)?; + Ok(SysCall::System(System::Sleep(boxed!(expr)))) + } + "hash" => { + check_length(self, &invocation.arguments, 1)?; + let mut args = invocation.arguments.into_iter(); + let lit_str = literal_or_variable!(args.next()); + + let LiteralOrVariable::Literal(lit_str) = lit_str else { + return Err(Error::UnexpectedToken( + self.current_span(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, + )); + }; + + Ok(SysCall::System(System::Hash(lit_str))) + } + "loadFromDevice" => { + check_length(self, &invocation.arguments, 2)?; + let mut args = invocation.arguments.into_iter(); + + let device = literal_or_variable!(args.next()); + let next_arg = args.next(); + + let variable = match next_arg { + Some(expr) => match expr.node { + Expression::Literal(spanned_lit) => match spanned_lit.node { + Literal::String(s) => s, + _ => { + return Err(Error::UnexpectedToken( + self.current_span(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, + )); + } + }, + _ => { + return Err(Error::UnexpectedToken( + self.current_span(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, + )); + } + }, + _ => { + return Err(Error::UnexpectedToken( + self.current_span(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, + )); + } + }; + + Ok(SysCall::System(sys_call::System::LoadFromDevice( + device, + Literal::String(variable), + ))) + } + // ... (implementing other syscalls similarly using patterns above) + "setOnDevice" => { + check_length(self, &invocation.arguments, 3)?; + let mut args = invocation.arguments.into_iter(); + let device = literal_or_variable!(args.next()); + let logic_type = get_arg!(Literal, literal_or_variable!(args.next())); + let variable = args.next().ok_or(Error::UnexpectedEOF)?; + Ok(SysCall::System(sys_call::System::SetOnDevice( + device, + Literal::String(logic_type.to_string().replace("\"", "")), + boxed!(variable), + ))) + } + "setOnDeviceBatched" => { + check_length(self, &invocation.arguments, 3)?; + let mut args = invocation.arguments.into_iter(); + let device_hash = literal_or_variable!(args.next()); + let logic_type = get_arg!(Literal, literal_or_variable!(args.next())); + let variable = args.next().ok_or(Error::UnexpectedEOF)?; + Ok(SysCall::System(sys_call::System::SetOnDeviceBatched( + device_hash, + Literal::String(logic_type.to_string().replace("\"", "")), + boxed!(variable), + ))) + } + _ => { + // For Math functions or unknown functions + if SysCall::is_syscall(&invocation.name.node) { + // Attempt to parse as math if applicable, or error if strict + // Here we are falling back to simple handling or error. + // Since Math isn't fully expanded in this snippet, we return Unsupported. + Err(Error::UnsupportedKeyword( + self.current_span(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, + )) + } else { + Err(Error::UnsupportedKeyword( + self.current_span(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, + )) + } + } + } + } +} diff --git a/rust_compiler/libs/parser/src/lib.rs b/rust_compiler/libs/parser/src/lib.rs index d82ec44..bf5217e 100644 --- a/rust_compiler/libs/parser/src/lib.rs +++ b/rust_compiler/libs/parser/src/lib.rs @@ -563,7 +563,10 @@ impl<'a> Parser<'a> { }), }) } - TokenType::Symbol(Symbol::LParen) => Ok(*self.spanned(|p| p.priority())?.node.unwrap()), + TokenType::Symbol(Symbol::LParen) => Ok(*self + .spanned(|p| p.priority())? + .node + .ok_or(Error::UnexpectedEOF)?), TokenType::Identifier(_) if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) => { @@ -663,14 +666,14 @@ impl<'a> Parser<'a> { } fn assignment(&mut self) -> Result { - let identifier_token = self.current_token.as_ref().unwrap(); + let identifier_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; let identifier_span = Self::token_to_span(identifier_token); let identifier = match identifier_token.token_type { TokenType::Identifier(ref id) => id.clone(), _ => { return Err(Error::UnexpectedToken( self.current_span(), - self.current_token.clone().unwrap(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, )); } }; @@ -966,7 +969,7 @@ impl<'a> Parser<'a> { self.tokenizer.seek(SeekFrom::Current(-1))?; } - Ok(expressions.pop().unwrap()) + expressions.pop().ok_or(Error::UnexpectedEOF) } fn priority(&mut self) -> Result>>, Error> { @@ -993,14 +996,14 @@ impl<'a> Parser<'a> { } fn invocation(&mut self) -> Result { - let identifier_token = self.current_token.as_ref().unwrap(); + let identifier_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; let identifier_span = Self::token_to_span(identifier_token); let identifier = match identifier_token.token_type { TokenType::Identifier(ref id) => id.clone(), _ => { return Err(Error::UnexpectedToken( self.current_span(), - self.current_token.clone().unwrap(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, )); } }; @@ -1033,7 +1036,7 @@ impl<'a> Parser<'a> { if !self_matches_peek!(self, TokenType::Symbol(Symbol::Comma)) && !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) { - let next_token = self.get_next()?.unwrap(); + let next_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; return Err(Error::UnexpectedToken( Self::token_to_span(next_token), next_token.clone(), @@ -1230,7 +1233,7 @@ impl<'a> Parser<'a> { node: Expression::Block(block), })) } else { - let next = self.get_next()?.unwrap(); + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; return Err(Error::UnexpectedToken( Self::token_to_span(next), next.clone(), @@ -1325,7 +1328,7 @@ impl<'a> Parser<'a> { self.get_next()?.ok_or(Error::UnexpectedEOF)?, TokenType::Symbol(Symbol::RParen) ) { - let current_token = self.current_token.as_ref().unwrap(); + let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; let arg_span = Self::token_to_span(current_token); let argument = match current_token.token_type { TokenType::Identifier(ref id) => id.clone(), @@ -1354,7 +1357,7 @@ impl<'a> Parser<'a> { if !self_matches_peek!(self, TokenType::Symbol(Symbol::Comma)) && !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) { - let next = self.get_next()?.unwrap(); + let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; return Err(Error::UnexpectedToken( Self::token_to_span(next), next.clone(), @@ -1410,14 +1413,14 @@ impl<'a> Parser<'a> { _ => { return Err(Error::UnexpectedToken( self.current_span(), - self.current_token.clone().unwrap(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, )) } }, _ => { return Err(Error::UnexpectedToken( self.current_span(), - self.current_token.clone().unwrap(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, )) } } @@ -1448,7 +1451,7 @@ impl<'a> Parser<'a> { "sleep" => { check_length(self, &invocation.arguments, 1)?; let mut arg = invocation.arguments.into_iter(); - let expr = arg.next().unwrap(); + let expr = arg.next().ok_or(Error::UnexpectedEOF)?; Ok(SysCall::System(System::Sleep(boxed!(expr)))) } "hash" => { @@ -1459,7 +1462,7 @@ impl<'a> Parser<'a> { let LiteralOrVariable::Literal(lit_str) = lit_str else { return Err(Error::UnexpectedToken( self.current_span(), - self.current_token.clone().unwrap(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, )); }; @@ -1479,21 +1482,21 @@ impl<'a> Parser<'a> { _ => { return Err(Error::UnexpectedToken( self.current_span(), - self.current_token.clone().unwrap(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, )); } }, _ => { return Err(Error::UnexpectedToken( self.current_span(), - self.current_token.clone().unwrap(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, )); } }, _ => { return Err(Error::UnexpectedToken( self.current_span(), - self.current_token.clone().unwrap(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, )); } }; @@ -1509,7 +1512,7 @@ impl<'a> Parser<'a> { let mut args = invocation.arguments.into_iter(); let device = literal_or_variable!(args.next()); let logic_type = get_arg!(Literal, literal_or_variable!(args.next())); - let variable = args.next().unwrap(); + let variable = args.next().ok_or(Error::UnexpectedEOF)?; Ok(SysCall::System(sys_call::System::SetOnDevice( device, Literal::String(logic_type.to_string().replace("\"", "")), @@ -1521,7 +1524,7 @@ impl<'a> Parser<'a> { let mut args = invocation.arguments.into_iter(); let device_hash = literal_or_variable!(args.next()); let logic_type = get_arg!(Literal, literal_or_variable!(args.next())); - let variable = args.next().unwrap(); + let variable = args.next().ok_or(Error::UnexpectedEOF)?; Ok(SysCall::System(sys_call::System::SetOnDeviceBatched( device_hash, Literal::String(logic_type.to_string().replace("\"", "")), @@ -1536,12 +1539,12 @@ impl<'a> Parser<'a> { // Since Math isn't fully expanded in this snippet, we return Unsupported. Err(Error::UnsupportedKeyword( self.current_span(), - self.current_token.clone().unwrap(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, )) } else { Err(Error::UnsupportedKeyword( self.current_span(), - self.current_token.clone().unwrap(), + self.current_token.clone().ok_or(Error::UnexpectedEOF)?, )) } } From 997dd505850b5b4c32fb62a95d59a69e1c11b519 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Mon, 1 Dec 2025 15:07:15 -0700 Subject: [PATCH 11/12] remove weird '2' file --- 2 | 1553 ------------------------------------------------------------- 1 file changed, 1553 deletions(-) delete mode 100644 2 diff --git a/2 b/2 deleted file mode 100644 index 18f06ed..0000000 --- a/2 +++ /dev/null @@ -1,1553 +0,0 @@ -#[cfg(test)] -mod test; - -pub mod sys_call; -pub mod tree_node; - -use crate::sys_call::System; -use quick_error::quick_error; -use std::io::SeekFrom; -use sys_call::SysCall; -use tokenizer::{ - self, Tokenizer, TokenizerBuffer, - token::{Keyword, Symbol, Token, TokenType}, -}; -use tree_node::*; - -#[macro_export] -/// A macro to create a boxed value. -macro_rules! boxed { - ($e:expr) => { - Box::new($e) - }; -} - -quick_error! { - #[derive(Debug)] - pub enum Error { - TokenizerError(err: tokenizer::Error) { - from() - display("Tokenizer Error: {}", err) - source(err) - } - UnexpectedToken(span: Span, token: Token) { - display("Unexpected token: {}", token.token_type) - } - DuplicateIdentifier(span: Span, token: Token) { - display("Duplicate identifier: {}", token.token_type) - } - InvalidSyntax(span: Span, reason: String) { - display("Invalid syntax: {}", reason) - } - UnsupportedKeyword(span: Span, token: Token) { - display("Unsupported keyword: {}", token.token_type) - } - UnexpectedEOF { - display("Unexpected EOF") - } - } -} - -impl From for lsp_types::Diagnostic { - fn from(value: Error) -> Self { - use Error::*; - use lsp_types::*; - match value { - TokenizerError(e) => e.into(), - UnexpectedToken(span, _) - | DuplicateIdentifier(span, _) - | InvalidSyntax(span, _) - | UnsupportedKeyword(span, _) => Diagnostic { - message: value.to_string(), - severity: Some(DiagnosticSeverity::ERROR), - range: span.into(), - ..Default::default() - }, - UnexpectedEOF => Diagnostic { - message: value.to_string(), - severity: Some(DiagnosticSeverity::ERROR), - ..Default::default() - }, - } - } -} - -macro_rules! self_matches_peek { - ($self:ident, $pattern:pat) => { - matches!($self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. })) - }; - ($self:ident, $pattern:pat if $cond:expr) => { - matches!($self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. }) if $cond) - }; -} - -macro_rules! token_matches { - ($token:ident, $pattern:pat) => { - matches!($token.token_type, $pattern) - }; - ($token:expr, $pattern:pat) => { - matches!($token.token_type, $pattern) - }; - ($token:ident, $pattern:pat if $cond:expr) => { - matches!($token.token_type, $pattern if $cond) - }; - ($token:expr, $pattern:pat if $cond:expr) => { - matches!($token.token_type, $pattern if $cond) - }; -} - -macro_rules! self_matches_current { - ($self:ident, $pattern:pat) => { - matches!($self.current_token, Some(Token { token_type: $pattern, .. })) - }; - ($self:ident, $pattern:pat if $cond:expr) => { - matches!($self.current_token, Some(Token { token_type: $pattern, .. }) if $cond) - }; -} - -pub struct Parser<'a> { - tokenizer: TokenizerBuffer<'a>, - current_token: Option, - pub errors: Vec, -} - -impl<'a> Parser<'a> { - pub fn new(tokenizer: Tokenizer<'a>) -> Self { - Parser { - tokenizer: TokenizerBuffer::new(tokenizer), - current_token: None, - errors: Vec::new(), - } - } - - /// Calculates a Span from a given Token reference. - /// This is a static helper to avoid borrowing `self` when we already have a token ref. - fn token_to_span(t: &Token) -> Span { - let len = t.original_string.as_ref().map(|s| s.len()).unwrap_or(0); - Span { - start_line: t.line, - start_col: t.column, - end_line: t.line, - end_col: t.column + len, - } - } - - fn current_span(&self) -> Span { - self.current_token - .as_ref() - .map(Self::token_to_span) - .unwrap_or(Span { - start_line: 0, - start_col: 0, - end_line: 0, - end_col: 0, - }) - } - - /// Helper to run a parsing closure and wrap the result in a Spanned struct - fn spanned(&mut self, parser: F) -> Result, Error> - where - F: FnOnce(&mut Self) -> Result, - { - // Peek at the start token. If no current token (parsing hasn't started), peek the buffer. - let start_token = if self.current_token.is_some() { - self.current_token.clone() - } else { - self.tokenizer.peek()? - }; - - let (start_line, start_col) = start_token - .as_ref() - .map(|t| (t.line, t.column)) - .unwrap_or((1, 1)); - - let node = parser(self)?; - - // The end token is the current_token after parsing. - let end_token = self.current_token.as_ref(); - - let (end_line, end_col) = end_token - .map(|t| { - let len = t.original_string.as_ref().map(|s| s.len()).unwrap_or(0); - (t.line, t.column + len) - }) - .unwrap_or((start_line, start_col)); - - Ok(Spanned { - span: Span { - start_line, - start_col, - end_line, - end_col, - }, - node, - }) - } - - /// Skips tokens until a statement boundary is found to recover from errors. - fn synchronize(&mut self) -> Result<(), Error> { - // We advance once to consume the error-causing token if we haven't already - // But often the error happens after we consumed something. - // Safe bet: consume current, then look. - - // If we assign next, we might be skipping the very token we want to sync on if the error didn't consume it? - // Usually, in recursive descent, the error is raised when `current` is unexpected. - // We want to discard `current` and move on. - self.assign_next()?; - - while let Some(token) = &self.current_token { - if token.token_type == TokenType::Symbol(Symbol::Semicolon) { - // Consuming the semicolon is a good place to stop and resume parsing next statement - self.assign_next()?; - return Ok(()); - } - - // Check if the token looks like the start of a statement. - // If so, we don't consume it; we return so the loop in parse_all can try to parse it. - match token.token_type { - TokenType::Keyword(Keyword::Fn) - | TokenType::Keyword(Keyword::Let) - | TokenType::Keyword(Keyword::If) - | TokenType::Keyword(Keyword::While) - | TokenType::Keyword(Keyword::Loop) - | TokenType::Keyword(Keyword::Device) - | TokenType::Keyword(Keyword::Return) => return Ok(()), - _ => {} - } - - self.assign_next()?; - } - - Ok(()) - } - - pub fn parse_all(&mut self) -> Result, Error> { - let first_token = self.tokenizer.peek().unwrap_or(None); - let (start_line, start_col) = first_token - .as_ref() - .map(|tok| (tok.line, tok.column)) - .unwrap_or((1, 1)); - - let mut expressions = Vec::>::new(); - - loop { - // Check EOF without unwrapping error - match self.tokenizer.peek() { - Ok(None) => break, - Err(e) => { - self.errors.push(Error::TokenizerError(e)); - break; - } - _ => {} - } - - match self.parse() { - Ok(Some(expression)) => { - expressions.push(expression); - } - Ok(None) => break, - Err(e) => { - self.errors.push(e); - // Recover - if self.synchronize().is_err() { - // If sync failed (e.g. EOF during sync), break - break; - } - } - } - } - - // Even if we had errors, we return whatever partial AST we managed to build. - // If expressions is empty and we had errors, it's a failed parse, but we return a block. - - // Use the last token position for end span, or start if nothing parsed - let end_token_opt = self.tokenizer.peek().unwrap_or(None); - let (end_line, end_col) = end_token_opt - .map(|tok| { - let len = tok.original_string.as_ref().map(|s| s.len()).unwrap_or(0); - (tok.line, tok.column + len) - }) - .unwrap_or((start_line, start_col)); - - let span = Span { - start_line, - end_line, - start_col, - end_col, - }; - - Ok(Some(Expression::Block(Spanned { - node: BlockExpression(expressions), - span, - }))) - } - - pub fn parse(&mut self) -> Result>, Error> { - self.assign_next()?; - - // If assign_next hit EOF or error? - if self.current_token.is_none() { - return Ok(None); - } - - let expr = self.expression()?; - - if self_matches_peek!(self, TokenType::Symbol(Symbol::Semicolon)) { - self.assign_next()?; - } - - Ok(expr) - } - - fn assign_next(&mut self) -> Result<(), Error> { - self.current_token = self.tokenizer.next_token()?; - Ok(()) - } - - fn get_next(&mut self) -> Result, Error> { - self.assign_next()?; - Ok(self.current_token.as_ref()) - } - - fn expression(&mut self) -> Result>, Error> { - // Parse the Left Hand Side (unary/primary expression) - let lhs = self.unary()?; - - let Some(lhs) = lhs else { - return Ok(None); - }; - - // check if the next or current token is an operator, comparison, or logical symbol - if self_matches_peek!( - self, - TokenType::Symbol(s) if s.is_operator() || s.is_comparison() || s.is_logical() - ) { - return Ok(Some(self.infix(lhs)?)); - } else if self_matches_current!( - self, - TokenType::Symbol(s) if s.is_operator() || s.is_comparison() || s.is_logical() - ) { - self.tokenizer.seek(SeekFrom::Current(-1))?; - return Ok(Some(self.infix(lhs)?)); - } - - Ok(Some(lhs)) - } - - fn unary(&mut self) -> Result>, Error> { - macro_rules! matches_keyword { - ($keyword:expr, $($pattern:pat),+) => { - matches!($keyword, $($pattern)|+) - }; - } - - let Some(current_token) = self.current_token.as_ref() else { - return Ok(None); - }; - - if token_matches!(current_token, TokenType::EOF) { - return Ok(None); - } - - let expr = match current_token.token_type { - TokenType::Keyword(e) if matches_keyword!(e, Keyword::Enum) => { - return Err(Error::UnsupportedKeyword( - self.current_span(), - current_token.clone(), - )); - } - - TokenType::Keyword(Keyword::Let) => { - // declaration is wrapped in spanned inside the function, but expects 'let' to be current - Some(self.spanned(|p| p.declaration())?) - } - - TokenType::Keyword(Keyword::Device) => { - let spanned_dev = self.spanned(|p| p.device())?; - Some(Spanned { - span: spanned_dev.span, - node: Expression::DeviceDeclaration(spanned_dev), - }) - } - - TokenType::Keyword(Keyword::Fn) => { - let spanned_fn = self.spanned(|p| p.function())?; - Some(Spanned { - span: spanned_fn.span, - node: Expression::Function(spanned_fn), - }) - } - - TokenType::Keyword(Keyword::If) => { - let spanned_if = self.spanned(|p| p.if_expression())?; - Some(Spanned { - span: spanned_if.span, - node: Expression::If(spanned_if), - }) - } - - TokenType::Keyword(Keyword::Loop) => { - let spanned_loop = self.spanned(|p| p.loop_expression())?; - Some(Spanned { - span: spanned_loop.span, - node: Expression::Loop(spanned_loop), - }) - } - - TokenType::Keyword(Keyword::While) => { - let spanned_while = self.spanned(|p| p.while_expression())?; - Some(Spanned { - span: spanned_while.span, - node: Expression::While(spanned_while), - }) - } - - TokenType::Keyword(Keyword::Break) => { - let span = self.current_span(); - // make sure the next token is a semi-colon - let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(next, TokenType::Symbol(Symbol::Semicolon)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); - } - Some(Spanned { - span, - node: Expression::Break(span), - }) - } - - TokenType::Keyword(Keyword::Continue) => { - let span = self.current_span(); - let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(next, TokenType::Symbol(Symbol::Semicolon)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); - } - Some(Spanned { - span, - node: Expression::Continue(span), - }) - } - - TokenType::Identifier(ref id) if SysCall::is_syscall(id) => { - let spanned_call = self.spanned(|p| p.syscall())?; - Some(Spanned { - span: spanned_call.span, - node: Expression::Syscall(spanned_call), - }) - } - - TokenType::Identifier(_) - if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) => - { - let spanned_invoke = self.spanned(|p| p.invocation())?; - Some(Spanned { - span: spanned_invoke.span, - node: Expression::Invocation(spanned_invoke), - }) - } - - TokenType::Identifier(_) - if self_matches_peek!(self, TokenType::Symbol(Symbol::Assign)) => - { - let spanned_assign = self.spanned(|p| p.assignment())?; - Some(Spanned { - span: spanned_assign.span, - node: Expression::Assignment(spanned_assign), - }) - } - - TokenType::Identifier(ref id) => { - let span = self.current_span(); - Some(Spanned { - span, - node: Expression::Variable(Spanned { - span, - node: id.clone(), - }), - }) - } - - TokenType::Symbol(Symbol::LBrace) => { - let spanned_block = self.spanned(|p| p.block())?; - Some(Spanned { - span: spanned_block.span, - node: Expression::Block(spanned_block), - }) - } - - TokenType::Number(_) | TokenType::String(_) | TokenType::Boolean(_) => { - let spanned_lit = self.spanned(|p| p.literal())?; - Some(Spanned { - span: spanned_lit.span, - node: Expression::Literal(spanned_lit), - }) - } - - TokenType::Symbol(Symbol::LParen) => { - // Priority handles its own spanning - self.spanned(|p| p.priority())?.node.map(|node| *node) - } - - TokenType::Symbol(Symbol::Minus) => { - // Need to handle span manually because unary call is next - let start_span = self.current_span(); - self.assign_next()?; - let inner_expr = self.unary()?.ok_or(Error::UnexpectedEOF)?; - let combined_span = Span { - start_line: start_span.start_line, - start_col: start_span.start_col, - end_line: inner_expr.span.end_line, - end_col: inner_expr.span.end_col, - }; - Some(Spanned { - span: combined_span, - node: Expression::Negation(boxed!(inner_expr)), - }) - } - - TokenType::Symbol(Symbol::LogicalNot) => { - let start_span = self.current_span(); - self.assign_next()?; - let inner_expr = self.unary()?.ok_or(Error::UnexpectedEOF)?; - let combined_span = Span { - start_line: start_span.start_line, - start_col: start_span.start_col, - end_line: inner_expr.span.end_line, - end_col: inner_expr.span.end_col, - }; - Some(Spanned { - span: combined_span, - node: Expression::Logical(Spanned { - span: combined_span, - node: LogicalExpression::Not(boxed!(inner_expr)), - }), - }) - } - - _ => { - return Err(Error::UnexpectedToken( - self.current_span(), - current_token.clone(), - )); - } - }; - - Ok(expr) - } - - fn get_infix_child_node(&mut self) -> Result, Error> { - let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; - - match current_token.token_type { - TokenType::Number(_) | TokenType::Boolean(_) => { - let lit = self.spanned(|p| p.literal())?; - Ok(Spanned { - span: lit.span, - node: Expression::Literal(lit), - }) - } - TokenType::Identifier(ref ident) - if !self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) => - { - let span = self.current_span(); - Ok(Spanned { - span, - node: Expression::Variable(Spanned { - span, - node: ident.clone(), - }), - }) - } - TokenType::Symbol(Symbol::LParen) => Ok(*self - .spanned(|p| p.priority())? - .node - .ok_or(Error::UnexpectedEOF)?), - TokenType::Identifier(_) - if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) => - { - let inv = self.spanned(|p| p.invocation())?; - Ok(Spanned { - span: inv.span, - node: Expression::Invocation(inv), - }) - } - TokenType::Symbol(Symbol::Minus) => { - let start_span = self.current_span(); - self.assign_next()?; - let inner = self.get_infix_child_node()?; - let span = Span { - start_line: start_span.start_line, - start_col: start_span.start_col, - end_line: inner.span.end_line, - end_col: inner.span.end_col, - }; - Ok(Spanned { - span, - node: Expression::Negation(boxed!(inner)), - }) - } - TokenType::Symbol(Symbol::LogicalNot) => { - let start_span = self.current_span(); - self.assign_next()?; - let inner = self.get_infix_child_node()?; - let span = Span { - start_line: start_span.start_line, - start_col: start_span.start_col, - end_line: inner.span.end_line, - end_col: inner.span.end_col, - }; - Ok(Spanned { - span, - node: Expression::Logical(Spanned { - span, - node: LogicalExpression::Not(boxed!(inner)), - }), - }) - } - _ => Err(Error::UnexpectedToken( - self.current_span(), - current_token.clone(), - )), - } - } - - fn device(&mut self) -> Result { - let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; - if !self_matches_current!(self, TokenType::Keyword(Keyword::Device)) { - return Err(Error::UnexpectedToken( - self.current_span(), - current_token.clone(), - )); - } - - let identifier_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - let identifier_span = Self::token_to_span(identifier_token); - let identifier = match identifier_token.token_type { - TokenType::Identifier(ref id) => id.clone(), - _ => { - return Err(Error::UnexpectedToken( - Self::token_to_span(identifier_token), - identifier_token.clone(), - )); - } - }; - - let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(current_token, TokenType::Symbol(Symbol::Assign)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(current_token), - current_token.clone(), - )); - } - - let device_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - let device = match device_token.token_type { - TokenType::String(ref id) => id.clone(), - _ => { - return Err(Error::UnexpectedToken( - Self::token_to_span(device_token), - device_token.clone(), - )); - } - }; - - Ok(DeviceDeclarationExpression { - name: Spanned { - span: identifier_span, - node: identifier, - }, - device, - }) - } - - fn assignment(&mut self) -> Result { - let identifier_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; - let identifier_span = Self::token_to_span(identifier_token); - let identifier = match identifier_token.token_type { - TokenType::Identifier(ref id) => id.clone(), - _ => { - return Err(Error::UnexpectedToken( - self.current_span(), - self.current_token.clone().ok_or(Error::UnexpectedEOF)?, - )); - } - }; - - let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?.clone(); - if !token_matches!(current_token, TokenType::Symbol(Symbol::Assign)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(¤t_token), - current_token.clone(), - )); - } - self.assign_next()?; - - let expression = self.expression()?.ok_or(Error::UnexpectedEOF)?; - - Ok(AssignmentExpression { - identifier: Spanned { - span: identifier_span, - node: identifier, - }, - expression: boxed!(expression), - }) - } - - fn infix(&mut self, previous: Spanned) -> Result, Error> { - let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?.clone(); - - match previous.node { - Expression::Binary(_) - | Expression::Logical(_) - | Expression::Invocation(_) - | Expression::Priority(_) - | Expression::Literal(_) - | Expression::Variable(_) - | Expression::Negation(_) => {} - _ => { - return Err(Error::InvalidSyntax( - self.current_span(), - String::from("Invalid expression for binary/logical operation"), - )); - } - } - - let mut expressions = vec![previous]; - let mut operators = Vec::::new(); - - let mut temp_token = current_token.clone(); - - while token_matches!( - temp_token, - TokenType::Symbol(s) if s.is_operator() || s.is_comparison() || s.is_logical() - ) { - let operator = match temp_token.token_type { - TokenType::Symbol(s) => s, - _ => unreachable!(), - }; - operators.push(operator); - self.assign_next()?; - expressions.push(self.get_infix_child_node()?); - - temp_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?.clone(); - } - - if operators.len() != expressions.len() - 1 { - return Err(Error::InvalidSyntax( - self.current_span(), - String::from("Invalid number of operators"), - )); - } - - // --- PRECEDENCE LEVEL 1: Exponent (**) --- - for (i, operator) in operators.iter().enumerate().rev() { - if operator == &Symbol::Exp { - let right = expressions.remove(i + 1); - let left = expressions.remove(i); - let span = Span { - start_line: left.span.start_line, - start_col: left.span.start_col, - end_line: right.span.end_line, - end_col: right.span.end_col, - }; - expressions.insert( - i, - Spanned { - span, - node: Expression::Binary(Spanned { - span, - node: BinaryExpression::Exponent(boxed!(left), boxed!(right)), - }), - }, - ); - } - } - operators.retain(|symbol| symbol != &Symbol::Exp); - - // Common macro for binary ops - macro_rules! process_binary_ops { - ($ops:pat, $variant:ident) => { - let mut current_iteration = 0; - for (i, operator) in operators.iter().enumerate() { - if matches!(operator, $ops) { - let index = i - current_iteration; - let left = expressions.remove(index); - let right = expressions.remove(index); - let span = Span { - start_line: left.span.start_line, - start_col: left.span.start_col, - end_line: right.span.end_line, - end_col: right.span.end_col, - }; - - let node = match operator { - Symbol::Asterisk => { - BinaryExpression::Multiply(boxed!(left), boxed!(right)) - } - Symbol::Slash => BinaryExpression::Divide(boxed!(left), boxed!(right)), - Symbol::Percent => { - BinaryExpression::Modulo(boxed!(left), boxed!(right)) - } - Symbol::Plus => BinaryExpression::Add(boxed!(left), boxed!(right)), - Symbol::Minus => { - BinaryExpression::Subtract(boxed!(left), boxed!(right)) - } - _ => unreachable!(), - }; - - expressions.insert( - index, - Spanned { - span, - node: Expression::Binary(Spanned { span, node }), - }, - ); - current_iteration += 1; - } - } - operators.retain(|symbol| !matches!(symbol, $ops)); - }; - } - - // --- PRECEDENCE LEVEL 2: Multiplicative (*, /, %) --- - process_binary_ops!( - Symbol::Slash | Symbol::Asterisk | Symbol::Percent, - BinaryExpression - ); - - // --- PRECEDENCE LEVEL 3: Additive (+, -) --- - process_binary_ops!(Symbol::Plus | Symbol::Minus, BinaryExpression); - - // --- PRECEDENCE LEVEL 4: Comparison (<, >, <=, >=) --- - let mut current_iteration = 0; - for (i, operator) in operators.iter().enumerate() { - if operator.is_comparison() && !matches!(operator, Symbol::Equal | Symbol::NotEqual) { - let index = i - current_iteration; - let left = expressions.remove(index); - let right = expressions.remove(index); - let span = Span { - start_line: left.span.start_line, - start_col: left.span.start_col, - end_line: right.span.end_line, - end_col: right.span.end_col, - }; - - let node = match operator { - Symbol::LessThan => LogicalExpression::LessThan(boxed!(left), boxed!(right)), - Symbol::GreaterThan => { - LogicalExpression::GreaterThan(boxed!(left), boxed!(right)) - } - Symbol::LessThanOrEqual => { - LogicalExpression::LessThanOrEqual(boxed!(left), boxed!(right)) - } - Symbol::GreaterThanOrEqual => { - LogicalExpression::GreaterThanOrEqual(boxed!(left), boxed!(right)) - } - _ => unreachable!(), - }; - - expressions.insert( - index, - Spanned { - span, - node: Expression::Logical(Spanned { span, node }), - }, - ); - current_iteration += 1; - } - } - operators.retain(|symbol| { - !symbol.is_comparison() || matches!(symbol, Symbol::Equal | Symbol::NotEqual) - }); - - // --- PRECEDENCE LEVEL 5: Equality (==, !=) --- - current_iteration = 0; - for (i, operator) in operators.iter().enumerate() { - if matches!(operator, Symbol::Equal | Symbol::NotEqual) { - let index = i - current_iteration; - let left = expressions.remove(index); - let right = expressions.remove(index); - let span = Span { - start_line: left.span.start_line, - start_col: left.span.start_col, - end_line: right.span.end_line, - end_col: right.span.end_col, - }; - - let node = match operator { - Symbol::Equal => LogicalExpression::Equal(boxed!(left), boxed!(right)), - Symbol::NotEqual => LogicalExpression::NotEqual(boxed!(left), boxed!(right)), - _ => unreachable!(), - }; - - expressions.insert( - index, - Spanned { - span, - node: Expression::Logical(Spanned { span, node }), - }, - ); - current_iteration += 1; - } - } - operators.retain(|symbol| !matches!(symbol, Symbol::Equal | Symbol::NotEqual)); - - // --- PRECEDENCE LEVEL 6: Logical AND (&&) --- - current_iteration = 0; - for (i, operator) in operators.iter().enumerate() { - if matches!(operator, Symbol::LogicalAnd) { - let index = i - current_iteration; - let left = expressions.remove(index); - let right = expressions.remove(index); - let span = Span { - start_line: left.span.start_line, - start_col: left.span.start_col, - end_line: right.span.end_line, - end_col: right.span.end_col, - }; - - expressions.insert( - index, - Spanned { - span, - node: Expression::Logical(Spanned { - span, - node: LogicalExpression::And(boxed!(left), boxed!(right)), - }), - }, - ); - current_iteration += 1; - } - } - operators.retain(|symbol| !matches!(symbol, Symbol::LogicalAnd)); - - // --- PRECEDENCE LEVEL 7: Logical OR (||) --- - current_iteration = 0; - for (i, operator) in operators.iter().enumerate() { - if matches!(operator, Symbol::LogicalOr) { - let index = i - current_iteration; - let left = expressions.remove(index); - let right = expressions.remove(index); - let span = Span { - start_line: left.span.start_line, - start_col: left.span.start_col, - end_line: right.span.end_line, - end_col: right.span.end_col, - }; - - expressions.insert( - index, - Spanned { - span, - node: Expression::Logical(Spanned { - span, - node: LogicalExpression::Or(boxed!(left), boxed!(right)), - }), - }, - ); - current_iteration += 1; - } - } - operators.retain(|symbol| !matches!(symbol, Symbol::LogicalOr)); - - if expressions.len() != 1 || !operators.is_empty() { - return Err(Error::InvalidSyntax( - self.current_span(), - String::from("Invalid number of operators"), - )); - } - - if token_matches!( - temp_token, - TokenType::Symbol(Symbol::Semicolon) | TokenType::Symbol(Symbol::RParen) - ) { - self.tokenizer.seek(SeekFrom::Current(-1))?; - } - - expressions.pop().ok_or(Error::UnexpectedEOF) - } - - fn priority(&mut self) -> Result>>, Error> { - let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; - if !token_matches!(current_token, TokenType::Symbol(Symbol::LParen)) { - return Err(Error::UnexpectedToken( - self.current_span(), - current_token.clone(), - )); - } - - self.assign_next()?; - let expression = self.expression()?.ok_or(Error::UnexpectedEOF)?; - - let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(current_token, TokenType::Symbol(Symbol::RParen)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(current_token), - current_token.clone(), - )); - } - - Ok(Some(boxed!(expression))) - } - - fn invocation(&mut self) -> Result { - let identifier_token = self.current_token.as_ref().unwrap(); - let identifier_span = Self::token_to_span(identifier_token); - let identifier = match identifier_token.token_type { - TokenType::Identifier(ref id) => id.clone(), - _ => { - return Err(Error::UnexpectedToken( - self.current_span(), - self.current_token.clone().ok_or(Error::UnexpectedEOF)?, - )); - } - }; - - let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(current_token, TokenType::Symbol(Symbol::LParen)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(current_token), - current_token.clone(), - )); - } - - let mut arguments = Vec::>::new(); - - while !token_matches!( - self.get_next()?.ok_or(Error::UnexpectedEOF)?, - TokenType::Symbol(Symbol::RParen) - ) { - let expression = self.expression()?.ok_or(Error::UnexpectedEOF)?; - - if let Expression::Block(_) = expression.node { - return Err(Error::InvalidSyntax( - self.current_span(), - String::from("Block expressions are not allowed in function invocations"), - )); - } - - arguments.push(expression); - - if !self_matches_peek!(self, TokenType::Symbol(Symbol::Comma)) - && !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) - { - let next_token = self.get_next()?.unwrap(); - return Err(Error::UnexpectedToken( - Self::token_to_span(next_token), - next_token.clone(), - )); - } - - if !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) { - self.assign_next()?; - } - } - - Ok(InvocationExpression { - name: Spanned { - span: identifier_span, - node: identifier, - }, - arguments, - }) - } - - fn block(&mut self) -> Result { - let mut expressions = Vec::>::new(); - let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; - - if !token_matches!(current_token, TokenType::Symbol(Symbol::LBrace)) { - return Err(Error::UnexpectedToken( - self.current_span(), - current_token.clone(), - )); - } - - while !self_matches_peek!( - self, - TokenType::Symbol(Symbol::RBrace) | TokenType::Keyword(Keyword::Return) - ) { - let expression = self.parse()?.ok_or(Error::UnexpectedEOF)?; - expressions.push(expression); - } - - let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - - if token_matches!(current_token, TokenType::Keyword(Keyword::Return)) { - // Need to capture return span - let ret_start_span = Self::token_to_span(current_token); - self.assign_next()?; - let expression = self.expression()?.ok_or(Error::UnexpectedEOF)?; - - let ret_span = Span { - start_line: ret_start_span.start_line, - start_col: ret_start_span.start_col, - end_line: expression.span.end_line, - end_col: expression.span.end_col, - }; - - let return_expr = Spanned { - span: ret_span, - node: Expression::Return(boxed!(expression)), - }; - expressions.push(return_expr); - - let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(next, TokenType::Symbol(Symbol::Semicolon)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); - } - - let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(next, TokenType::Symbol(Symbol::RBrace)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); - } - } - - Ok(BlockExpression(expressions)) - } - - fn declaration(&mut self) -> Result { - let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; - if !self_matches_current!(self, TokenType::Keyword(Keyword::Let)) { - return Err(Error::UnexpectedToken( - self.current_span(), - current_token.clone(), - )); - } - let identifier_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - let identifier_span = Self::token_to_span(identifier_token); - let identifier = match identifier_token.token_type { - TokenType::Identifier(ref id) => id.clone(), - _ => { - return Err(Error::UnexpectedToken( - Self::token_to_span(identifier_token), - identifier_token.clone(), - )); - } - }; - - let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?.clone(); - - if !token_matches!(current_token, TokenType::Symbol(Symbol::Assign)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(¤t_token), - current_token.clone(), - )); - } - - self.assign_next()?; - let assignment_expression = self.expression()?.ok_or(Error::UnexpectedEOF)?; - - let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(current_token, TokenType::Symbol(Symbol::Semicolon)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(current_token), - current_token.clone(), - )); - } - - Ok(Expression::Declaration( - Spanned { - span: identifier_span, - node: identifier, - }, - boxed!(assignment_expression), - )) - } - - fn literal(&mut self) -> Result { - let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; - let literal = match current_token.token_type { - TokenType::Number(num) => Literal::Number(num), - TokenType::String(ref string) => Literal::String(string.clone()), - TokenType::Boolean(boolean) => Literal::Boolean(boolean), - _ => { - return Err(Error::UnexpectedToken( - self.current_span(), - current_token.clone(), - )); - } - }; - - Ok(literal) - } - - fn if_expression(&mut self) -> Result { - // 'if' is current - let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(next, TokenType::Symbol(Symbol::LParen)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); - } - self.assign_next()?; - - let condition = self.expression()?.ok_or(Error::UnexpectedEOF)?; - - let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(next, TokenType::Symbol(Symbol::RParen)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); - } - - let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(next, TokenType::Symbol(Symbol::LBrace)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); - } - - let body = self.spanned(|p| p.block())?; - - let else_branch = if self_matches_peek!(self, TokenType::Keyword(Keyword::Else)) { - self.assign_next()?; - - if self_matches_peek!(self, TokenType::Keyword(Keyword::If)) { - self.assign_next()?; - // Recurse for else if - let if_expr = self.spanned(|p| p.if_expression())?; - Some(boxed!(Spanned { - span: if_expr.span, - node: Expression::If(if_expr), - })) - } else if self_matches_peek!(self, TokenType::Symbol(Symbol::LBrace)) { - self.assign_next()?; - let block = self.spanned(|p| p.block())?; - Some(boxed!(Spanned { - span: block.span, - node: Expression::Block(block), - })) - } else { - let next = self.get_next()?.unwrap(); - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); - } - } else { - None - }; - - Ok(IfExpression { - condition: boxed!(condition), - body, - else_branch, - }) - } - - fn loop_expression(&mut self) -> Result { - let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(next, TokenType::Symbol(Symbol::LBrace)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); - } - - let body = self.spanned(|p| p.block())?; - - Ok(LoopExpression { body }) - } - - fn while_expression(&mut self) -> Result { - let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(next, TokenType::Symbol(Symbol::LParen)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); - } - self.assign_next()?; - - let condition = self.expression()?.ok_or(Error::UnexpectedEOF)?; - - let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(next, TokenType::Symbol(Symbol::RParen)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); - } - - let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(next, TokenType::Symbol(Symbol::LBrace)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); - } - - let body = self.block()?; - - Ok(WhileExpression { - condition: boxed!(condition), - body, - }) - } - - fn function(&mut self) -> Result { - // 'fn' is current - let fn_ident_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - let fn_ident_span = Self::token_to_span(fn_ident_token); - let fn_ident = match fn_ident_token.token_type { - TokenType::Identifier(ref id) => id.clone(), - _ => { - return Err(Error::UnexpectedToken( - Self::token_to_span(fn_ident_token), - fn_ident_token.clone(), - )); - } - }; - - let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(current_token, TokenType::Symbol(Symbol::LParen)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(current_token), - current_token.clone(), - )); - } - - let mut arguments = Vec::>::new(); - - while !token_matches!( - self.get_next()?.ok_or(Error::UnexpectedEOF)?, - TokenType::Symbol(Symbol::RParen) - ) { - let current_token = self.current_token.as_ref().unwrap(); - let arg_span = Self::token_to_span(current_token); - let argument = match current_token.token_type { - TokenType::Identifier(ref id) => id.clone(), - _ => { - return Err(Error::UnexpectedToken( - Self::token_to_span(current_token), - current_token.clone(), - )); - } - }; - - let spanned_arg = Spanned { - span: arg_span, - node: argument, - }; - - if arguments.contains(&spanned_arg) { - return Err(Error::DuplicateIdentifier( - Self::token_to_span(current_token), - current_token.clone(), - )); - } - - arguments.push(spanned_arg); - - if !self_matches_peek!(self, TokenType::Symbol(Symbol::Comma)) - && !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) - { - let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); - } - - if !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) { - self.assign_next()?; - } - } - - let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - if !token_matches!(current_token, TokenType::Symbol(Symbol::LBrace)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(current_token), - current_token.clone(), - )); - }; - - Ok(FunctionExpression { - name: Spanned { - span: fn_ident_span, - node: fn_ident, - }, - arguments, - body: self.block()?, - }) - } - - fn syscall(&mut self) -> Result { - fn check_length( - parser: &Parser, - arguments: &[Spanned], - length: usize, - ) -> Result<(), Error> { - if arguments.len() != length { - return Err(Error::InvalidSyntax( - parser.current_span(), - format!("Expected {} arguments", length), - )); - } - Ok(()) - } - - macro_rules! literal_or_variable { - ($iter:expr) => { - match $iter { - Some(expr) => match &expr.node { - Expression::Literal(literal) => { - LiteralOrVariable::Literal(literal.node.clone()) - } - Expression::Variable(ident) => LiteralOrVariable::Variable(ident.clone()), - _ => { - return Err(Error::UnexpectedToken( - self.current_span(), - self.current_token.clone().ok_or(Error::UnexpectedEOF)?, - )) - } - }, - _ => { - return Err(Error::UnexpectedToken( - self.current_span(), - self.current_token.clone().ok_or(Error::UnexpectedEOF)?, - )) - } - } - }; - } - - macro_rules! get_arg { - ($matcher: ident, $arg: expr) => { - match $arg { - LiteralOrVariable::$matcher(i) => i, - _ => { - return Err(Error::InvalidSyntax( - self.current_span(), - String::from("Expected a variable"), - )) - } - } - }; - } - - let invocation = self.invocation()?; - - match invocation.name.node.as_str() { - "yield" => { - check_length(self, &invocation.arguments, 0)?; - Ok(SysCall::System(sys_call::System::Yield)) - } - "sleep" => { - check_length(self, &invocation.arguments, 1)?; - let mut arg = invocation.arguments.into_iter(); - let expr = arg.next().ok_or(Error::UnexpectedEOF)?; - Ok(SysCall::System(System::Sleep(boxed!(expr)))) - } - "hash" => { - check_length(self, &invocation.arguments, 1)?; - let mut args = invocation.arguments.into_iter(); - let lit_str = literal_or_variable!(args.next()); - - let LiteralOrVariable::Literal(lit_str) = lit_str else { - return Err(Error::UnexpectedToken( - self.current_span(), - self.current_token.clone().ok_or(Error::UnexpectedEOF)?, - )); - }; - - Ok(SysCall::System(System::Hash(lit_str))) - } - "loadFromDevice" => { - check_length(self, &invocation.arguments, 2)?; - let mut args = invocation.arguments.into_iter(); - - let device = literal_or_variable!(args.next()); - let next_arg = args.next(); - - let variable = match next_arg { - Some(expr) => match expr.node { - Expression::Literal(spanned_lit) => match spanned_lit.node { - Literal::String(s) => s, - _ => { - return Err(Error::UnexpectedToken( - self.current_span(), - self.current_token.clone().ok_or(Error::UnexpectedEOF)?, - )); - } - }, - _ => { - return Err(Error::UnexpectedToken( - self.current_span(), - self.current_token.clone().ok_or(Error::UnexpectedEOF)?, - )); - } - }, - _ => { - return Err(Error::UnexpectedToken( - self.current_span(), - self.current_token.clone().ok_or(Error::UnexpectedEOF)?, - )); - } - }; - - Ok(SysCall::System(sys_call::System::LoadFromDevice( - device, - Literal::String(variable), - ))) - } - // ... (implementing other syscalls similarly using patterns above) - "setOnDevice" => { - check_length(self, &invocation.arguments, 3)?; - let mut args = invocation.arguments.into_iter(); - let device = literal_or_variable!(args.next()); - let logic_type = get_arg!(Literal, literal_or_variable!(args.next())); - let variable = args.next().ok_or(Error::UnexpectedEOF)?; - Ok(SysCall::System(sys_call::System::SetOnDevice( - device, - Literal::String(logic_type.to_string().replace("\"", "")), - boxed!(variable), - ))) - } - "setOnDeviceBatched" => { - check_length(self, &invocation.arguments, 3)?; - let mut args = invocation.arguments.into_iter(); - let device_hash = literal_or_variable!(args.next()); - let logic_type = get_arg!(Literal, literal_or_variable!(args.next())); - let variable = args.next().ok_or(Error::UnexpectedEOF)?; - Ok(SysCall::System(sys_call::System::SetOnDeviceBatched( - device_hash, - Literal::String(logic_type.to_string().replace("\"", "")), - boxed!(variable), - ))) - } - _ => { - // For Math functions or unknown functions - if SysCall::is_syscall(&invocation.name.node) { - // Attempt to parse as math if applicable, or error if strict - // Here we are falling back to simple handling or error. - // Since Math isn't fully expanded in this snippet, we return Unsupported. - Err(Error::UnsupportedKeyword( - self.current_span(), - self.current_token.clone().ok_or(Error::UnexpectedEOF)?, - )) - } else { - Err(Error::UnsupportedKeyword( - self.current_span(), - self.current_token.clone().ok_or(Error::UnexpectedEOF)?, - )) - } - } - } - } -} From abaefa294bfb9d985a18247f302b0370d57603dd Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Mon, 1 Dec 2025 15:35:44 -0700 Subject: [PATCH 12/12] rust catch_unwind as a safeguard --- rust_compiler/libs/compiler/src/v1.rs | 6 +- rust_compiler/src/ffi/mod.rs | 144 ++++++++++++++------------ 2 files changed, 84 insertions(+), 66 deletions(-) diff --git a/rust_compiler/libs/compiler/src/v1.rs b/rust_compiler/libs/compiler/src/v1.rs index 82cb19b..e4ef716 100644 --- a/rust_compiler/libs/compiler/src/v1.rs +++ b/rust_compiler/libs/compiler/src/v1.rs @@ -797,7 +797,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { self.write_output(format!("j {end_label}"))?; self.write_output(format!("{else_label}:"))?; - match expr.else_branch.unwrap().node { + match expr + .else_branch + .ok_or(Error::Unknown("Missing else branch. This should not happen and indicates a Compiler Error. Please report to the author.".into(), None))? + .node + { Expression::Block(block) => self.expression_block(block.node, scope)?, Expression::If(if_expr) => self.expression_if(if_expr.node, scope)?, _ => unreachable!("Parser ensures else branch is Block or If"), diff --git a/rust_compiler/src/ffi/mod.rs b/rust_compiler/src/ffi/mod.rs index a132297..5c0750e 100644 --- a/rust_compiler/src/ffi/mod.rs +++ b/rust_compiler/src/ffi/mod.rs @@ -82,90 +82,104 @@ pub fn free_string(s: safer_ffi::String) { /// from the GC from a `GetBytes()` call on a string in C#. #[ffi_export] pub fn compile_from_string(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::String { - let mut writer = BufWriter::new(Vec::new()); + let res = std::panic::catch_unwind(|| { + let mut writer = BufWriter::new(Vec::new()); - let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice())); - let parser = Parser::new(tokenizer); - let compiler = Compiler::new(parser, &mut writer, None); + let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice())); + let parser = Parser::new(tokenizer); + let compiler = Compiler::new(parser, &mut writer, None); - if !compiler.compile().is_empty() { - return safer_ffi::String::EMPTY; - } + if !compiler.compile().is_empty() { + return safer_ffi::String::EMPTY; + } - let Ok(compiled_vec) = writer.into_inner() else { - return safer_ffi::String::EMPTY; - }; + let Ok(compiled_vec) = writer.into_inner() else { + return safer_ffi::String::EMPTY; + }; - // Safety: I know the compiler only outputs valid utf8 - safer_ffi::String::from(unsafe { String::from_utf8_unchecked(compiled_vec) }) + // Safety: I know the compiler only outputs valid utf8 + safer_ffi::String::from(unsafe { String::from_utf8_unchecked(compiled_vec) }) + }); + + res.unwrap_or("".into()) } #[ffi_export] pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec { - let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice())); + let res = std::panic::catch_unwind(|| { + let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice())); - let mut tokens = Vec::new(); + let mut tokens = Vec::new(); - for token in tokenizer { - if matches!( - token, - Ok(Token { - token_type: TokenType::EOF, - .. - }) - ) { - continue; - } - match token { - Err(ref e) => { - use tokenizer::Error::*; - let (err_str, col, og) = match e { - NumberParseError(_, _, col, og) - | DecimalParseError(_, _, col, og) - | UnknownSymbolError(_, _, col, og) - | UnknownKeywordOrIdentifierError(_, _, col, og) => (e.to_string(), col, og), - _ => continue, - }; - - tokens.push(FfiToken { - column: *col as i32, - error: err_str.into(), - tooltip: "".into(), - length: og.len() as i32, - token_kind: 0, + for token in tokenizer { + if matches!( + token, + Ok(Token { + token_type: TokenType::EOF, + .. }) + ) { + continue; } - Ok(Token { - column, - original_string, - token_type, - .. - }) => tokens.push(FfiToken { - column: column as i32, - error: "".into(), - length: (original_string.unwrap_or_default().len()) as i32, - token_kind: token_type.into(), - tooltip: "".into(), - }), - } - } + match token { + Err(ref e) => { + use tokenizer::Error::*; + let (err_str, col, og) = match e { + NumberParseError(_, _, col, og) + | DecimalParseError(_, _, col, og) + | UnknownSymbolError(_, _, col, og) + | UnknownKeywordOrIdentifierError(_, _, col, og) => { + (e.to_string(), col, og) + } + _ => continue, + }; - tokens.into() + tokens.push(FfiToken { + column: *col as i32, + error: err_str.into(), + tooltip: "".into(), + length: og.len() as i32, + token_kind: 0, + }) + } + Ok(Token { + column, + original_string, + token_type, + .. + }) => tokens.push(FfiToken { + column: column as i32, + error: "".into(), + length: (original_string.unwrap_or_default().len()) as i32, + token_kind: token_type.into(), + tooltip: "".into(), + }), + } + } + + tokens.into() + }); + + res.unwrap_or(vec![].into()) } #[ffi_export] pub fn diagnose_source(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec { - let mut writer = BufWriter::new(Vec::new()); - let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice())); - let compiler = Compiler::new(Parser::new(tokenizer), &mut writer, None); + let res = std::panic::catch_unwind(|| { + let mut writer = BufWriter::new(Vec::new()); + let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice())); + let compiler = Compiler::new(Parser::new(tokenizer), &mut writer, None); - let diagnosis = compiler.compile(); + let diagnosis = compiler.compile(); - let mut result_vec: Vec = Vec::with_capacity(diagnosis.len()); + let mut result_vec: Vec = Vec::with_capacity(diagnosis.len()); - for err in diagnosis { - result_vec.push(lsp_types::Diagnostic::from(err).into()); - } + for err in diagnosis { + result_vec.push(lsp_types::Diagnostic::from(err).into()); + } - result_vec.into() + result_vec.into() + }); + + res.unwrap_or(vec![].into()) }