diff --git a/Changelog.md b/Changelog.md index f039671..a493374 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,5 +1,11 @@ # Changelog +[0.2.0] + +- Completely re-wrote the tokenizer to use `logos` +- Changed AST and Token data structures to use `Cow` instead of `String` +- Updated error reporting to use `thiserror` instead of `quickerror` + [0.1.2] - Removed references to `Unitask` diff --git a/ModData/About/About.xml b/ModData/About/About.xml index b4f7c83..98e8f46 100644 --- a/ModData/About/About.xml +++ b/ModData/About/About.xml @@ -2,7 +2,7 @@ Slang JoeDiertay - 0.1.2 + 0.2.0 [h1]Slang: High-Level Programming for Stationeers[/h1] diff --git a/csharp_mod/Extensions.cs b/csharp_mod/Extensions.cs index 6a7e8f2..cefd685 100644 --- a/csharp_mod/Extensions.cs +++ b/csharp_mod/Extensions.cs @@ -55,7 +55,7 @@ public static unsafe class SlangExtensions var color = GetColorForKind(token.token_kind); - int colIndex = token.column - 1; + int colIndex = token.column; if (colIndex < 0) colIndex = 0; @@ -100,10 +100,10 @@ public static unsafe class SlangExtensions Severity = item.severity, Range = new Slang.Range { - EndCol = Math.Max(item.range.end_col - 2, 0), - EndLine = item.range.end_line - 1, - StartCol = Math.Max(item.range.start_col - 2, 0), - StartLine = item.range.end_line - 1, + EndCol = Math.Max(item.range.end_col, 0), + EndLine = item.range.end_line, + StartCol = Math.Max(item.range.start_col, 0), + StartLine = item.range.start_line, }, } ); @@ -134,6 +134,9 @@ public static unsafe class SlangExtensions case 7: // (punctuation) return SlangFormatter.ColorDefault; + case 8: // Comments + return SlangFormatter.ColorComment; + case 10: // (syscalls) return SlangFormatter.ColorFunction; diff --git a/csharp_mod/stationeersSlang.csproj b/csharp_mod/stationeersSlang.csproj index eda308a..f519032 100644 --- a/csharp_mod/stationeersSlang.csproj +++ b/csharp_mod/stationeersSlang.csproj @@ -5,7 +5,7 @@ enable StationeersSlang Slang Compiler Bridge - 0.1.2 + 0.2.0 true latest diff --git a/rust_compiler/Cargo.lock b/rust_compiler/Cargo.lock index 50bc630..6867837 100644 --- a/rust_compiler/Cargo.lock +++ b/rust_compiler/Cargo.lock @@ -28,6 +28,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "anstream" version = "0.6.21" @@ -114,6 +123,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + [[package]] name = "bitflags" version = "1.3.2" @@ -257,8 +272,8 @@ dependencies = [ "lsp-types", "parser", "pretty_assertions", - "quick-error", "rust_decimal", + "thiserror", "tokenizer", ] @@ -327,6 +342,12 @@ dependencies = [ "bitflags", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "funty" version = "2.0.0" @@ -434,6 +455,40 @@ version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +[[package]] +name = "logos" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a790d11254054e5dc83902dba85d253ff06ceb0cfafb12be8773435cb9dfb4f4" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60337c43a38313b58871f8d5d76872b8e17aa9d51fad494b5e76092c0ce05f5" +dependencies = [ + "beef", + "fnv", + "proc-macro2", + "quote", + "regex-automata", + "regex-syntax", + "rustc_version", + "syn 2.0.111", +] + +[[package]] +name = "logos-derive" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d151b2ae667f69e10b8738f5cac0c746faa22b2e15ea7e83b55476afec3767dc" +dependencies = [ + "logos-codegen", +] + [[package]] name = "lsp-types" version = "0.97.0" @@ -516,7 +571,7 @@ dependencies = [ "helpers", "lsp-types", "pretty_assertions", - "quick-error", + "thiserror", "tokenizer", ] @@ -593,12 +648,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "quick-error" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" - [[package]] name = "quote" version = "1.0.42" @@ -644,6 +693,23 @@ dependencies = [ "getrandom", ] +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + [[package]] name = "rend" version = "0.4.2" @@ -843,7 +909,7 @@ checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "slang" -version = "0.1.2" +version = "0.2.0" dependencies = [ "anyhow", "clap", @@ -851,9 +917,9 @@ dependencies = [ "helpers", "lsp-types", "parser", - "quick-error", "rust_decimal", "safer-ffi", + "thiserror", "tokenizer", ] @@ -926,6 +992,26 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "thiserror" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "tinyvec" version = "1.10.0" @@ -947,9 +1033,10 @@ version = "0.1.0" dependencies = [ "anyhow", "helpers", + "logos", "lsp-types", - "quick-error", "rust_decimal", + "thiserror", ] [[package]] diff --git a/rust_compiler/Cargo.toml b/rust_compiler/Cargo.toml index bac327e..8969880 100644 --- a/rust_compiler/Cargo.toml +++ b/rust_compiler/Cargo.toml @@ -1,13 +1,13 @@ [package] name = "slang" -version = "0.1.2" +version = "0.2.0" edition = "2021" [workspace] members = ["libs/*"] [workspace.dependencies] -quick-error = "2" +thiserror = "2" rust_decimal = "1" safer-ffi = { version = "0.1" } # Safely share structs in memory between C# and Rust lsp-types = { version = "0.97" } # Allows for LSP style reporting to the frontend @@ -36,13 +36,13 
@@ crate-type = ["cdylib", "rlib"] [dependencies] clap = { version = "^4.5", features = ["derive"] } lsp-types = { workspace = true } -quick-error = { workspace = true } +thiserror = { workspace = true } rust_decimal = { workspace = true } tokenizer = { path = "libs/tokenizer" } parser = { path = "libs/parser" } compiler = { path = "libs/compiler" } helpers = { path = "libs/helpers" } safer-ffi = { workspace = true } +anyhow = { version = "^1.0", features = ["backtrace"] } [dev-dependencies] -anyhow = { version = "^1.0", features = ["backtrace"] } diff --git a/rust_compiler/libs/compiler/Cargo.toml b/rust_compiler/libs/compiler/Cargo.toml index a21718c..829e4f9 100644 --- a/rust_compiler/libs/compiler/Cargo.toml +++ b/rust_compiler/libs/compiler/Cargo.toml @@ -4,7 +4,7 @@ version = "0.1.0" edition = "2024" [dependencies] -quick-error = { workspace = true } +thiserror = { workspace = true } parser = { path = "../parser" } tokenizer = { path = "../tokenizer" } helpers = { path = "../helpers" } diff --git a/rust_compiler/libs/compiler/src/test/mod.rs b/rust_compiler/libs/compiler/src/test/mod.rs index b3e51c2..77d771a 100644 --- a/rust_compiler/libs/compiler/src/test/mod.rs +++ b/rust_compiler/libs/compiler/src/test/mod.rs @@ -22,7 +22,7 @@ macro_rules! compile { (result $source:expr) => {{ let mut writer = std::io::BufWriter::new(Vec::new()); let compiler = crate::Compiler::new( - parser::Parser::new(tokenizer::Tokenizer::from(String::from($source))), + parser::Parser::new(tokenizer::Tokenizer::from($source)), &mut writer, Some(crate::CompilerConfig { debug: true }), ); @@ -32,7 +32,7 @@ macro_rules! compile { (debug $source:expr) => {{ let mut writer = std::io::BufWriter::new(Vec::new()); let compiler = crate::Compiler::new( - parser::Parser::new(tokenizer::Tokenizer::from(String::from($source))), + parser::Parser::new(tokenizer::Tokenizer::from($source)), &mut writer, Some(crate::CompilerConfig { debug: true }), ); diff --git a/rust_compiler/libs/compiler/src/v1.rs b/rust_compiler/libs/compiler/src/v1.rs index 005b95f..ac3acf2 100644 --- a/rust_compiler/libs/compiler/src/v1.rs +++ b/rust_compiler/libs/compiler/src/v1.rs @@ -11,11 +11,12 @@ use parser::{ LoopExpression, MemberAccessExpression, Span, Spanned, WhileExpression, }, }; -use quick_error::quick_error; use std::{ + borrow::Cow, collections::HashMap, io::{BufWriter, Write}, }; +use thiserror::Error; use tokenizer::token::Number; macro_rules! debug { @@ -36,7 +37,10 @@ macro_rules! debug { }; } -fn extract_literal(literal: Literal, allow_strings: bool) -> Result { +fn extract_literal<'a>( + literal: Literal<'a>, + allow_strings: bool, +) -> Result, Error<'a>> { if !allow_strings && matches!(literal, Literal::String(_)) { return Err(Error::Unknown( "Literal strings are not allowed in this context".to_string(), @@ -45,59 +49,56 @@ fn extract_literal(literal: Literal, allow_strings: bool) -> Result s, - Literal::Number(n) => n.to_string(), - Literal::Boolean(b) => if b { "1" } else { "0" }.into(), + Literal::Number(n) => Cow::from(n.to_string()), + Literal::Boolean(b) => Cow::from(if b { "1" } else { "0" }), }) } -quick_error! 
{ - #[derive(Debug)] - pub enum Error { - ParseError(error: parser::Error) { - from() - } - IoError(error: String) { - display("IO Error: {}", error) - } - ScopeError(error: variable_manager::Error) { - from() - } - DuplicateIdentifier(func_name: String, span: Span) { - display("`{func_name}` has already been defined") - } - UnknownIdentifier(ident: String, span: Span) { - display("`{ident}` is not found in the current scope.") - } - InvalidDevice(device: String, span: Span) { - display("`{device}` is not valid") - } - AgrumentMismatch(func_name: String, span: Span) { - display("Incorrect number of arguments passed into `{func_name}`") - } - ConstAssignment(ident: String, span: Span) { - display("Attempted to re-assign a value to const variable `{ident}`") - } - DeviceAssignment(ident: String, span: Span) { - display("Attempted to re-assign a value to a device const `{ident}`") - } - Unknown(reason: String, span: Option) { - display("{reason}") - } - } +#[derive(Error, Debug)] +pub enum Error<'a> { + #[error("{0}")] + Parse(parser::Error<'a>), + + #[error("{0}")] + Scope(variable_manager::Error<'a>), + + #[error("IO Error: {0}")] + IO(String), + + #[error("`{0}` has already been defined.")] + DuplicateIdentifier(Cow<'a, str>, Span), + + #[error("`{0}` is not found in the current scope.")] + UnknownIdentifier(Cow<'a, str>, Span), + + #[error("`{0}` is not valid.")] + InvalidDevice(Cow<'a, str>, Span), + + #[error("Incorrent number of arguments passed into `{0}`")] + AgrumentMismatch(Cow<'a, str>, Span), + + #[error("Attempted to re-assign a value to const variable `{0}`")] + ConstAssignment(Cow<'a, str>, Span), + + #[error("Attempted to re-assign a value to a device const `{0}`")] + DeviceAssignment(Cow<'a, str>, Span), + + #[error("{0}")] + Unknown(String, Option), } -impl From for lsp_types::Diagnostic { +impl<'a> From> for lsp_types::Diagnostic { fn from(value: Error) -> Self { use Error::*; use lsp_types::*; match value { - ParseError(e) => e.into(), - IoError(e) => Diagnostic { + Parse(e) => e.into(), + IO(e) => Diagnostic { message: e.to_string(), severity: Some(DiagnosticSeverity::ERROR), ..Default::default() }, - ScopeError(e) => e.into(), + Scope(e) => e.into(), DuplicateIdentifier(_, span) | UnknownIdentifier(_, span) | InvalidDevice(_, span) @@ -119,10 +120,22 @@ impl From for lsp_types::Diagnostic { } } +impl<'a> From> for Error<'a> { + fn from(value: parser::Error<'a>) -> Self { + Self::Parse(value) + } +} + +impl<'a> From> for Error<'a> { + fn from(value: variable_manager::Error<'a>) -> Self { + Self::Scope(value) + } +} + // Map io::Error to Error manually since we can't clone io::Error -impl From for Error { +impl<'a> From for Error<'a> { fn from(err: std::io::Error) -> Self { - Error::IoError(err.to_string()) + Error::IO(err.to_string()) } } @@ -132,32 +145,32 @@ pub struct CompilerConfig { pub debug: bool, } -struct CompilationResult { - location: VariableLocation, +struct CompilationResult<'a> { + location: VariableLocation<'a>, /// If Some, this is the name of the temporary variable that holds the result. /// It must be freed by the caller when done. 
- temp_name: Option, + temp_name: Option>, } -pub struct Compiler<'a, W: std::io::Write> { +pub struct Compiler<'a, 'w, W: std::io::Write> { pub parser: ASTParser<'a>, - function_locations: HashMap, - function_metadata: HashMap>, - devices: HashMap, - output: &'a mut BufWriter, + function_locations: HashMap, usize>, + function_metadata: HashMap, Vec>>, + devices: HashMap, Cow<'a, str>>, + output: &'w mut BufWriter, current_line: usize, declared_main: bool, config: CompilerConfig, temp_counter: usize, label_counter: usize, - loop_stack: Vec<(String, String)>, // Stores (start_label, end_label) - pub errors: Vec, + loop_stack: Vec<(Cow<'a, str>, Cow<'a, str>)>, // Stores (start_label, end_label) + pub errors: Vec>, } -impl<'a, W: std::io::Write> Compiler<'a, W> { +impl<'a, 'w, W: std::io::Write> Compiler<'a, 'w, W> { pub fn new( parser: ASTParser<'a>, - writer: &'a mut BufWriter, + writer: &'w mut BufWriter, config: Option, ) -> Self { Self { @@ -176,12 +189,12 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } } - pub fn compile(mut self) -> Vec { + pub fn compile(mut self) -> Vec> { let expr = self.parser.parse_all(); // Copy errors from parser for e in std::mem::take(&mut self.parser.errors) { - self.errors.push(Error::ParseError(e)); + self.errors.push(Error::Parse(e)); } // We treat parse_all result as potentially partial @@ -190,7 +203,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Ok(None) => return self.errors, Err(e) => { // Should be covered by parser.errors, but just in case - self.errors.push(Error::ParseError(e)); + self.errors.push(Error::Parse(e)); return self.errors; } }; @@ -214,15 +227,17 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { return self.errors; } + let mut scope = VariableScope::default(); + // We ignore the result of the root expression (usually a block) - if let Err(e) = self.expression(spanned_root, &mut VariableScope::default()) { + if let Err(e) = self.expression(spanned_root, &mut scope) { self.errors.push(e); } self.errors } - fn write_output(&mut self, output: impl Into) -> Result<(), Error> { + fn write_output(&mut self, output: impl Into) -> Result<(), Error<'a>> { self.output.write_all(output.into().as_bytes())?; self.output.write_all(b"\n")?; self.current_line += 1; @@ -230,21 +245,21 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Ok(()) } - fn next_temp_name(&mut self) -> String { + fn next_temp_name(&mut self) -> Cow<'a, str> { self.temp_counter += 1; - format!("__binary_temp_{}", self.temp_counter) + Cow::from(format!("__binary_temp_{}", self.temp_counter)) } - fn next_label_name(&mut self) -> String { + fn next_label_name(&mut self) -> Cow<'a, str> { self.label_counter += 1; - format!("L{}", self.label_counter) + Cow::from(format!("L{}", self.label_counter)) } - fn expression<'v>( + fn expression( &mut self, - expr: Spanned, - scope: &mut VariableScope<'v>, - ) -> Result, Error> { + expr: Spanned>, + scope: &mut VariableScope<'a, '_>, + ) -> Result>, Error<'a>> { match expr.node { Expression::Function(expr_func) => { self.expression_function(expr_func, scope)?; @@ -303,11 +318,12 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { // Invocation returns result in r15 (RETURN_REGISTER). // If used as an expression, we must move it to a temp to avoid overwrite. 
let temp_name = self.next_temp_name(); - let temp_loc = scope.add_variable(&temp_name, LocationRequest::Temp, None)?; + let temp_loc = + scope.add_variable(temp_name.clone(), LocationRequest::Temp, None)?; self.emit_variable_assignment( - &temp_name, + temp_name.clone(), &temp_loc, - format!("r{}", VariableScope::RETURN_REGISTER), + Cow::from(format!("r{}", VariableScope::RETURN_REGISTER)), )?; Ok(Some(CompilationResult { location: temp_loc, @@ -325,8 +341,12 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Expression::Literal(spanned_lit) => match spanned_lit.node { Literal::Number(num) => { let temp_name = self.next_temp_name(); - let loc = scope.add_variable(&temp_name, LocationRequest::Temp, None)?; - self.emit_variable_assignment(&temp_name, &loc, num.to_string())?; + let loc = scope.add_variable(temp_name.clone(), LocationRequest::Temp, None)?; + self.emit_variable_assignment( + temp_name.clone(), + &loc, + Cow::from(num.to_string()), + )?; Ok(Some(CompilationResult { location: loc, temp_name: Some(temp_name), @@ -335,8 +355,8 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Literal::Boolean(b) => { let val = if b { "1" } else { "0" }; let temp_name = self.next_temp_name(); - let loc = scope.add_variable(&temp_name, LocationRequest::Temp, None)?; - self.emit_variable_assignment(&temp_name, &loc, val)?; + let loc = scope.add_variable(temp_name.clone(), LocationRequest::Temp, None)?; + self.emit_variable_assignment(temp_name.clone(), &loc, Cow::from(val))?; Ok(Some(CompilationResult { location: loc, temp_name: Some(temp_name), @@ -377,7 +397,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { // 2. Allocate a temp register for the result let result_name = self.next_temp_name(); - let loc = scope.add_variable(&result_name, LocationRequest::Temp, None)?; + let loc = scope.add_variable(result_name.clone(), LocationRequest::Temp, None)?; let reg = self.resolve_register(&loc)?; // 3. Emit load instruction: l rX device member @@ -409,7 +429,8 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { // Compile negation as 0 - inner let (inner_str, cleanup) = self.compile_operand(*inner_expr, scope)?; let result_name = self.next_temp_name(); - let result_loc = scope.add_variable(&result_name, LocationRequest::Temp, None)?; + let result_loc = + scope.add_variable(result_name.clone(), LocationRequest::Temp, None)?; let result_reg = self.resolve_register(&result_loc)?; self.write_output(format!("sub {result_reg} 0 {inner_str}"))?; @@ -435,11 +456,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { /// Resolves an expression to a device identifier string for use in instructions like `s` or `l`. /// Returns (device_string, optional_cleanup_temp_name). 
- fn resolve_device<'v>( + fn resolve_device( &mut self, - expr: Spanned, - scope: &mut VariableScope<'v>, - ) -> Result<(String, Option), Error> { + expr: Spanned>, + scope: &mut VariableScope<'a, '_>, + ) -> Result<(Cow<'a, str>, Option>), Error<'a>> { // If it's a direct variable reference, check if it's a known device alias first if let Expression::Variable(ref name) = expr.node && let Some(device_id) = self.devices.get(&name.node) @@ -453,10 +474,10 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { fn emit_variable_assignment( &mut self, - var_name: &str, - location: &VariableLocation, - source_value: impl Into, - ) -> Result<(), Error> { + var_name: Cow<'a, str>, + location: &VariableLocation<'a>, + source_value: Cow<'a, str>, + ) -> Result<(), Error<'a>> { let debug_tag = if self.config.debug { format!(" #{var_name}") } else { @@ -465,10 +486,10 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { match location { VariableLocation::Temporary(reg) | VariableLocation::Persistant(reg) => { - self.write_output(format!("move r{reg} {}{debug_tag}", source_value.into()))?; + self.write_output(format!("move r{reg} {}{debug_tag}", source_value))?; } VariableLocation::Stack(_) => { - self.write_output(format!("push {}{debug_tag}", source_value.into()))?; + self.write_output(format!("push {}{debug_tag}", source_value))?; } VariableLocation::Constant(_) => { return Err(Error::Unknown( @@ -491,12 +512,12 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Ok(()) } - fn expression_declaration<'v>( + fn expression_declaration( &mut self, - var_name: Spanned, - expr: Spanned, - scope: &mut VariableScope<'v>, - ) -> Result, Error> { + var_name: Spanned>, + expr: Spanned>, + scope: &mut VariableScope<'a, '_>, + ) -> Result>, Error<'a>> { let name_str = var_name.node; let name_span = var_name.span; @@ -505,8 +526,13 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { && let Expression::Literal(spanned_lit) = &box_expr.node && let Literal::Number(neg_num) = &spanned_lit.node { - let loc = scope.add_variable(&name_str, LocationRequest::Persist, Some(name_span))?; - self.emit_variable_assignment(&name_str, &loc, format!("-{neg_num}"))?; + let loc = + scope.add_variable(name_str.clone(), LocationRequest::Persist, Some(name_span))?; + self.emit_variable_assignment( + name_str.clone(), + &loc, + Cow::from(format!("-{neg_num}")), + )?; return Ok(Some(CompilationResult { location: loc, temp_name: None, @@ -522,7 +548,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Some(name_span), )?; - self.emit_variable_assignment(&name_str, &var_location, num)?; + self.emit_variable_assignment( + name_str.clone(), + &var_location, + Cow::from(num.to_string()), + )?; (var_location, None) } Literal::Boolean(b) => { @@ -533,7 +563,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Some(name_span), )?; - self.emit_variable_assignment(&name_str, &var_location, val)?; + self.emit_variable_assignment(name_str, &var_location, Cow::from(val))?; (var_location, None) } _ => return Ok(None), @@ -541,12 +571,15 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Expression::Invocation(invoke_expr) => { self.expression_function_invocation(invoke_expr, scope)?; - let loc = - scope.add_variable(&name_str, LocationRequest::Persist, Some(name_span))?; + let loc = scope.add_variable( + name_str.clone(), + LocationRequest::Persist, + Some(name_span), + )?; self.emit_variable_assignment( - &name_str, + name_str, &loc, - format!("r{}", VariableScope::RETURN_REGISTER), + Cow::from(format!("r{}", VariableScope::RETURN_REGISTER)), )?; (loc, None) } @@ 
-566,12 +599,15 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { )); }; - let loc = - scope.add_variable(&name_str, LocationRequest::Persist, Some(name_span))?; + let loc = scope.add_variable( + name_str.clone(), + LocationRequest::Persist, + Some(name_span), + )?; self.emit_variable_assignment( - &name_str, + name_str, &loc, - format!("r{}", VariableScope::RETURN_REGISTER), + Cow::from(format!("r{}", VariableScope::RETURN_REGISTER)), )?; (loc, None) @@ -579,20 +615,23 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { // Support assigning binary expressions to variables directly Expression::Binary(bin_expr) => { let result = self.expression_binary(bin_expr, scope)?; - let var_loc = - scope.add_variable(&name_str, LocationRequest::Persist, Some(name_span))?; + let var_loc = scope.add_variable( + name_str.clone(), + LocationRequest::Persist, + Some(name_span), + )?; if let CompilationResult { location: VariableLocation::Constant(Literal::Number(num)), .. } = result { - self.emit_variable_assignment(&name_str, &var_loc, num)?; + self.emit_variable_assignment(name_str, &var_loc, Cow::from(num.to_string()))?; (var_loc, None) } else { // Move result from temp to new persistent variable let result_reg = self.resolve_register(&result.location)?; - self.emit_variable_assignment(&name_str, &var_loc, result_reg)?; + self.emit_variable_assignment(name_str, &var_loc, result_reg)?; // Free the temp result if let Some(name) = result.temp_name { @@ -603,12 +642,15 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } Expression::Logical(log_expr) => { let result = self.expression_logical(log_expr, scope)?; - let var_loc = - scope.add_variable(&name_str, LocationRequest::Persist, Some(name_span))?; + let var_loc = scope.add_variable( + name_str.clone(), + LocationRequest::Persist, + Some(name_span), + )?; // Move result from temp to new persistent variable let result_reg = self.resolve_register(&result.location)?; - self.emit_variable_assignment(&name_str, &var_loc, result_reg)?; + self.emit_variable_assignment(name_str, &var_loc, result_reg)?; // Free the temp result if let Some(name) = result.temp_name { @@ -628,8 +670,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } }; - let var_loc = - scope.add_variable(&name_str, LocationRequest::Persist, Some(name_span))?; + let var_loc = scope.add_variable( + name_str.clone(), + LocationRequest::Persist, + Some(name_span), + )?; // Handle loading from stack if necessary let src_str = match src_loc { @@ -649,7 +694,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } VariableLocation::Constant(_) | VariableLocation::Device(_) => unreachable!(), }; - self.emit_variable_assignment(&name_str, &var_loc, src_str)?; + self.emit_variable_assignment(name_str, &var_loc, Cow::from(src_str))?; (var_loc, None) } Expression::Priority(inner) => { @@ -680,11 +725,14 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { )); }; - let var_loc = - scope.add_variable(&name_str, LocationRequest::Persist, Some(name_span))?; + let var_loc = scope.add_variable( + name_str.clone(), + LocationRequest::Persist, + Some(name_span), + )?; let result_reg = self.resolve_register(&comp_res.location)?; - self.emit_variable_assignment(&name_str, &var_loc, result_reg)?; + self.emit_variable_assignment(name_str, &var_loc, result_reg)?; if let Some(temp) = comp_res.temp_name { scope.free_temp(temp, None)?; @@ -706,11 +754,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { })) } - fn expression_const_declaration<'v>( + fn expression_const_declaration( &mut self, - expr: ConstDeclarationExpression, - scope: 
&mut VariableScope<'v>, - ) -> Result { + expr: ConstDeclarationExpression<'a>, + scope: &mut VariableScope<'a, '_>, + ) -> Result, Error<'a>> { let ConstDeclarationExpression { name: const_name, value: const_value, @@ -741,11 +789,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { }) } - fn expression_assignment<'v>( + fn expression_assignment( &mut self, - expr: AssignmentExpression, - scope: &mut VariableScope<'v>, - ) -> Result<(), Error> { + expr: AssignmentExpression<'a>, + scope: &mut VariableScope<'a, '_>, + ) -> Result<(), Error<'a>> { let AssignmentExpression { assignee, expression, @@ -831,9 +879,9 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { fn expression_function_invocation( &mut self, - invoke_expr: Spanned, - stack: &mut VariableScope, - ) -> Result<(), Error> { + invoke_expr: Spanned>, + stack: &mut VariableScope<'a, '_>, + ) -> Result<(), Error<'a>> { let InvocationExpression { name, arguments } = invoke_expr.node; if !self.function_locations.contains_key(&name.node) { @@ -848,12 +896,12 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { let Some(args) = self.function_metadata.get(&name.node) else { // Should be covered by check above - return Err(Error::UnknownIdentifier(name.node.clone(), name.span)); + return Err(Error::UnknownIdentifier(name.node, name.span)); }; if args.len() != arguments.len() { self.errors - .push(Error::AgrumentMismatch(name.node.clone(), name.span)); + .push(Error::AgrumentMismatch(name.node, name.span)); // Proceed anyway? The assembly will likely crash or act weird. // Best to skip generation of this call to prevent bad IC10 return Ok(()); @@ -862,7 +910,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { // backup all used registers to the stack let active_registers = stack.registers().cloned().collect::>(); for register in &active_registers { - stack.add_variable(format!("temp_{register}"), LocationRequest::Stack, None)?; + stack.add_variable( + Cow::from(format!("temp_{register}")), + LocationRequest::Stack, + None, + )?; self.write_output(format!("push r{register}"))?; } for arg in arguments { @@ -879,15 +931,14 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { _ => {} }, Expression::Variable(var_name) => { - let loc = - match stack.get_location_of(var_name.node.clone(), Some(var_name.span)) { - Ok(l) => l, - Err(_) => { - self.errors - .push(Error::UnknownIdentifier(var_name.node, var_name.span)); - VariableLocation::Temporary(0) - } - }; + let loc = match stack.get_location_of(&var_name.node, Some(var_name.span)) { + Ok(l) => l, + Err(_) => { + self.errors + .push(Error::UnknownIdentifier(var_name.node, var_name.span)); + VariableLocation::Temporary(0) + } + }; match loc { VariableLocation::Persistant(reg) | VariableLocation::Temporary(reg) => { @@ -978,8 +1029,8 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { for register in active_registers { let VariableLocation::Stack(stack_offset) = stack - .get_location_of(format!("temp_{register}"), None) - .map_err(Error::ScopeError)? + .get_location_of(&Cow::from(format!("temp_{register}")), None) + .map_err(Error::Scope)? 
else { // This shouldn't happen if we just added it return Err(Error::Unknown( @@ -1004,7 +1055,10 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Ok(()) } - fn expression_device(&mut self, expr: DeviceDeclarationExpression) -> Result<(), Error> { + fn expression_device( + &mut self, + expr: DeviceDeclarationExpression<'a>, + ) -> Result<(), Error<'a>> { if self.devices.contains_key(&expr.name.node) { self.errors.push(Error::DuplicateIdentifier( expr.name.node.clone(), @@ -1020,11 +1074,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Ok(()) } - fn expression_if<'v>( + fn expression_if( &mut self, - expr: IfExpression, - scope: &mut VariableScope<'v>, - ) -> Result<(), Error> { + expr: IfExpression<'a>, + scope: &mut VariableScope<'a, '_>, + ) -> Result<(), Error<'a>> { let end_label = self.next_label_name(); let else_label = if expr.else_branch.is_some() { self.next_label_name() @@ -1067,11 +1121,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Ok(()) } - fn expression_loop<'v>( + fn expression_loop( &mut self, - expr: LoopExpression, - scope: &mut VariableScope<'v>, - ) -> Result<(), Error> { + expr: LoopExpression<'a>, + scope: &mut VariableScope<'a, '_>, + ) -> Result<(), Error<'a>> { let start_label = self.next_label_name(); let end_label = self.next_label_name(); @@ -1093,11 +1147,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Ok(()) } - fn expression_while<'v>( + fn expression_while( &mut self, - expr: WhileExpression, - scope: &mut VariableScope<'v>, - ) -> Result<(), Error> { + expr: WhileExpression<'a>, + scope: &mut VariableScope<'a, '_>, + ) -> Result<(), Error<'a>> { let start_label = self.next_label_name(); let end_label = self.next_label_name(); @@ -1129,7 +1183,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Ok(()) } - fn expression_break(&mut self) -> Result<(), Error> { + fn expression_break(&mut self) -> Result<(), Error<'a>> { if let Some((_, end_label)) = self.loop_stack.last() { self.write_output(format!("j {end_label}"))?; Ok(()) @@ -1141,7 +1195,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } } - fn expression_continue(&mut self) -> Result<(), Error> { + fn expression_continue(&mut self) -> Result<(), Error<'a>> { if let Some((start_label, _)) = self.loop_stack.last() { self.write_output(format!("j {start_label}"))?; Ok(()) @@ -1156,9 +1210,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { /// Helper to resolve a location to a register string (e.g., "r0"). /// Note: This does not handle Stack locations automatically, as they require /// instruction emission to load. Use `compile_operand` for general handling. - fn resolve_register(&self, loc: &VariableLocation) -> Result { + fn resolve_register(&self, loc: &VariableLocation) -> Result, Error<'a>> { match loc { - VariableLocation::Temporary(r) | VariableLocation::Persistant(r) => Ok(format!("r{r}")), + VariableLocation::Temporary(r) | VariableLocation::Persistant(r) => { + Ok(Cow::from(format!("r{r}"))) + } VariableLocation::Constant(_) => Err(Error::Unknown( "Cannot resolve a constant value to register".into(), None, @@ -1180,19 +1236,19 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { /// so the caller can free it. 
fn compile_operand( &mut self, - expr: Spanned, - scope: &mut VariableScope, - ) -> Result<(String, Option), Error> { + expr: Spanned>, + scope: &mut VariableScope<'a, '_>, + ) -> Result<(Cow<'a, str>, Option>), Error<'a>> { // Optimization for literals if let Expression::Literal(spanned_lit) = &expr.node { if let Literal::Number(n) = spanned_lit.node { - return Ok((n.to_string(), None)); + return Ok((Cow::from(n.to_string()), None)); } if let Literal::Boolean(b) = spanned_lit.node { - return Ok((if b { "1".to_string() } else { "0".to_string() }, None)); + return Ok((Cow::from(if b { "1" } else { "0" }), None)); } if let Literal::String(ref s) = spanned_lit.node { - return Ok((s.to_string(), None)); + return Ok((s.clone(), None)); } } @@ -1202,7 +1258,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { && let Expression::Literal(spanned_lit) = &inner.node && let Literal::Number(n) = spanned_lit.node { - return Ok((format!("-{}", n), None)); + return Ok((Cow::from(format!("-{}", n)), None)); } let result_opt = self.expression(expr, scope)?; @@ -1211,23 +1267,24 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Some(r) => r, None => { // Expression failed or returned void. Recover with dummy. - return Ok(("r0".to_string(), None)); + return Ok((Cow::from("r0"), None)); } }; match result.location { VariableLocation::Temporary(r) | VariableLocation::Persistant(r) => { - Ok((format!("r{r}"), result.temp_name)) + Ok((Cow::from(format!("r{r}")), result.temp_name)) } VariableLocation::Constant(lit) => match lit { - Literal::Number(n) => Ok((n.to_string(), None)), - Literal::Boolean(b) => Ok((if b { "1" } else { "0" }.to_string(), None)), + Literal::Number(n) => Ok((Cow::from(n.to_string()), None)), + Literal::Boolean(b) => Ok((Cow::from(if b { "1" } else { "0" }), None)), Literal::String(s) => Ok((s, None)), }, VariableLocation::Stack(offset) => { // If it's on the stack, we must load it into a temp to use it as an operand let temp_name = self.next_temp_name(); - let temp_loc = scope.add_variable(&temp_name, LocationRequest::Temp, None)?; + let temp_loc = + scope.add_variable(temp_name.clone(), LocationRequest::Temp, None)?; let temp_reg = self.resolve_register(&temp_loc)?; self.write_output(format!( @@ -1250,9 +1307,9 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { fn compile_literal_or_variable( &mut self, - val: LiteralOrVariable, - scope: &mut VariableScope, - ) -> Result<(String, Option), Error> { + val: LiteralOrVariable<'a>, + scope: &mut VariableScope<'a, '_>, + ) -> Result<(Cow<'a, str>, Option>), Error<'a>> { let dummy_span = Span { start_line: 0, start_col: 0, @@ -1276,12 +1333,12 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { ) } - fn expression_binary<'v>( + fn expression_binary( &mut self, - expr: Spanned, - scope: &mut VariableScope<'v>, - ) -> Result { - fn fold_binary_expression(expr: &BinaryExpression) -> Option { + expr: Spanned>, + scope: &mut VariableScope<'a, '_>, + ) -> Result, Error<'a>> { + fn fold_binary_expression<'a>(expr: &BinaryExpression<'a>) -> Option { let (lhs, rhs) = match &expr { BinaryExpression::Add(l, r) | BinaryExpression::Subtract(l, r) @@ -1301,7 +1358,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } } - fn fold_expression(expr: &Expression) -> Option { + fn fold_expression<'a>(expr: &Expression<'a>) -> Option { match expr { // 1. 
Base Case: It's already a number Expression::Literal(lit) => match lit.node { @@ -1349,7 +1406,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { // Allocate result register let result_name = self.next_temp_name(); - let result_loc = scope.add_variable(&result_name, LocationRequest::Temp, None)?; + let result_loc = scope.add_variable(result_name.clone(), LocationRequest::Temp, None)?; let result_reg = self.resolve_register(&result_loc)?; // Emit instruction: op result lhs rhs @@ -1369,17 +1426,18 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { }) } - fn expression_logical<'v>( + fn expression_logical( &mut self, - expr: Spanned, - scope: &mut VariableScope<'v>, - ) -> Result { + expr: Spanned>, + scope: &mut VariableScope<'a, '_>, + ) -> Result, Error<'a>> { match expr.node { LogicalExpression::Not(inner) => { let (inner_str, cleanup) = self.compile_operand(*inner, scope)?; let result_name = self.next_temp_name(); - let result_loc = scope.add_variable(&result_name, LocationRequest::Temp, None)?; + let result_loc = + scope.add_variable(result_name.clone(), LocationRequest::Temp, None)?; let result_reg = self.resolve_register(&result_loc)?; // seq rX rY 0 => if rY == 0 set rX = 1 else rX = 0 @@ -1414,7 +1472,8 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { // Allocate result register let result_name = self.next_temp_name(); - let result_loc = scope.add_variable(&result_name, LocationRequest::Temp, None)?; + let result_loc = + scope.add_variable(result_name.clone(), LocationRequest::Temp, None)?; let result_reg = self.resolve_register(&result_loc)?; // Emit instruction: op result lhs rhs @@ -1438,9 +1497,9 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { fn expression_block<'v>( &mut self, - mut expr: BlockExpression, - parent_scope: &mut VariableScope<'v>, - ) -> Result<(), Error> { + mut expr: BlockExpression<'a>, + parent_scope: &'v mut VariableScope<'a, '_>, + ) -> Result<(), Error<'a>> { // First, sort the expressions to ensure functions are hoisted expr.0.sort_by(|a, b| { if matches!( @@ -1502,17 +1561,21 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } /// Takes the result of the expression and stores it in VariableScope::RETURN_REGISTER - fn expression_return<'v>( + fn expression_return( &mut self, - expr: Spanned, - scope: &mut VariableScope<'v>, - ) -> Result { + expr: Spanned>, + scope: &mut VariableScope<'a, '_>, + ) -> Result, Error<'a>> { if let Expression::Negation(neg_expr) = &expr.node && let Expression::Literal(spanned_lit) = &neg_expr.node && let Literal::Number(neg_num) = &spanned_lit.node { let loc = VariableLocation::Persistant(VariableScope::RETURN_REGISTER); - self.emit_variable_assignment("returnValue", &loc, format!("-{neg_num}"))?; + self.emit_variable_assignment( + Cow::from("returnValue"), + &loc, + Cow::from(format!("-{neg_num}")), + )?; return Ok(loc); }; @@ -1565,17 +1628,17 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { Expression::Literal(spanned_lit) => match spanned_lit.node { Literal::Number(num) => { self.emit_variable_assignment( - "returnValue", + Cow::from("returnValue"), &VariableLocation::Persistant(VariableScope::RETURN_REGISTER), - num, + Cow::from(num.to_string()), )?; } Literal::Boolean(b) => { let val = if b { "1" } else { "0" }; self.emit_variable_assignment( - "returnValue", + Cow::from("returnValue"), &VariableLocation::Persistant(VariableScope::RETURN_REGISTER), - val, + Cow::from(val.to_string()), )?; } _ => {} @@ -1634,12 +1697,12 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { // syscalls that return values will be stored in the 
VariableScope::RETURN_REGISTER // register - fn expression_syscall_system<'v>( + fn expression_syscall_system( &mut self, - expr: System, + expr: System<'a>, span: Span, - scope: &mut VariableScope<'v>, - ) -> Result, Error> { + scope: &mut VariableScope<'a, '_>, + ) -> Result>, Error<'a>> { macro_rules! cleanup { ($($to_clean:expr),*) => { $( @@ -1710,7 +1773,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { .devices .get(&device_name) .cloned() - .unwrap_or("d0".to_string()); + .unwrap_or(Cow::from("d0")); let Spanned { node: Literal::String(logic_type), @@ -1802,7 +1865,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { .devices .get(&device_name) .cloned() - .unwrap_or("d0".to_string()); + .unwrap_or(Cow::from("d0")); let Spanned { node: Literal::String(logic_type), @@ -1892,11 +1955,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } } - fn expression_syscall_math<'v>( + fn expression_syscall_math( &mut self, - expr: Math, - scope: &mut VariableScope<'v>, - ) -> Result, Error> { + expr: Math<'a>, + scope: &mut VariableScope<'a, '_>, + ) -> Result>, Error<'a>> { macro_rules! cleanup { ($($to_clean:expr),*) => { $( @@ -2088,11 +2151,11 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { /// Compile a function declaration. /// Calees are responsible for backing up any registers they wish to use. - fn expression_function<'v>( + fn expression_function( &mut self, - expr: Spanned, - scope: &mut VariableScope<'v>, - ) -> Result<(), Error> { + expr: Spanned>, + scope: &mut VariableScope<'a, '_>, + ) -> Result<(), Error<'a>> { let FunctionExpression { name, arguments, @@ -2174,7 +2237,7 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { self.write_output("push ra")?; block_scope.add_variable( - format!("{}_ra", name.node), + Cow::from(format!("{}_ra", name.node)), LocationRequest::Stack, Some(name.span), )?; @@ -2201,7 +2264,8 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } // Get the saved return address and save it back into `ra` - let ra_res = block_scope.get_location_of(format!("{}_ra", name.node), Some(name.span)); + let ra_res = + block_scope.get_location_of(&Cow::from(format!("{}_ra", name.node)), Some(name.span)); let ra_stack_offset = match ra_res { Ok(VariableLocation::Stack(offset)) => offset, diff --git a/rust_compiler/libs/compiler/src/variable_manager.rs b/rust_compiler/libs/compiler/src/variable_manager.rs index f5f7e72..77a5c24 100644 --- a/rust_compiler/libs/compiler/src/variable_manager.rs +++ b/rust_compiler/libs/compiler/src/variable_manager.rs @@ -5,28 +5,28 @@ use lsp_types::{Diagnostic, DiagnosticSeverity}; use parser::tree_node::{Literal, Span}; -use quick_error::quick_error; -use std::collections::{HashMap, VecDeque}; +use std::{ + borrow::Cow, + collections::{HashMap, VecDeque}, +}; +use thiserror::Error; const TEMP: [u8; 7] = [1, 2, 3, 4, 5, 6, 7]; const PERSIST: [u8; 7] = [8, 9, 10, 11, 12, 13, 14]; -quick_error! 
{ - #[derive(Debug)] - pub enum Error { - DuplicateVariable(var: String, span: Option) { - display("{var} already exists.") - } - UnknownVariable(var: String, span: Option) { - display("{var} does not exist.") - } - Unknown(reason: String, span: Option) { - display("{reason}") - } - } +#[derive(Error, Debug)] +pub enum Error<'a> { + #[error("{0} already exists.")] + DuplicateVariable(Cow<'a, str>, Option), + + #[error("{0} does not exist.")] + UnknownVariable(Cow<'a, str>, Option), + + #[error("{0}")] + Unknown(Cow<'a, str>, Option), } -impl From for lsp_types::Diagnostic { +impl<'a> From> for lsp_types::Diagnostic { fn from(value: Error) -> Self { match value { Error::DuplicateVariable(_, span) @@ -53,7 +53,7 @@ pub enum LocationRequest { } #[derive(Clone)] -pub enum VariableLocation { +pub enum VariableLocation<'a> { /// Represents a temporary register (r1 - r7) Temporary(u8), /// Represents a persistant register (r8 - r14) @@ -61,20 +61,22 @@ pub enum VariableLocation { /// Represents a a stack offset (current stack - offset = variable loc) Stack(u16), /// Represents a constant value and should be directly substituted as such. - Constant(Literal), + Constant(Literal<'a>), /// Represents a device pin. This will contain the exact `d0-d5` string - Device(String), + Device(Cow<'a, str>), } -pub struct VariableScope<'a> { +// FIX: Added 'b lifetime for the parent reference +pub struct VariableScope<'a, 'b> { temporary_vars: VecDeque, persistant_vars: VecDeque, - var_lookup_table: HashMap, + var_lookup_table: HashMap, VariableLocation<'a>>, stack_offset: u16, - parent: Option<&'a VariableScope<'a>>, + parent: Option<&'b VariableScope<'a, 'b>>, } -impl<'a> Default for VariableScope<'a> { +// FIX: Updated Default impl to include 'b +impl<'a, 'b> Default for VariableScope<'a, 'b> { fn default() -> Self { Self { parent: None, @@ -86,7 +88,8 @@ impl<'a> Default for VariableScope<'a> { } } -impl<'a> VariableScope<'a> { +// FIX: Updated impl block to include 'b +impl<'a, 'b> VariableScope<'a, 'b> { #[allow(dead_code)] pub const TEMP_REGISTER_COUNT: u8 = 7; pub const PERSIST_REGISTER_COUNT: u8 = 7; @@ -109,7 +112,8 @@ impl<'a> VariableScope<'a> { }) } - pub fn scoped(parent: &'a VariableScope<'a>) -> Self { + // FIX: parent is now &'b VariableScope<'a, 'b> + pub fn scoped(parent: &'b VariableScope<'a, 'b>) -> Self { Self { parent: Option::Some(parent), temporary_vars: parent.temporary_vars.clone(), @@ -126,12 +130,11 @@ impl<'a> VariableScope<'a> { /// to the stack. pub fn add_variable( &mut self, - var_name: impl Into, + var_name: Cow<'a, str>, location: LocationRequest, span: Option, - ) -> Result { - let var_name = var_name.into(); - if self.var_lookup_table.contains_key(var_name.as_str()) { + ) -> Result, Error<'a>> { + if self.var_lookup_table.contains_key(&var_name) { return Err(Error::DuplicateVariable(var_name, span)); } let var_location = match location { @@ -166,11 +169,10 @@ impl<'a> VariableScope<'a> { pub fn define_const( &mut self, - var_name: impl Into, - value: Literal, + var_name: Cow<'a, str>, + value: Literal<'a>, span: Option, - ) -> Result { - let var_name = var_name.into(); + ) -> Result, Error<'a>> { if self.var_lookup_table.contains_key(&var_name) { return Err(Error::DuplicateVariable(var_name, span)); } @@ -183,13 +185,11 @@ impl<'a> VariableScope<'a> { pub fn get_location_of( &self, - var_name: impl Into, + var_name: &Cow<'a, str>, span: Option, - ) -> Result { - let var_name = var_name.into(); - + ) -> Result, Error<'a>> { // 1. 
Check this scope - if let Some(var) = self.var_lookup_table.get(var_name.as_str()) { + if let Some(var) = self.var_lookup_table.get(var_name) { if let VariableLocation::Stack(inserted_at_offset) = var { // Return offset relative to CURRENT sp return Ok(VariableLocation::Stack( @@ -210,7 +210,7 @@ impl<'a> VariableScope<'a> { return Ok(loc); } - Err(Error::UnknownVariable(var_name, span)) + Err(Error::UnknownVariable(var_name.clone(), span)) } pub fn has_parent(&self) -> bool { @@ -220,11 +220,10 @@ impl<'a> VariableScope<'a> { #[allow(dead_code)] pub fn free_temp( &mut self, - var_name: impl Into, + var_name: Cow<'a, str>, span: Option, - ) -> Result<(), Error> { - let var_name = var_name.into(); - let Some(location) = self.var_lookup_table.remove(var_name.as_str()) else { + ) -> Result<(), Error<'a>> { + let Some(location) = self.var_lookup_table.remove(&var_name) else { return Err(Error::UnknownVariable(var_name, span)); }; @@ -234,7 +233,7 @@ impl<'a> VariableScope<'a> { } VariableLocation::Persistant(_) => { return Err(Error::UnknownVariable( - String::from("Attempted to free a `let` variable."), + Cow::from("Attempted to free a `let` variable."), span, )); } diff --git a/rust_compiler/libs/compiler/test_files/script.slang b/rust_compiler/libs/compiler/test_files/script.slang new file mode 100644 index 0000000..8346c64 --- /dev/null +++ b/rust_compiler/libs/compiler/test_files/script.slang @@ -0,0 +1,41 @@ +// Pressure numbers are in KPa + +device self = "db"; +device emergencyRelief = "d0"; +device greenhouseSensor = "d1"; +device recycleValve = "d2"; + +const MAX_INTERIOR_PRESSURE = 80; +const MAX_INTERIOR_TEMP = 28c; +const MIN_INTERIOR_PRESSURE = 75; +const MIN_INTERIOR_TEMP = 25c; +const daylightSensor = 1076425094; +const growLight = hash("StructureGrowLight"); +const wallLight = hash("StructureLightLong"); +const lightRound = hash("StructureLightRound"); + +let shouldPurge = false; + +loop { + let interiorPress = greenhouseSensor.Pressure; + let interiorTemp = greenhouseSensor.Temperature; + + shouldPurge = ( + interiorPress > MAX_INTERIOR_PRESSURE || + interiorTemp > MAX_INTERIOR_TEMP + ) || shouldPurge; + + emergencyRelief.On = shouldPurge; + recycleValve.On = !shouldPurge; + + if (shouldPurge && (interiorPress < MIN_INTERIOR_PRESSURE && interiorTemp < MIN_INTERIOR_TEMP)) { + shouldPurge = false; + } + + let solarAngle = lb(daylightSensor, "SolarAngle", "Average"); + let isDaylight = solarAngle < 90; + + sb(growLight, "On", isDaylight); + sb(wallLight, "On", !isDaylight); + sb(lightRound, "On", !isDaylight); +} \ No newline at end of file diff --git a/rust_compiler/libs/helpers/src/macros.rs b/rust_compiler/libs/helpers/src/macros.rs index 9c51e46..ca9d260 100644 --- a/rust_compiler/libs/helpers/src/macros.rs +++ b/rust_compiler/libs/helpers/src/macros.rs @@ -3,15 +3,10 @@ macro_rules! documented { // ------------------------------------------------------------------------- // Internal Helper: Filter doc comments // ------------------------------------------------------------------------- - - // Case 1: Doc comment. Return Some("string"). - // We match the specific structure of a doc attribute. (@doc_filter #[doc = $doc:expr]) => { Some($doc) }; - // Case 2: Other attributes (derives, etc.). Return None. - // We catch any other token sequence inside the brackets. (@doc_filter #[$($attr:tt)*]) => { None }; @@ -30,23 +25,59 @@ macro_rules! 
documented { }; // ------------------------------------------------------------------------- - // Main Macro Entry Point + // Entry Point 1: Enum with a single Lifetime (e.g. enum Foo<'a>) + // ------------------------------------------------------------------------- + ( + $(#[$enum_attr:meta])* $vis:vis enum $name:ident < $lt:lifetime > { + $($body:tt)* + } + ) => { + documented!(@generate + meta: [$(#[$enum_attr])*], + vis: [$vis], + name: [$name], + generics: [<$lt>], + body: [$($body)*] + ); + }; + + // ------------------------------------------------------------------------- + // Entry Point 2: Regular Enum (No Generics) // ------------------------------------------------------------------------- ( $(#[$enum_attr:meta])* $vis:vis enum $name:ident { + $($body:tt)* + } + ) => { + documented!(@generate + meta: [$(#[$enum_attr])*], + vis: [$vis], + name: [$name], + generics: [], + body: [$($body)*] + ); + }; + + // ------------------------------------------------------------------------- + // Code Generator (Shared Logic) + // ------------------------------------------------------------------------- + (@generate + meta: [$(#[$enum_attr:meta])*], + vis: [$vis:vis], + name: [$name:ident], + generics: [$($generics:tt)*], + body: [ $( - // Capture attributes as a sequence of token trees inside brackets - // to avoid "local ambiguity" and handle multi-token attributes (like doc="..."). $(#[ $($variant_attr:tt)* ])* $variant:ident $( ($($tuple:tt)*) )? $( {$($structure:tt)*} )? ),* $(,)? - } + ] ) => { - // 1. Generate the actual Enum definition + // 1. Generate the Enum Definition $(#[$enum_attr])* - $vis enum $name { + $vis enum $name $($generics)* { $( $(#[ $($variant_attr)* ])* $variant @@ -55,20 +86,19 @@ macro_rules! documented { )* } - // 2. Implement the Documentation Trait - impl Documentation for $name { + // 2. Implement Documentation Trait + // We apply the captured generics (e.g., <'a>) to both the impl and the type + impl $($generics)* Documentation for $name $($generics)* { fn docs(&self) -> String { match self { $( documented!(@arm $name $variant $( ($($tuple)*) )? $( {$($structure)*} )? ) => { - // Create a temporary array of Option<&str> for all attributes let doc_lines: &[Option<&str>] = &[ $( documented!(@doc_filter #[ $($variant_attr)* ]) ),* ]; - // Filter out the Nones (non-doc attributes), join, and return doc_lines.iter() .filter_map(|&d| d) .collect::>() @@ -80,7 +110,6 @@ macro_rules! documented { } } - // 3. Implement Static Documentation Provider #[allow(dead_code)] fn get_all_documentation() -> Vec<(&'static str, String)> { vec![ @@ -88,7 +117,6 @@ macro_rules! 
documented { ( stringify!($variant), { - // Re-use the same extraction logic let doc_lines: &[Option<&str>] = &[ $( documented!(@doc_filter #[ $($variant_attr)* ]) diff --git a/rust_compiler/libs/parser/Cargo.toml b/rust_compiler/libs/parser/Cargo.toml index 336b498..1c5d935 100644 --- a/rust_compiler/libs/parser/Cargo.toml +++ b/rust_compiler/libs/parser/Cargo.toml @@ -4,10 +4,10 @@ version = "0.1.0" edition = "2024" [dependencies] -quick-error = { workspace = true } tokenizer = { path = "../tokenizer" } helpers = { path = "../helpers" } lsp-types = { workspace = true } +thiserror = { workspace = true } [dev-dependencies] diff --git a/rust_compiler/libs/parser/src/lib.rs b/rust_compiler/libs/parser/src/lib.rs index 2011ebb..8a99e9c 100644 --- a/rust_compiler/libs/parser/src/lib.rs +++ b/rust_compiler/libs/parser/src/lib.rs @@ -1,13 +1,12 @@ +pub mod sys_call; #[cfg(test)] mod test; - -pub mod sys_call; pub mod tree_node; use crate::sys_call::{Math, System}; -use quick_error::quick_error; -use std::io::SeekFrom; +use std::{borrow::Cow, io::SeekFrom}; use sys_call::SysCall; +use thiserror::Error; use tokenizer::{ self, Tokenizer, TokenizerBuffer, token::{Keyword, Symbol, Token, TokenType}, @@ -26,38 +25,33 @@ macro_rules! boxed { }; } -quick_error! { - #[derive(Debug)] - pub enum Error { - TokenizerError(err: tokenizer::Error) { - from() - display("Tokenizer Error: {}", err) - source(err) - } - UnexpectedToken(span: Span, token: Token) { - display("Unexpected token: {}", token.token_type) - } - DuplicateIdentifier(span: Span, token: Token) { - display("Duplicate identifier: {}", token.token_type) - } - InvalidSyntax(span: Span, reason: String) { - display("Invalid syntax: {}", reason) - } - UnsupportedKeyword(span: Span, token: Token) { - display("Unsupported keyword: {}", token.token_type) - } - UnexpectedEOF { - display("Unexpected EOF") - } - } +#[derive(Error, Debug)] +pub enum Error<'a> { + #[error(transparent)] + Tokenizer(#[from] tokenizer::Error), + + #[error("Unexpected token: {1}")] + UnexpectedToken(Span, Token<'a>), + + #[error("Duplicate identifier: {1}")] + DuplicateIdentifier(Span, Token<'a>), + + #[error("Invalid Syntax: {1}")] + InvalidSyntax(Span, String), + + #[error("Unsupported Keyword: {1}")] + UnsupportedKeyword(Span, Token<'a>), + + #[error("Unexpected End of File")] + UnexpectedEOF, } -impl From for lsp_types::Diagnostic { +impl<'a> From> for lsp_types::Diagnostic { fn from(value: Error) -> Self { use Error::*; use lsp_types::*; match value { - TokenizerError(e) => e.into(), + Tokenizer(e) => e.into(), UnexpectedToken(span, _) | DuplicateIdentifier(span, _) | InvalidSyntax(span, _) @@ -111,8 +105,8 @@ macro_rules! self_matches_current { pub struct Parser<'a> { tokenizer: TokenizerBuffer<'a>, - current_token: Option, - pub errors: Vec, + current_token: Option>, + pub errors: Vec>, } impl<'a> Parser<'a> { @@ -125,13 +119,12 @@ impl<'a> Parser<'a> { } /// Calculates a Span from a given Token reference. 
- fn token_to_span(t: &Token) -> Span { - let len = t.original_string.as_ref().map(|s| s.len()).unwrap_or(0); + fn token_to_span(t: &Token<'a>) -> Span { Span { start_line: t.line, - start_col: t.column, + start_col: t.span.start, end_line: t.line, - end_col: t.column + len, + end_col: t.span.end, } } @@ -148,9 +141,9 @@ impl<'a> Parser<'a> { } /// Helper to run a parsing closure and wrap the result in a Spanned struct - fn spanned(&mut self, parser: F) -> Result, Error> + fn spanned(&mut self, parser: F) -> Result, Error<'a>> where - F: FnOnce(&mut Self) -> Result, + F: FnOnce(&mut Self) -> Result>, { let start_token = if self.current_token.is_some() { self.current_token.clone() @@ -160,18 +153,16 @@ impl<'a> Parser<'a> { let (start_line, start_col) = start_token .as_ref() - .map(|t| (t.line, t.column)) - .unwrap_or((1, 1)); + .map(|t| (t.line, t.span.start)) + .unwrap_or((0, 0)); let node = parser(self)?; - let end_token = self.current_token.as_ref(); + let end_token = &self.current_token; let (end_line, end_col) = end_token - .map(|t| { - let len = t.original_string.as_ref().map(|s| s.len()).unwrap_or(0); - (t.line, t.column + len) - }) + .clone() + .map(|t| (t.line, t.span.end)) .unwrap_or((start_line, start_col)); Ok(Spanned { @@ -185,7 +176,7 @@ impl<'a> Parser<'a> { }) } - fn synchronize(&mut self) -> Result<(), Error> { + fn synchronize(&mut self) -> Result<(), Error<'a>> { self.assign_next()?; while let Some(token) = &self.current_token { @@ -211,20 +202,20 @@ impl<'a> Parser<'a> { Ok(()) } - pub fn parse_all(&mut self) -> Result, Error> { + pub fn parse_all(&mut self) -> Result>, Error<'a>> { let first_token = self.tokenizer.peek().unwrap_or(None); let (start_line, start_col) = first_token .as_ref() - .map(|tok| (tok.line, tok.column)) - .unwrap_or((1, 1)); + .map(|tok| (tok.line, tok.span.start)) + .unwrap_or((0, 0)); - let mut expressions = Vec::>::new(); + let mut expressions = Vec::>>::new(); loop { match self.tokenizer.peek() { Ok(None) => break, Err(e) => { - self.errors.push(Error::TokenizerError(e)); + self.errors.push(Error::Tokenizer(e)); break; } _ => {} @@ -246,10 +237,7 @@ impl<'a> Parser<'a> { let end_token_opt = self.tokenizer.peek().unwrap_or(None); let (end_line, end_col) = end_token_opt - .map(|tok| { - let len = tok.original_string.as_ref().map(|s| s.len()).unwrap_or(0); - (tok.line, tok.column + len) - }) + .map(|tok| (tok.line, tok.span.end)) .unwrap_or((start_line, start_col)); let span = Span { @@ -265,7 +253,7 @@ impl<'a> Parser<'a> { }))) } - pub fn parse(&mut self) -> Result>, Error> { + pub fn parse(&mut self) -> Result>>, Error<'a>> { self.assign_next()?; if self.current_token.is_none() { @@ -281,17 +269,17 @@ impl<'a> Parser<'a> { Ok(expr) } - fn assign_next(&mut self) -> Result<(), Error> { + fn assign_next(&mut self) -> Result<(), Error<'a>> { self.current_token = self.tokenizer.next_token()?; Ok(()) } - fn get_next(&mut self) -> Result, Error> { + fn get_next(&mut self) -> Result>, Error<'a>> { self.assign_next()?; - Ok(self.current_token.as_ref()) + Ok(self.current_token.clone()) } - fn expression(&mut self) -> Result>, Error> { + fn expression(&mut self) -> Result>>, Error<'a>> { // Parse the Left Hand Side (unary/primary expression) let lhs = self.unary()?; @@ -322,14 +310,14 @@ impl<'a> Parser<'a> { /// Handles dot notation chains: x.y.z() fn parse_postfix( &mut self, - mut lhs: Spanned, - ) -> Result, Error> { + mut lhs: Spanned>, + ) -> Result>, Error<'a>> { loop { if self_matches_peek!(self, TokenType::Symbol(Symbol::Dot)) { 
self.assign_next()?; // consume Dot let identifier_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - let identifier_span = Self::token_to_span(identifier_token); + let identifier_span = Self::token_to_span(&identifier_token); let identifier = match identifier_token.token_type { TokenType::Identifier(ref id) => id.clone(), _ => { @@ -344,7 +332,7 @@ impl<'a> Parser<'a> { if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) { // Method Call self.assign_next()?; // consume '(' - let mut arguments = Vec::>::new(); + let mut arguments = Vec::>>::new(); while !token_matches!( self.get_next()?.ok_or(Error::UnexpectedEOF)?, @@ -366,8 +354,8 @@ impl<'a> Parser<'a> { { let next_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; return Err(Error::UnexpectedToken( - Self::token_to_span(next_token), - next_token.clone(), + Self::token_to_span(&next_token), + next_token, )); } @@ -429,7 +417,7 @@ impl<'a> Parser<'a> { Ok(lhs) } - fn unary(&mut self) -> Result>, Error> { + fn unary(&mut self) -> Result>>, Error<'a>> { macro_rules! matches_keyword { ($keyword:expr, $($pattern:pat),+) => { matches!($keyword, $($pattern)|+) @@ -507,10 +495,7 @@ impl<'a> Parser<'a> { let span = self.current_span(); let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::Semicolon)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); + return Err(Error::UnexpectedToken(Self::token_to_span(&next), next)); } Some(Spanned { span, @@ -522,10 +507,7 @@ impl<'a> Parser<'a> { let span = self.current_span(); let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::Semicolon)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); + return Err(Error::UnexpectedToken(Self::token_to_span(&next), next)); } Some(Spanned { span, @@ -586,17 +568,6 @@ impl<'a> Parser<'a> { let start_span = self.current_span(); self.assign_next()?; let inner_expr = self.unary()?.ok_or(Error::UnexpectedEOF)?; - // NOTE: Unary negation can also have postfix applied to the inner expression - // But generally -a.b parses as -(a.b), which is what parse_postfix ensures if called here. - // However, we call parse_postfix on the RESULT of unary in expression(), so - // `expression` sees `Negation`. `parse_postfix` doesn't apply to Negation node unless we allow it? - // Actually, `x.y` binds tighter than `-`. `postfix` logic belongs inside `unary` logic or - // `expression` logic. - // If I have `-x.y`, standard precedence says `-(x.y)`. - // `unary` returns `Negation(x)`. Then `expression` calls `postfix` on `Negation(x)`. - // `postfix` loop runs on `Negation`. This implies `(-x).y`. This is usually WRONG. - // `.` binds tighter than `-`. - // So `unary` must call `postfix` on the *operand* of the negation. 
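The removed note above still describes the intended precedence, and the call to `parse_postfix` on the negation's operand (next line) is what enforces it. A small illustrative sketch of the required tree shape, using hypothetical `Ast` names rather than this crate's `tree_node` types:

```rust
// Illustrative only: `.` binds tighter than unary `-`, so `-x.y` must become
// Neg(Member(x, "y")), never Member(Neg(x), "y").
#[derive(Debug, PartialEq)]
enum Ast {
    Var(&'static str),
    Member(Box<Ast>, &'static str),
    Neg(Box<Ast>),
}

fn main() {
    let intended = Ast::Neg(Box::new(Ast::Member(Box::new(Ast::Var("x")), "y")));
    let wrong = Ast::Member(Box::new(Ast::Neg(Box::new(Ast::Var("x")))), "y");
    assert_ne!(intended, wrong);
    println!("-x.y parses as {intended:?}");
}
```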
let inner_with_postfix = self.parse_postfix(inner_expr)?; @@ -643,7 +614,7 @@ impl<'a> Parser<'a> { Ok(expr) } - fn get_infix_child_node(&mut self) -> Result, Error> { + fn get_infix_child_node(&mut self) -> Result>, Error<'a>> { let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; let start_span = self.current_span(); @@ -727,7 +698,7 @@ impl<'a> Parser<'a> { self.parse_postfix(expr) } - fn device(&mut self) -> Result { + fn device(&mut self) -> Result, Error<'a>> { let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; if !self_matches_current!(self, TokenType::Keyword(Keyword::Device)) { return Err(Error::UnexpectedToken( @@ -737,12 +708,12 @@ impl<'a> Parser<'a> { } let identifier_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - let identifier_span = Self::token_to_span(identifier_token); + let identifier_span = Self::token_to_span(&identifier_token); let identifier = match identifier_token.token_type { TokenType::Identifier(ref id) => id.clone(), _ => { return Err(Error::UnexpectedToken( - Self::token_to_span(identifier_token), + Self::token_to_span(&identifier_token), identifier_token.clone(), )); } @@ -751,7 +722,7 @@ impl<'a> Parser<'a> { let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(current_token, TokenType::Symbol(Symbol::Assign)) { return Err(Error::UnexpectedToken( - Self::token_to_span(current_token), + Self::token_to_span(¤t_token), current_token.clone(), )); } @@ -761,7 +732,7 @@ impl<'a> Parser<'a> { TokenType::String(ref id) => id.clone(), _ => { return Err(Error::UnexpectedToken( - Self::token_to_span(device_token), + Self::token_to_span(&device_token), device_token.clone(), )); } @@ -776,7 +747,10 @@ impl<'a> Parser<'a> { }) } - fn infix(&mut self, previous: Spanned) -> Result, Error> { + fn infix( + &mut self, + previous: Spanned>, + ) -> Result>, Error<'a>> { let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?.clone(); match previous.node { @@ -1084,7 +1058,7 @@ impl<'a> Parser<'a> { expressions.pop().ok_or(Error::UnexpectedEOF) } - fn priority(&mut self) -> Result>>, Error> { + fn priority(&mut self) -> Result>>>, Error<'a>> { let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; if !token_matches!(current_token, TokenType::Symbol(Symbol::LParen)) { return Err(Error::UnexpectedToken( @@ -1099,15 +1073,15 @@ impl<'a> Parser<'a> { let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(current_token, TokenType::Symbol(Symbol::RParen)) { return Err(Error::UnexpectedToken( - Self::token_to_span(current_token), - current_token.clone(), + Self::token_to_span(¤t_token), + current_token, )); } Ok(Some(boxed!(expression))) } - fn invocation(&mut self) -> Result { + fn invocation(&mut self) -> Result, Error<'a>> { let identifier_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; let identifier_span = Self::token_to_span(identifier_token); let identifier = match identifier_token.token_type { @@ -1123,8 +1097,8 @@ impl<'a> Parser<'a> { let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(current_token, TokenType::Symbol(Symbol::LParen)) { return Err(Error::UnexpectedToken( - Self::token_to_span(current_token), - current_token.clone(), + Self::token_to_span(¤t_token), + current_token, )); } @@ -1150,8 +1124,8 @@ impl<'a> Parser<'a> { { let next_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; return Err(Error::UnexpectedToken( - Self::token_to_span(next_token), - 
next_token.clone(), + Self::token_to_span(&next_token), + next_token, )); } @@ -1169,7 +1143,7 @@ impl<'a> Parser<'a> { }) } - fn block(&mut self) -> Result { + fn block(&mut self) -> Result, Error<'a>> { let mut expressions = Vec::>::new(); let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; @@ -1192,7 +1166,7 @@ impl<'a> Parser<'a> { if token_matches!(current_token, TokenType::Keyword(Keyword::Return)) { // Need to capture return span - let ret_start_span = Self::token_to_span(current_token); + let ret_start_span = Self::token_to_span(¤t_token); self.assign_next()?; let expression = self.expression()?.ok_or(Error::UnexpectedEOF)?; @@ -1211,25 +1185,19 @@ impl<'a> Parser<'a> { let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::Semicolon)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); + return Err(Error::UnexpectedToken(Self::token_to_span(&next), next)); } let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::RBrace)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); + return Err(Error::UnexpectedToken(Self::token_to_span(&next), next)); } } Ok(BlockExpression(expressions)) } - fn const_declaration(&mut self) -> Result { + fn const_declaration(&mut self) -> Result, Error<'a>> { // const let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; if !self_matches_current!(self, TokenType::Keyword(Keyword::Const)) { @@ -1241,7 +1209,7 @@ impl<'a> Parser<'a> { // variable_name let ident_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - let ident_span = Self::token_to_span(ident_token); + let ident_span = Self::token_to_span(&ident_token); let ident = match ident_token.token_type { TokenType::Identifier(ref id) => id.clone(), _ => return Err(Error::UnexpectedToken(ident_span, ident_token.clone())), @@ -1299,7 +1267,7 @@ impl<'a> Parser<'a> { } } - fn declaration(&mut self) -> Result { + fn declaration(&mut self) -> Result, Error<'a>> { let current_token = self.current_token.as_ref().ok_or(Error::UnexpectedEOF)?; if !self_matches_current!(self, TokenType::Keyword(Keyword::Let)) { return Err(Error::UnexpectedToken( @@ -1308,13 +1276,13 @@ impl<'a> Parser<'a> { )); } let identifier_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - let identifier_span = Self::token_to_span(identifier_token); + let identifier_span = Self::token_to_span(&identifier_token); let identifier = match identifier_token.token_type { TokenType::Identifier(ref id) => id.clone(), _ => { return Err(Error::UnexpectedToken( - Self::token_to_span(identifier_token), - identifier_token.clone(), + Self::token_to_span(&identifier_token), + identifier_token, )); } }; @@ -1334,8 +1302,8 @@ impl<'a> Parser<'a> { let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(current_token, TokenType::Symbol(Symbol::Semicolon)) { return Err(Error::UnexpectedToken( - Self::token_to_span(current_token), - current_token.clone(), + Self::token_to_span(¤t_token), + current_token, )); } @@ -1348,7 +1316,7 @@ impl<'a> Parser<'a> { )) } - fn literal(&mut self) -> Result { + fn literal(&mut self) -> Result, Error<'a>> { let current_token = self.current_token.clone().ok_or(Error::UnexpectedEOF)?; let literal = match current_token.token_type { TokenType::Number(num) => Literal::Number(num), @@ -1358,11 +1326,11 @@ impl<'a> Parser<'a> { Some(Token { token_type: 
TokenType::Number(num), .. - }) => Literal::Number(-*num), + }) => Literal::Number(-num), Some(wrong_token) => { return Err(Error::UnexpectedToken( - Self::token_to_span(wrong_token), - wrong_token.clone(), + Self::token_to_span(&wrong_token), + wrong_token, )); } None => return Err(Error::UnexpectedEOF), @@ -1378,14 +1346,11 @@ impl<'a> Parser<'a> { Ok(literal) } - fn if_expression(&mut self) -> Result { + fn if_expression(&mut self) -> Result, Error<'a>> { // 'if' is current let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::LParen)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); + return Err(Error::UnexpectedToken(Self::token_to_span(&next), next)); } self.assign_next()?; @@ -1393,18 +1358,12 @@ impl<'a> Parser<'a> { let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::RParen)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); + return Err(Error::UnexpectedToken(Self::token_to_span(&next), next)); } let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::LBrace)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); + return Err(Error::UnexpectedToken(Self::token_to_span(&next), next)); } let body = self.spanned(|p| p.block())?; @@ -1429,10 +1388,7 @@ impl<'a> Parser<'a> { })) } else { let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); + return Err(Error::UnexpectedToken(Self::token_to_span(&next), next)); } } else { None @@ -1445,13 +1401,10 @@ impl<'a> Parser<'a> { }) } - fn loop_expression(&mut self) -> Result { + fn loop_expression(&mut self) -> Result, Error<'a>> { let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::LBrace)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); + return Err(Error::UnexpectedToken(Self::token_to_span(&next), next)); } let body = self.spanned(|p| p.block())?; @@ -1459,13 +1412,10 @@ impl<'a> Parser<'a> { Ok(LoopExpression { body }) } - fn while_expression(&mut self) -> Result { + fn while_expression(&mut self) -> Result, Error<'a>> { let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::LParen)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); + return Err(Error::UnexpectedToken(Self::token_to_span(&next), next)); } self.assign_next()?; @@ -1473,18 +1423,12 @@ impl<'a> Parser<'a> { let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::RParen)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); + return Err(Error::UnexpectedToken(Self::token_to_span(&next), next)); } let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(next, TokenType::Symbol(Symbol::LBrace)) { - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); + return Err(Error::UnexpectedToken(Self::token_to_span(&next), next)); } let body = self.block()?; @@ -1495,29 +1439,26 @@ impl<'a> Parser<'a> { }) } - fn function(&mut self) -> Result { + fn function(&mut self) -> Result, Error<'a>> { // 'fn' is current let fn_ident_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - let 
fn_ident_span = Self::token_to_span(fn_ident_token); + let fn_ident_span = Self::token_to_span(&fn_ident_token); let fn_ident = match fn_ident_token.token_type { TokenType::Identifier(ref id) => id.clone(), _ => { - return Err(Error::UnexpectedToken( - Self::token_to_span(fn_ident_token), - fn_ident_token.clone(), - )); + return Err(Error::UnexpectedToken(fn_ident_span, fn_ident_token)); } }; let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(current_token, TokenType::Symbol(Symbol::LParen)) { return Err(Error::UnexpectedToken( - Self::token_to_span(current_token), - current_token.clone(), + Self::token_to_span(¤t_token), + current_token, )); } - let mut arguments = Vec::>::new(); + let mut arguments = Vec::>>::new(); while !token_matches!( self.get_next()?.ok_or(Error::UnexpectedEOF)?, @@ -1528,10 +1469,7 @@ impl<'a> Parser<'a> { let argument = match current_token.token_type { TokenType::Identifier(ref id) => id.clone(), _ => { - return Err(Error::UnexpectedToken( - Self::token_to_span(current_token), - current_token.clone(), - )); + return Err(Error::UnexpectedToken(arg_span, current_token.clone())); } }; @@ -1553,10 +1491,7 @@ impl<'a> Parser<'a> { && !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) { let next = self.get_next()?.ok_or(Error::UnexpectedEOF)?; - return Err(Error::UnexpectedToken( - Self::token_to_span(next), - next.clone(), - )); + return Err(Error::UnexpectedToken(Self::token_to_span(&next), next)); } if !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) { @@ -1567,8 +1502,8 @@ impl<'a> Parser<'a> { let current_token = self.get_next()?.ok_or(Error::UnexpectedEOF)?; if !token_matches!(current_token, TokenType::Symbol(Symbol::LBrace)) { return Err(Error::UnexpectedToken( - Self::token_to_span(current_token), - current_token.clone(), + Self::token_to_span(¤t_token), + current_token, )); }; @@ -1582,15 +1517,15 @@ impl<'a> Parser<'a> { }) } - fn syscall(&mut self) -> Result { - fn check_length( - parser: &Parser, - arguments: &[Spanned], + fn syscall(&mut self) -> Result, Error<'a>> { + fn check_length<'a>( + span: Span, + arguments: &[Spanned>], length: usize, - ) -> Result<(), Error> { + ) -> Result<(), Error<'a>> { if arguments.len() != length { return Err(Error::InvalidSyntax( - parser.current_span(), + span, format!("Expected {} arguments", length), )); } @@ -1648,20 +1583,20 @@ impl<'a> Parser<'a> { let invocation = self.invocation()?; - match invocation.name.node.as_str() { + match invocation.name.node.as_ref() { // System SysCalls "yield" => { - check_length(self, &invocation.arguments, 0)?; + check_length(self.current_span(), &invocation.arguments, 0)?; Ok(SysCall::System(sys_call::System::Yield)) } "sleep" => { - check_length(self, &invocation.arguments, 1)?; + check_length(self.current_span(), &invocation.arguments, 1)?; let mut arg = invocation.arguments.into_iter(); let expr = arg.next().ok_or(Error::UnexpectedEOF)?; Ok(SysCall::System(System::Sleep(boxed!(expr)))) } "hash" => { - check_length(self, &invocation.arguments, 1)?; + check_length(self.current_span(), &invocation.arguments, 1)?; let mut args = invocation.arguments.into_iter(); let lit_str = literal_or_variable!(args.next()); @@ -1682,7 +1617,7 @@ impl<'a> Parser<'a> { }))) } "load" | "l" => { - check_length(self, &invocation.arguments, 2)?; + check_length(self.current_span(), &invocation.arguments, 2)?; let mut args = invocation.arguments.into_iter(); let tmp = args.next(); @@ -1727,7 +1662,7 @@ impl<'a> Parser<'a> { ))) } "loadBatched" | "lb" 
=> { - check_length(self, &invocation.arguments, 3)?; + check_length(self.current_span(), &invocation.arguments, 3)?; let mut args = invocation.arguments.into_iter(); let tmp = args.next(); let device_hash = literal_or_variable!(tmp); @@ -1745,7 +1680,7 @@ impl<'a> Parser<'a> { ))) } "loadBatchedNamed" | "lbn" => { - check_length(self, &invocation.arguments, 4)?; + check_length(self.current_span(), &invocation.arguments, 4)?; let mut args = invocation.arguments.into_iter(); let tmp = args.next(); let dev_hash = literal_or_variable!(tmp); @@ -1764,7 +1699,7 @@ impl<'a> Parser<'a> { ))) } "set" | "s" => { - check_length(self, &invocation.arguments, 3)?; + check_length(self.current_span(), &invocation.arguments, 3)?; let mut args = invocation.arguments.into_iter(); let tmp = args.next(); let device = literal_or_variable!(tmp); @@ -1776,14 +1711,16 @@ impl<'a> Parser<'a> { Ok(SysCall::System(sys_call::System::SetOnDevice( device, Spanned { - node: Literal::String(logic_type.node.to_string().replace("\"", "")), + node: Literal::String(Cow::from( + logic_type.node.to_string().replace("\"", ""), + )), span: logic_type.span, }, boxed!(variable), ))) } "setBatched" | "sb" => { - check_length(self, &invocation.arguments, 3)?; + check_length(self.current_span(), &invocation.arguments, 3)?; let mut args = invocation.arguments.into_iter(); let tmp = args.next(); let device_hash = literal_or_variable!(tmp); @@ -1795,14 +1732,14 @@ impl<'a> Parser<'a> { Ok(SysCall::System(sys_call::System::SetOnDeviceBatched( device_hash, Spanned { - node: Literal::String(logic_type.to_string().replace("\"", "")), + node: Literal::String(Cow::from(logic_type.to_string().replace("\"", ""))), span: logic_type.span, }, boxed!(variable), ))) } "setBatchedNamed" | "sbn" => { - check_length(self, &invocation.arguments, 4)?; + check_length(self.current_span(), &invocation.arguments, 4)?; let mut args = invocation.arguments.into_iter(); let tmp = args.next(); let device_hash = literal_or_variable!(tmp); @@ -1825,28 +1762,28 @@ impl<'a> Parser<'a> { } // Math SysCalls "acos" => { - check_length(self, &invocation.arguments, 1)?; + check_length(self.current_span(), &invocation.arguments, 1)?; let mut args = invocation.arguments.into_iter(); let tmp = args.next().ok_or(Error::UnexpectedEOF)?; Ok(SysCall::Math(Math::Acos(boxed!(tmp)))) } "asin" => { - check_length(self, &invocation.arguments, 1)?; + check_length(self.current_span(), &invocation.arguments, 1)?; let mut args = invocation.arguments.into_iter(); let tmp = args.next().ok_or(Error::UnexpectedEOF)?; Ok(SysCall::Math(Math::Asin(boxed!(tmp)))) } "atan" => { - check_length(self, &invocation.arguments, 1)?; + check_length(self.current_span(), &invocation.arguments, 1)?; let mut args = invocation.arguments.into_iter(); let expr = args.next().ok_or(Error::UnexpectedEOF)?; Ok(SysCall::Math(Math::Atan(boxed!(expr)))) } "atan2" => { - check_length(self, &invocation.arguments, 2)?; + check_length(self.current_span(), &invocation.arguments, 2)?; let mut args = invocation.arguments.into_iter(); let arg1 = args.next().ok_or(Error::UnexpectedEOF)?; let arg2 = args.next().ok_or(Error::UnexpectedEOF)?; @@ -1854,42 +1791,42 @@ impl<'a> Parser<'a> { Ok(SysCall::Math(Math::Atan2(boxed!(arg1), boxed!(arg2)))) } "abs" => { - check_length(self, &invocation.arguments, 1)?; + check_length(self.current_span(), &invocation.arguments, 1)?; let mut args = invocation.arguments.into_iter(); let expr = args.next().ok_or(Error::UnexpectedEOF)?; Ok(SysCall::Math(Math::Abs(boxed!(expr)))) } "ceil" => { - 
check_length(self, &invocation.arguments, 1)?; + check_length(self.current_span(), &invocation.arguments, 1)?; let mut args = invocation.arguments.into_iter(); let arg = args.next().ok_or(Error::UnexpectedEOF)?; Ok(SysCall::Math(Math::Ceil(boxed!(arg)))) } "cos" => { - check_length(self, &invocation.arguments, 1)?; + check_length(self.current_span(), &invocation.arguments, 1)?; let mut args = invocation.arguments.into_iter(); let arg = args.next().ok_or(Error::UnexpectedEOF)?; Ok(SysCall::Math(Math::Cos(boxed!(arg)))) } "floor" => { - check_length(self, &invocation.arguments, 1)?; + check_length(self.current_span(), &invocation.arguments, 1)?; let mut args = invocation.arguments.into_iter(); let arg = args.next().ok_or(Error::UnexpectedEOF)?; Ok(SysCall::Math(Math::Floor(boxed!(arg)))) } "log" => { - check_length(self, &invocation.arguments, 1)?; + check_length(self.current_span(), &invocation.arguments, 1)?; let mut args = invocation.arguments.into_iter(); let arg = args.next().ok_or(Error::UnexpectedEOF)?; Ok(SysCall::Math(Math::Log(boxed!(arg)))) } "max" => { - check_length(self, &invocation.arguments, 2)?; + check_length(self.current_span(), &invocation.arguments, 2)?; let mut args = invocation.arguments.into_iter(); let arg1 = args.next().ok_or(Error::UnexpectedEOF)?; let arg2 = args.next().ok_or(Error::UnexpectedEOF)?; @@ -1897,7 +1834,7 @@ impl<'a> Parser<'a> { Ok(SysCall::Math(Math::Max(boxed!(arg1), boxed!(arg2)))) } "min" => { - check_length(self, &invocation.arguments, 2)?; + check_length(self.current_span(), &invocation.arguments, 2)?; let mut args = invocation.arguments.into_iter(); let arg1 = args.next().ok_or(Error::UnexpectedEOF)?; let arg2 = args.next().ok_or(Error::UnexpectedEOF)?; @@ -1905,32 +1842,32 @@ impl<'a> Parser<'a> { Ok(SysCall::Math(Math::Min(boxed!(arg1), boxed!(arg2)))) } "rand" => { - check_length(self, &invocation.arguments, 0)?; + check_length(self.current_span(), &invocation.arguments, 0)?; Ok(SysCall::Math(Math::Rand)) } "sin" => { - check_length(self, &invocation.arguments, 1)?; + check_length(self.current_span(), &invocation.arguments, 1)?; let mut args = invocation.arguments.into_iter(); let arg = args.next().ok_or(Error::UnexpectedEOF)?; Ok(SysCall::Math(Math::Sin(boxed!(arg)))) } "sqrt" => { - check_length(self, &invocation.arguments, 1)?; + check_length(self.current_span(), &invocation.arguments, 1)?; let mut args = invocation.arguments.into_iter(); let arg = args.next().ok_or(Error::UnexpectedEOF)?; Ok(SysCall::Math(Math::Sqrt(boxed!(arg)))) } "tan" => { - check_length(self, &invocation.arguments, 1)?; + check_length(self.current_span(), &invocation.arguments, 1)?; let mut args = invocation.arguments.into_iter(); let arg = args.next().ok_or(Error::UnexpectedEOF)?; Ok(SysCall::Math(Math::Tan(boxed!(arg)))) } "trunc" => { - check_length(self, &invocation.arguments, 1)?; + check_length(self.current_span(), &invocation.arguments, 1)?; let mut args = invocation.arguments.into_iter(); let arg = args.next().ok_or(Error::UnexpectedEOF)?; diff --git a/rust_compiler/libs/parser/src/sys_call.rs b/rust_compiler/libs/parser/src/sys_call.rs index c44ec95..e90e837 100644 --- a/rust_compiler/libs/parser/src/sys_call.rs +++ b/rust_compiler/libs/parser/src/sys_call.rs @@ -4,73 +4,73 @@ use helpers::prelude::*; documented! { #[derive(Debug, PartialEq, Eq)] - pub enum Math { + pub enum Math<'a> { /// Returns the angle in radians whose cosine is the specified number. /// ## IC10 /// `acos r? 
a(r?|num)` /// ## Slang /// `let item = acos(number|var|expression);` - Acos(Box>), + Acos(Box>>), /// Returns the angle in radians whose sine is the specified number. /// ## IC10 /// `asin r? a(r?|num)` /// ## Slang /// `let item = asin(number|var|expression);` - Asin(Box>), + Asin(Box>>), /// Returns the angle in radians whose tangent is the specified number. /// ## IC10 /// `atan r? a(r?|num)` /// ## Slang /// `let item = atan(number|var|expression);` - Atan(Box>), + Atan(Box>>), /// Returns the angle in radians whose tangent is the quotient of the specified numbers. /// ## IC10 /// `atan2 r? a(r?|num) b(r?|num)` /// ## Slang /// `let item = atan2((number|var|expression), (number|var|expression));` - Atan2(Box>, Box>), + Atan2(Box>>, Box>>), /// Gets the absolute value of a number. /// ## IC10 /// `abs r? a(r?|num)` /// ## Slang /// `let item = abs((number|var|expression));` - Abs(Box>), + Abs(Box>>), /// Rounds a number up to the nearest whole number. /// ## IC10 /// `ceil r? a(r?|num)` /// ## Slang /// `let item = ceil((number|var|expression));` - Ceil(Box>), + Ceil(Box>>), /// Returns the cosine of the specified angle in radians. /// ## IC10 /// `cos r? a(r?|num)` /// ## Slang /// `let item = cos((number|var|expression));` - Cos(Box>), + Cos(Box>>), /// Rounds a number down to the nearest whole number. /// ## IC10 /// `floor r? a(r?|num)` /// ## Slang /// `let item = floor((number|var|expression));` - Floor(Box>), + Floor(Box>>), /// Computes the natural logarithm of a number. /// ## IC10 /// `log r? a(r?|num)` /// ## Slang /// `let item = log((number|var|expression));` - Log(Box>), + Log(Box>>), /// Computes the maximum of two numbers. /// ## IC10 /// `max r? a(r?|num) b(r?|num)` /// ## Slang /// `let item = max((number|var|expression), (number|var|expression));` - Max(Box>, Box>), + Max(Box>>, Box>>), /// Computes the minimum of two numbers. /// ## IC10 /// `min r? a(r?|num) b(r?|num)` /// ## Slang /// `let item = min((number|var|expression), (number|var|expression));` - Min(Box>, Box>), + Min(Box>>, Box>>), /// Gets a random number between 0 and 1. /// ## IC10 /// `rand r?` @@ -82,29 +82,29 @@ documented! { /// `sin r? a(r?|num)` /// ## Slang /// `let item = sin((number|var|expression));` - Sin(Box>), + Sin(Box>>), /// Computes the square root of a number. /// ## IC10 /// `sqrt r? a(r?|num)` /// ## Slang /// `let item = sqrt((number|var|expression));` - Sqrt(Box>), + Sqrt(Box>>), /// Returns the tangent of the specified angle in radians. /// ## IC10 /// `tan r? a(r?|num)` /// ## Slang /// `let item = tan((number|var|expression));` - Tan(Box>), + Tan(Box>>), /// Truncates a number by removing the decimal portion. /// ## IC10 /// `trunc r? a(r?|num)` /// ## Slang /// `let item = trunc((number|var|expression));` - Trunc(Box>), + Trunc(Box>>), } } -impl std::fmt::Display for Math { +impl<'a> std::fmt::Display for Math<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Math::Acos(a) => write!(f, "acos({})", a), @@ -129,7 +129,7 @@ impl std::fmt::Display for Math { documented! { #[derive(Debug, PartialEq, Eq)] - pub enum System { + pub enum System<'a> { /// Pauses execution for exactly 1 tick and then resumes. /// ## IC10 /// `yield` @@ -141,7 +141,7 @@ documented! { /// `sleep a(r?|num)` /// ## Slang /// `sleep(number|var);` - Sleep(Box>), + Sleep(Box>>), /// Gets the in-game hash for a specific prefab name. NOTE! This call is COMPLETELY /// optimized away unless you bind it to a `let` variable. 
If you use a `const` variable /// however, the hash is correctly computed at compile time and substitued automatically. @@ -155,7 +155,7 @@ documented! { /// const compDoor = hash("StructureCompositeDoor"); /// setOnDeviceBatched(compDoor, "Lock", true); /// ``` - Hash(Spanned), + Hash(Spanned>), /// Represents a function which loads a device variable into a register. /// ## IC10 /// `l r? d? var` @@ -163,7 +163,7 @@ documented! { /// `let item = load(deviceHash, "LogicType");` /// `let item = l(deviceHash, "LogicType");` /// `let item = deviceAlias.LogicType;` - LoadFromDevice(Spanned, Spanned), + LoadFromDevice(Spanned>, Spanned>), /// Function which gets a LogicType from all connected network devices that match /// the provided device hash and name, aggregating them via a batchMode /// ## IC10 @@ -172,10 +172,10 @@ documented! { /// `loadBatchedNamed(deviceHash, deviceName, "LogicType", "BatchMode");` /// `lbn(deviceHash, deviceName, "LogicType", "BatchMode");` LoadBatchNamed( - Spanned, - Spanned, - Spanned, - Spanned, + Spanned>, + Spanned>, + Spanned>, + Spanned>, ), /// Loads a LogicType from all connected network devices, aggregating them via a /// BatchMode @@ -184,7 +184,7 @@ documented! { /// ## Slang /// `loadBatched(deviceHash, "Variable", "LogicType");` /// `lb(deviceHash, "Variable", "LogicType");` - LoadBatch(Spanned, Spanned, Spanned), + LoadBatch(Spanned>, Spanned>, Spanned>), /// Represents a function which stores a setting into a specific device. /// ## IC10 /// `s d? logicType r?` @@ -192,7 +192,7 @@ documented! { /// `set(deviceHash, "LogicType", (number|var));` /// `s(deviceHash, "LogicType", (number|var));` /// `deviceAlias.LogicType = (number|var);` - SetOnDevice(Spanned, Spanned, Box>), + SetOnDevice(Spanned>, Spanned>, Box>>), /// Represents a function which stores a setting to all devices that match /// the given deviceHash /// ## IC10 @@ -200,7 +200,7 @@ documented! { /// ## Slang /// `setBatched(deviceHash, "LogicType", (number|var));` /// `sb(deviceHash, "LogicType", (number|var));` - SetOnDeviceBatched(Spanned, Spanned, Box>), + SetOnDeviceBatched(Spanned>, Spanned>, Box>>), /// Represents a function which stores a setting to all devices that match /// both the given deviceHash AND the given nameHash /// ## IC10 @@ -209,15 +209,15 @@ documented! { /// `setBatchedNamed(deviceHash, nameHash, "LogicType", (number|var));` /// `sbn(deviceHash, nameHash, "LogicType", (number|var));` SetOnDeviceBatchedNamed( - Spanned, - Spanned, - Spanned, - Box>, + Spanned>, + Spanned>, + Spanned>, + Box>>, ), } } -impl std::fmt::Display for System { +impl<'a> std::fmt::Display for System<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { System::Yield => write!(f, "yield()"), @@ -242,13 +242,13 @@ impl std::fmt::Display for System { #[allow(clippy::large_enum_variant)] #[derive(Debug, PartialEq, Eq)] /// This represents built in functions that cannot be overwritten, but can be invoked by the user as functions. -pub enum SysCall { - System(System), +pub enum SysCall<'a> { + System(System<'a>), /// Represents any mathmatical function that can be called. 
- Math(Math), + Math(Math<'a>), } -impl Documentation for SysCall { +impl<'a> Documentation for SysCall<'a> { fn docs(&self) -> String { match self { Self::System(s) => s.docs(), @@ -264,7 +264,7 @@ impl Documentation for SysCall { } } -impl std::fmt::Display for SysCall { +impl<'a> std::fmt::Display for SysCall<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { SysCall::System(s) => write!(f, "{}", s), @@ -273,7 +273,7 @@ impl std::fmt::Display for SysCall { } } -impl SysCall { +impl<'a> SysCall<'a> { pub fn is_syscall(identifier: &str) -> bool { tokenizer::token::is_syscall(identifier) } diff --git a/rust_compiler/libs/parser/src/tree_node.rs b/rust_compiler/libs/parser/src/tree_node.rs index 350e4e6..8e221a8 100644 --- a/rust_compiler/libs/parser/src/tree_node.rs +++ b/rust_compiler/libs/parser/src/tree_node.rs @@ -1,24 +1,22 @@ -use std::ops::Deref; - -use crate::sys_call; - use super::sys_call::SysCall; +use crate::sys_call; +use std::{borrow::Cow, ops::Deref}; use tokenizer::token::Number; #[derive(Debug, Eq, PartialEq, Clone)] -pub enum Literal { +pub enum Literal<'a> { Number(Number), - String(String), + String(Cow<'a, str>), Boolean(bool), } #[derive(Debug, Eq, PartialEq, Clone)] -pub enum LiteralOr { - Literal(Spanned), +pub enum LiteralOr<'a, T> { + Literal(Spanned>), Or(Spanned), } -impl std::fmt::Display for LiteralOr { +impl<'a, T: std::fmt::Display> std::fmt::Display for LiteralOr<'a, T> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Literal(l) => write!(f, "{l}"), @@ -27,7 +25,7 @@ impl std::fmt::Display for LiteralOr { } } -impl std::fmt::Display for Literal { +impl<'a> std::fmt::Display for Literal<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Literal::Number(n) => write!(f, "{}", n), @@ -38,16 +36,16 @@ impl std::fmt::Display for Literal { } #[derive(Debug, PartialEq, Eq)] -pub enum BinaryExpression { - Add(Box>, Box>), - Multiply(Box>, Box>), - Divide(Box>, Box>), - Subtract(Box>, Box>), - Exponent(Box>, Box>), - Modulo(Box>, Box>), +pub enum BinaryExpression<'a> { + Add(Box>>, Box>>), + Multiply(Box>>, Box>>), + Divide(Box>>, Box>>), + Subtract(Box>>, Box>>), + Exponent(Box>>, Box>>), + Modulo(Box>>, Box>>), } -impl std::fmt::Display for BinaryExpression { +impl<'a> std::fmt::Display for BinaryExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { BinaryExpression::Add(l, r) => write!(f, "({} + {})", l, r), @@ -61,19 +59,19 @@ impl std::fmt::Display for BinaryExpression { } #[derive(Debug, PartialEq, Eq)] -pub enum LogicalExpression { - And(Box>, Box>), - Or(Box>, Box>), - Not(Box>), - Equal(Box>, Box>), - NotEqual(Box>, Box>), - GreaterThan(Box>, Box>), - GreaterThanOrEqual(Box>, Box>), - LessThan(Box>, Box>), - LessThanOrEqual(Box>, Box>), +pub enum LogicalExpression<'a> { + And(Box>>, Box>>), + Or(Box>>, Box>>), + Not(Box>>), + Equal(Box>>, Box>>), + NotEqual(Box>>, Box>>), + GreaterThan(Box>>, Box>>), + GreaterThanOrEqual(Box>>, Box>>), + LessThan(Box>>, Box>>), + LessThanOrEqual(Box>>, Box>>), } -impl std::fmt::Display for LogicalExpression { +impl<'a> std::fmt::Display for LogicalExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { LogicalExpression::And(l, r) => write!(f, "({} && {})", l, r), @@ -90,25 +88,25 @@ impl std::fmt::Display for LogicalExpression { } #[derive(Debug, PartialEq, Eq)] -pub struct AssignmentExpression { - pub assignee: Box>, - pub 
expression: Box>, +pub struct AssignmentExpression<'a> { + pub assignee: Box>>, + pub expression: Box>>, } -impl std::fmt::Display for AssignmentExpression { +impl<'a> std::fmt::Display for AssignmentExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "({} = {})", self.assignee, self.expression) } } #[derive(Debug, PartialEq, Eq)] -pub struct FunctionExpression { - pub name: Spanned, - pub arguments: Vec>, - pub body: BlockExpression, +pub struct FunctionExpression<'a> { + pub name: Spanned>, + pub arguments: Vec>>, + pub body: BlockExpression<'a>, } -impl std::fmt::Display for FunctionExpression { +impl<'a> std::fmt::Display for FunctionExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, @@ -125,9 +123,9 @@ impl std::fmt::Display for FunctionExpression { } #[derive(Debug, PartialEq, Eq)] -pub struct BlockExpression(pub Vec>); +pub struct BlockExpression<'a>(pub Vec>>); -impl std::fmt::Display for BlockExpression { +impl<'a> std::fmt::Display for BlockExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, @@ -142,12 +140,12 @@ impl std::fmt::Display for BlockExpression { } #[derive(Debug, PartialEq, Eq)] -pub struct InvocationExpression { - pub name: Spanned, - pub arguments: Vec>, +pub struct InvocationExpression<'a> { + pub name: Spanned>, + pub arguments: Vec>>, } -impl std::fmt::Display for InvocationExpression { +impl<'a> std::fmt::Display for InvocationExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, @@ -163,25 +161,25 @@ impl std::fmt::Display for InvocationExpression { } #[derive(Debug, PartialEq, Eq)] -pub struct MemberAccessExpression { - pub object: Box>, - pub member: Spanned, +pub struct MemberAccessExpression<'a> { + pub object: Box>>, + pub member: Spanned>, } -impl std::fmt::Display for MemberAccessExpression { +impl<'a> std::fmt::Display for MemberAccessExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}.{}", self.object, self.member) } } #[derive(Debug, PartialEq, Eq)] -pub struct MethodCallExpression { - pub object: Box>, - pub method: Spanned, - pub arguments: Vec>, +pub struct MethodCallExpression<'a> { + pub object: Box>>, + pub method: Spanned>, + pub arguments: Vec>>, } -impl std::fmt::Display for MethodCallExpression { +impl<'a> std::fmt::Display for MethodCallExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, @@ -198,12 +196,12 @@ impl std::fmt::Display for MethodCallExpression { } #[derive(Debug, PartialEq, Eq)] -pub enum LiteralOrVariable { - Literal(Literal), - Variable(Spanned), +pub enum LiteralOrVariable<'a> { + Literal(Literal<'a>), + Variable(Spanned>), } -impl std::fmt::Display for LiteralOrVariable { +impl<'a> std::fmt::Display for LiteralOrVariable<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { LiteralOrVariable::Literal(l) => write!(f, "{}", l), @@ -213,46 +211,46 @@ impl std::fmt::Display for LiteralOrVariable { } #[derive(Debug, PartialEq, Eq)] -pub struct ConstDeclarationExpression { - pub name: Spanned, - pub value: LiteralOr, +pub struct ConstDeclarationExpression<'a> { + pub name: Spanned>, + pub value: LiteralOr<'a, SysCall<'a>>, } -impl ConstDeclarationExpression { +impl<'a> ConstDeclarationExpression<'a> { pub fn is_syscall_supported(call: &SysCall) -> bool { use sys_call::System; matches!(call, SysCall::System(sys) if matches!(sys, 
System::Hash(_))) } } -impl std::fmt::Display for ConstDeclarationExpression { +impl<'a> std::fmt::Display for ConstDeclarationExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "(const {} = {})", self.name, self.value) } } #[derive(Debug, PartialEq, Eq)] -pub struct DeviceDeclarationExpression { +pub struct DeviceDeclarationExpression<'a> { /// any variable-like name - pub name: Spanned, + pub name: Spanned>, /// The device port, ex. (db, d0, d1, d2, d3, d4, d5) - pub device: String, + pub device: Cow<'a, str>, } -impl std::fmt::Display for DeviceDeclarationExpression { +impl<'a> std::fmt::Display for DeviceDeclarationExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "(device {} = {})", self.name, self.device) } } #[derive(Debug, PartialEq, Eq)] -pub struct IfExpression { - pub condition: Box>, - pub body: Spanned, - pub else_branch: Option>>, +pub struct IfExpression<'a> { + pub condition: Box>>, + pub body: Spanned>, + pub else_branch: Option>>>, } -impl std::fmt::Display for IfExpression { +impl<'a> std::fmt::Display for IfExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "(if ({}) {}", self.condition, self.body)?; if let Some(else_branch) = &self.else_branch { @@ -263,23 +261,23 @@ impl std::fmt::Display for IfExpression { } #[derive(Debug, PartialEq, Eq)] -pub struct LoopExpression { - pub body: Spanned, +pub struct LoopExpression<'a> { + pub body: Spanned>, } -impl std::fmt::Display for LoopExpression { +impl<'a> std::fmt::Display for LoopExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "(loop {})", self.body) } } #[derive(Debug, PartialEq, Eq)] -pub struct WhileExpression { - pub condition: Box>, - pub body: BlockExpression, +pub struct WhileExpression<'a> { + pub condition: Box>>, + pub body: BlockExpression<'a>, } -impl std::fmt::Display for WhileExpression { +impl<'a> std::fmt::Display for WhileExpression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "(while {} {})", self.condition, self.body) } @@ -347,32 +345,32 @@ impl Deref for Spanned { } #[derive(Debug, PartialEq, Eq)] -pub enum Expression { - Assignment(Spanned), - Binary(Spanned), - Block(Spanned), +pub enum Expression<'a> { + Assignment(Spanned>), + Binary(Spanned>), + Block(Spanned>), Break(Span), - ConstDeclaration(Spanned), + ConstDeclaration(Spanned>), Continue(Span), - Declaration(Spanned, Box>), - DeviceDeclaration(Spanned), - Function(Spanned), - If(Spanned), - Invocation(Spanned), - Literal(Spanned), - Logical(Spanned), - Loop(Spanned), - MemberAccess(Spanned), - MethodCall(Spanned), - Negation(Box>), - Priority(Box>), - Return(Box>), - Syscall(Spanned), - Variable(Spanned), - While(Spanned), + Declaration(Spanned>, Box>>), + DeviceDeclaration(Spanned>), + Function(Spanned>), + If(Spanned>), + Invocation(Spanned>), + Literal(Spanned>), + Logical(Spanned>), + Loop(Spanned>), + MemberAccess(Spanned>), + MethodCall(Spanned>), + Negation(Box>>), + Priority(Box>>), + Return(Box>>), + Syscall(Spanned>), + Variable(Spanned>), + While(Spanned>), } -impl std::fmt::Display for Expression { +impl<'a> std::fmt::Display for Expression<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Expression::Assignment(e) => write!(f, "{}", e), diff --git a/rust_compiler/libs/tokenizer/Cargo.toml b/rust_compiler/libs/tokenizer/Cargo.toml index 7433cab..a08373c 100644 --- 
a/rust_compiler/libs/tokenizer/Cargo.toml +++ b/rust_compiler/libs/tokenizer/Cargo.toml @@ -5,9 +5,10 @@ edition = "2024" [dependencies] rust_decimal = { workspace = true } -quick-error = { workspace = true } lsp-types = { workspace = true } +thiserror = { workspace = true } helpers = { path = "../helpers" } +logos = "0.16" [dev-dependencies] anyhow = { version = "^1" } diff --git a/rust_compiler/libs/tokenizer/src/lib.rs b/rust_compiler/libs/tokenizer/src/lib.rs index 3d8dabb..c991047 100644 --- a/rust_compiler/libs/tokenizer/src/lib.rs +++ b/rust_compiler/libs/tokenizer/src/lib.rs @@ -1,38 +1,20 @@ pub mod token; -use quick_error::quick_error; -use rust_decimal::Decimal; +use logos::{Lexer, Logos}; use std::{ cmp::Ordering, collections::VecDeque, - io::{BufReader, Cursor, Read, Seek, SeekFrom}, - path::PathBuf, + io::{Read, Seek, SeekFrom}, }; -use token::{Keyword, Number, Symbol, Temperature, Token, TokenType}; +use thiserror::Error; +use token::{Token, TokenType}; -quick_error! { - #[derive(Debug)] - pub enum Error { - IOError(err: std::io::Error) { - from() - display("IO Error: {}", err) - source(err) - } - NumberParseError(err: std::num::ParseIntError, line: usize, column: usize, original: String) { - display("Number Parse Error: {}", err) - source(err) - } - DecimalParseError(err: rust_decimal::Error, line: usize, column: usize, original: String) { - display("Decimal Parse Error: {}", err) - source(err) - } - UnknownSymbolError(char: char, line: usize, column: usize, original: String) { - display("Unknown Symbol: {}", char) - } - UnknownKeywordOrIdentifierError(val: String, line: usize, column: usize, original: String) { - display("Unknown Keyword or Identifier: {}", val) - } - } +#[derive(Error, Debug)] +pub enum Error { + #[error(transparent)] + IOError(#[from()] std::io::Error), + #[error(transparent)] + LexError(#[from] token::LexError), } impl From for lsp_types::Diagnostic { @@ -46,24 +28,7 @@ impl From for lsp_types::Diagnostic { severity: Some(DiagnosticSeverity::ERROR), ..Default::default() }, - NumberParseError(_, l, c, ref og) - | DecimalParseError(_, l, c, ref og) - | UnknownSymbolError(_, l, c, ref og) - | UnknownKeywordOrIdentifierError(_, l, c, ref og) => Diagnostic { - range: Range { - start: Position { - line: l as u32, - character: c as u32, - }, - end: Position { - line: l as u32, - character: (c + og.len()) as u32, - }, - }, - message: value.to_string(), - severity: Some(DiagnosticSeverity::ERROR), - ..Default::default() - }, + LexError(e) => e.into(), } } } @@ -73,452 +38,68 @@ pub trait Tokenize: Read + Seek {} impl Tokenize for T where T: Read + Seek {} pub struct Tokenizer<'a> { - reader: BufReader>, - char_buffer: [u8; 1], - line: usize, - column: usize, + lexer: Lexer<'a, TokenType<'a>>, returned_eof: bool, - string_buffer: String, -} - -impl<'a> Tokenizer<'a> { - pub fn from_path(input_file: impl Into) -> Result { - let file = std::fs::File::open(input_file.into())?; - let reader = BufReader::new(Box::new(file) as Box); - - Ok(Self { - reader, - line: 1, - column: 0, // Start at 0 so first char becomes 1 - char_buffer: [0], - returned_eof: false, - string_buffer: String::new(), - }) - } -} - -impl<'a> From for Tokenizer<'a> { - fn from(input: String) -> Self { - let reader = BufReader::new(Box::new(Cursor::new(input)) as Box); - - Self { - reader, - line: 1, - column: 0, - char_buffer: [0], - returned_eof: false, - string_buffer: String::new(), - } - } } impl<'a> From<&'a str> for Tokenizer<'a> { fn from(value: &'a str) -> Self { Self { - reader: 
BufReader::new(Box::new(Cursor::new(value)) as Box), - char_buffer: [0], - column: 0, - line: 1, + lexer: TokenType::lexer(value), returned_eof: false, - string_buffer: String::new(), } } } impl<'a> Tokenizer<'a> { - fn next_char(&mut self) -> Result, Error> { - let bytes_read = self.reader.read(&mut self.char_buffer)?; - - if bytes_read == 0 { - return Ok(None); - } - - let c = self.char_buffer[0] as char; - if c == '\n' { - self.line += 1; - self.column = 1; - } else { - self.column += 1; - } - - self.string_buffer.push(c); - Ok(Some(c)) + fn get_token(&mut self, t_type: TokenType<'a>) -> Token<'a> { + let mut span = self.lexer.span(); + span.start -= self.lexer.extras.line_start_index; + span.end -= self.lexer.extras.line_start_index; + Token::new(t_type, self.lexer.extras.line_count, span) } - fn peek_next_char(&mut self) -> Result, Error> { - let current_pos = self.reader.stream_position()?; - let to_return = if self.reader.read(&mut self.char_buffer)? == 0 { - None - } else { - self.reader.seek(SeekFrom::Start(current_pos))?; - Some(self.char_buffer[0] as char) - }; - Ok(to_return) - } + pub fn next_token(&mut self) -> Result>, Error> { + let mut current = self.lexer.next().transpose(); - fn skip_line(&mut self) -> Result<(), Error> { - while let Some(next_char) = self.next_char()? { - if next_char == '\n' { - break; - } - } - Ok(()) - } - - pub fn next_token(&mut self) -> Result, Error> { - self.string_buffer.clear(); - - while let Some(next_char) = self.next_char()? { - if next_char.is_whitespace() { - self.string_buffer.clear(); - continue; - } - if next_char == '/' && self.peek_next_char()? == Some('/') { - self.skip_line()?; - self.string_buffer.clear(); - continue; - } - - // Capture start position before delegating - let start_line = self.line; - let start_col = self.column; - - match next_char { - '0'..='9' => { - return self - .tokenize_number(next_char, start_line, start_col) - .map(Some); - } - '"' | '\'' => { - return self - .tokenize_string(next_char, start_line, start_col) - .map(Some); - } - char if !char.is_alphanumeric() && char != '"' && char != '\'' => { - return self - .tokenize_symbol(next_char, start_line, start_col) - .map(Some); - } - char if char.is_alphabetic() || char == '_' => { - return self - .tokenize_keyword_or_identifier(next_char, start_line, start_col) - .map(Some); - } - _ => { - return Err(Error::UnknownSymbolError( - next_char, - start_line, - start_col, - std::mem::take(&mut self.string_buffer), - )); - } - } - } - if self.returned_eof { - Ok(None) - } else { - self.returned_eof = true; - Ok(Some(Token::new( - TokenType::EOF, - self.line, - self.column, - Some(std::mem::take(&mut self.string_buffer)), - ))) - } - } - - pub fn peek_next(&mut self) -> Result, Error> { - let current_pos = self.reader.stream_position()?; - let column = self.column; - let line = self.line; - let token = self.next_token()?; - self.reader.seek(SeekFrom::Start(current_pos))?; - self.column = column; - self.line = line; - Ok(token) - } - - // Updated helper functions to accept start_line and start_col - - fn tokenize_symbol( - &mut self, - first_symbol: char, - line: usize, - col: usize, - ) -> Result { - macro_rules! 
symbol { - ($symbol:ident) => { - Ok(Token::new( - TokenType::Symbol(Symbol::$symbol), - line, - col, - Some(std::mem::take(&mut self.string_buffer)), - )) - }; + while matches!(current, Ok(Some(TokenType::Comment(_)))) { + current = self.lexer.next().transpose(); } - match first_symbol { - '(' => symbol!(LParen), - ')' => symbol!(RParen), - '{' => symbol!(LBrace), - '}' => symbol!(RBrace), - '[' => symbol!(LBracket), - ']' => symbol!(RBracket), - ';' => symbol!(Semicolon), - ':' => symbol!(Colon), - ',' => symbol!(Comma), - '+' => symbol!(Plus), - '-' => symbol!(Minus), - '/' => symbol!(Slash), - '.' => symbol!(Dot), - '^' => symbol!(Caret), - '%' => symbol!(Percent), - '<' if self.peek_next_char()? == Some('=') => { - self.next_char()?; - symbol!(LessThanOrEqual) - } - '<' => symbol!(LessThan), - '>' if self.peek_next_char()? == Some('=') => { - self.next_char()?; - symbol!(GreaterThanOrEqual) - } - '>' => symbol!(GreaterThan), - '=' if self.peek_next_char()? == Some('=') => { - self.next_char()?; - symbol!(Equal) - } - '=' => symbol!(Assign), - '!' if self.peek_next_char()? == Some('=') => { - self.next_char()?; - symbol!(NotEqual) - } - '!' => symbol!(LogicalNot), - '*' if self.peek_next_char()? == Some('*') => { - self.next_char()?; - symbol!(Exp) - } - '*' => symbol!(Asterisk), - '&' if self.peek_next_char()? == Some('&') => { - self.next_char()?; - symbol!(LogicalAnd) - } - '|' if self.peek_next_char()? == Some('|') => { - self.next_char()?; - symbol!(LogicalOr) - } - _ => Err(Error::UnknownSymbolError( - first_symbol, - line, - col, - std::mem::take(&mut self.string_buffer), - )), - } - } - - fn tokenize_number( - &mut self, - first_char: char, - line: usize, - col: usize, - ) -> Result { - let mut primary = String::with_capacity(16); - let mut decimal: Option = None; - let mut reading_decimal = false; - primary.push(first_char); - - while let Some(next_char) = self.peek_next_char()? { - if next_char.is_whitespace() { - break; - } - if next_char == '.' { - reading_decimal = true; - self.next_char()?; - continue; - } - if next_char == '_' { - self.next_char()?; - continue; - } - if !next_char.is_numeric() { - break; - } - - if reading_decimal { - decimal.get_or_insert_with(String::new).push(next_char); - } else { - primary.push(next_char); - } - self.next_char()?; - } - - let number: Number = if let Some(decimal) = decimal { - let decimal_scale = decimal.len() as u32; - let number_str = format!("{}{}", primary, decimal); - let number = number_str.parse::().map_err(|e| { - Error::NumberParseError(e, line, col, std::mem::take(&mut self.string_buffer)) - })?; - Number::Decimal( - Decimal::try_from_i128_with_scale(number, decimal_scale).map_err(|e| { - Error::DecimalParseError(e, line, col, std::mem::take(&mut self.string_buffer)) - })?, - ) - } else { - Number::Integer(primary.parse().map_err(|e| { - Error::NumberParseError(e, line, col, std::mem::take(&mut self.string_buffer)) - })?) - }; - - if let Some(next_char) = self.peek_next_char()? 
{ - let temperature = match next_char { - 'c' => Temperature::Celsius(number), - 'f' => Temperature::Fahrenheit(number), - 'k' => Temperature::Kelvin(number), - _ => { - return Ok(Token::new( - TokenType::Number(number), - line, - col, - Some(std::mem::take(&mut self.string_buffer)), - )); - } - } - .to_kelvin(); - - self.next_char()?; - Ok(Token::new( - TokenType::Number(temperature), - line, - col, - Some(std::mem::take(&mut self.string_buffer)), - )) - } else { - Ok(Token::new( - TokenType::Number(number), - line, - col, - Some(std::mem::take(&mut self.string_buffer)), - )) - } - } - - fn tokenize_string( - &mut self, - beginning_quote: char, - line: usize, - col: usize, - ) -> Result { - let mut buffer = String::with_capacity(16); - while let Some(next_char) = self.next_char()? { - if next_char == beginning_quote { - break; - } - buffer.push(next_char); - } - Ok(Token::new( - TokenType::String(buffer), - line, - col, - Some(std::mem::take(&mut self.string_buffer)), - )) - } - - fn tokenize_keyword_or_identifier( - &mut self, - first_char: char, - line: usize, - col: usize, - ) -> Result { - macro_rules! keyword { - ($keyword:ident) => {{ - return Ok(Token::new( - TokenType::Keyword(Keyword::$keyword), - line, - col, - Some(std::mem::take(&mut self.string_buffer)), - )); - }}; - } - macro_rules! next_ws { - () => { matches!(self.peek_next_char()?, Some(x) if x.is_whitespace() || (!x.is_alphanumeric()) && x != '_') || self.peek_next_char()?.is_none() }; - } - - let mut buffer = String::with_capacity(16); - let mut looped_char = Some(first_char); - - while let Some(next_char) = looped_char { - // allow UNDERSCORE_IDENTS - if next_char.is_whitespace() || (!next_char.is_alphanumeric() && next_char != '_') { - break; - } - buffer.push(next_char); - - match buffer.as_str() { - "let" if next_ws!() => keyword!(Let), - "fn" if next_ws!() => keyword!(Fn), - "if" if next_ws!() => keyword!(If), - "else" if next_ws!() => keyword!(Else), - "return" if next_ws!() => keyword!(Return), - "enum" if next_ws!() => keyword!(Enum), - "device" if next_ws!() => keyword!(Device), - "loop" if next_ws!() => keyword!(Loop), - "break" if next_ws!() => keyword!(Break), - "while" if next_ws!() => keyword!(While), - "continue" if next_ws!() => keyword!(Continue), - "const" if next_ws!() => keyword!(Const), - "true" if next_ws!() => { - return Ok(Token::new( - TokenType::Boolean(true), - line, - col, - Some(std::mem::take(&mut self.string_buffer)), - )); - } - "false" if next_ws!() => { - return Ok(Token::new( - TokenType::Boolean(false), - line, - col, - Some(std::mem::take(&mut self.string_buffer)), - )); - } - val if next_ws!() => { - return Ok(Token::new( - TokenType::Identifier(val.to_string()), - line, - col, - Some(std::mem::take(&mut self.string_buffer)), - )); - } - _ => {} - } - looped_char = self.next_char()?; - } - Err(Error::UnknownKeywordOrIdentifierError( - buffer, - line, - col, - std::mem::take(&mut self.string_buffer), - )) + Ok(current.map(|t| t.map(|t| self.get_token(t)))?) } } // ... Iterator and TokenizerBuffer implementations remain unchanged ... // They just call the methods above which now use the passed-in start coordinates. 
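For context on the `logos` rewrite, a minimal sketch of the extras-based line tracking that the span rebasing in `get_token` above depends on. The token set and `LineTracker` fields here are hypothetical, assuming the `logos` 0.13+ derive/callback API rather than the crate's real `TokenType`:

```rust
use logos::{Lexer, Logos, Skip};

// Extras travel with the lexer: current line number plus the byte index where
// that line starts, so absolute spans can be rebased to per-line columns.
#[derive(Default)]
struct LineTracker {
    line_count: usize,
    line_start_index: usize,
}

// Bump the line counter and remember where the next line begins.
fn newline(lex: &mut Lexer<Tok>) -> Skip {
    lex.extras.line_count += 1;
    lex.extras.line_start_index = lex.span().end;
    Skip
}

#[derive(Logos, Debug)]
#[logos(extras = LineTracker)]
#[logos(skip r"[ \t]+")]
enum Tok {
    #[token("\n", newline)]
    Newline,

    #[regex(r"[A-Za-z_][A-Za-z0-9_]*")]
    Ident,

    #[regex(r"[0-9]+")]
    Number,

    #[token("=")]
    Assign,
}

fn main() {
    let mut lex = Tok::lexer("let x\n  = 10");
    while let Some(Ok(tok)) = lex.next() {
        // Rebase the absolute byte span onto the current line, mirroring what
        // the rewritten Tokenizer::get_token does with lexer.extras.
        let mut span = lex.span();
        span.start -= lex.extras.line_start_index;
        span.end -= lex.extras.line_start_index;
        println!("{tok:?} on line {} at columns {:?}", lex.extras.line_count, span);
    }
}
```

This is the same shape as the diff: the lexer owns position bookkeeping, and `next_token` only has to skip comment tokens and wrap the result in a `Token`.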
impl<'a> Iterator for Tokenizer<'a> { - type Item = Result; + type Item = Result, Error>; fn next(&mut self) -> Option { - match self.next_token() { - Ok(Some(tok)) => Some(Ok(tok)), - Ok(None) => None, - Err(e) => Some(Err(e)), + match self.lexer.next() { + None => { + if self.returned_eof { + None + } else { + self.returned_eof = true; + Some(Ok(Token::new( + TokenType::EOF, + self.lexer.extras.line_count, + self.lexer.span(), + ))) + } + } + Some(t) => match t { + Err(e) => Some(Err(e.into())), + Ok(t) => Some(Ok(self.get_token(t))), + }, } } } pub struct TokenizerBuffer<'a> { tokenizer: Tokenizer<'a>, - buffer: VecDeque, - history: VecDeque, + buffer: VecDeque>, + history: VecDeque>, index: i64, } @@ -531,7 +112,7 @@ impl<'a> TokenizerBuffer<'a> { index: 0, } } - pub fn next_token(&mut self) -> Result, Error> { + pub fn next_token(&mut self) -> Result>, Error> { if let Some(token) = self.buffer.pop_front() { self.history.push_back(token.clone()); self.index += 1; @@ -546,12 +127,16 @@ impl<'a> TokenizerBuffer<'a> { self.index += 1; Ok(token) } - pub fn peek(&mut self) -> Result, Error> { + pub fn peek(&mut self) -> Result>, Error> { if let Some(token) = self.buffer.front() { return Ok(Some(token.clone())); } - let token = self.tokenizer.peek_next()?; - Ok(token) + + let Some(new_token) = self.tokenizer.next_token()? else { + return Ok(None); + }; + self.buffer.push_front(new_token.clone()); + Ok(Some(new_token)) } pub fn loc(&self) -> i64 { self.index @@ -601,437 +186,3 @@ impl<'a> TokenizerBuffer<'a> { Ok(()) } } - -#[cfg(test)] -mod tests { - use super::*; - use anyhow::Result; - use rust_decimal::Decimal; - - const TEST_FILE: &str = "tests/file.stlg"; - - const TEST_STRING: &str = r#" - fn test() { - let x = 10; - return x + 2; - } - "#; - - #[test] - fn test_seek_from_current() -> Result<()> { - let tokenizer = Tokenizer::from(TEST_STRING.to_owned()); - let mut buffer = TokenizerBuffer::new(tokenizer); - - let token = buffer.next_token()?.unwrap(); - assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn)); - - buffer.seek(SeekFrom::Current(1))?; - - let token = buffer.next_token()?.unwrap(); - - assert_eq!(token.token_type, TokenType::Symbol(Symbol::LParen)); - - Ok(()) - } - - #[test] - fn test_tokenizer_from_path_ok() { - let tokenizer = Tokenizer::from_path(TEST_FILE); - assert!(tokenizer.is_ok()); - } - - #[test] - fn test_tokenizer_from_path_err() { - let tokenizer = Tokenizer::from_path("non_existent_file.stlg"); - assert!(tokenizer.is_err()); - } - - #[test] - fn test_next_char() -> Result<()> { - let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned()); - - let char = tokenizer.next_char()?; - - assert_eq!(char, Some('\n')); - assert_eq!(tokenizer.line, 2); - assert_eq!(tokenizer.column, 1); - - let mut tokenizer = Tokenizer::from(String::from("fn")); - - let char = tokenizer.next_char()?; - - assert_eq!(char, Some('f')); - assert_eq!(tokenizer.line, 1); - assert_eq!(tokenizer.column, 1); - - Ok(()) - } - - #[test] - fn test_peek_next_char() -> Result<()> { - let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned()); - - let char = tokenizer.peek_next_char()?; - - assert_eq!(char, Some('\n')); - assert_eq!(tokenizer.line, 1); - assert_eq!(tokenizer.column, 0); - - let char = tokenizer.next_char()?; - assert_eq!(char, Some('\n')); - assert_eq!(tokenizer.line, 2); - assert_eq!(tokenizer.column, 1); - - let char = tokenizer.peek_next_char()?; - assert_eq!(char, Some(' ')); - assert_eq!(tokenizer.line, 2); - assert_eq!(tokenizer.column, 1); - - Ok(()) - } - - #[test] 
- fn test_temperature_unit() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("10c 14f 10k")); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!( - token.token_type, - TokenType::Number(Number::Decimal(Decimal::new(28315, 2))) - ); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!( - token.token_type, - TokenType::Number(Number::Decimal(Decimal::new(26315, 2))) - ); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!(token.token_type, TokenType::Number(Number::Integer(10))); - - Ok(()) - } - - #[test] - fn test_parse_integer() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("10")); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!(token.token_type, TokenType::Number(Number::Integer(10))); - - Ok(()) - } - - #[test] - fn test_parse_integer_with_underscore() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("1_000")); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!(token.token_type, TokenType::Number(Number::Integer(1000))); - - Ok(()) - } - - #[test] - fn test_parse_decimal() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("10.5")); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!( - token.token_type, - TokenType::Number(Number::Decimal(Decimal::new(105, 1))) // 10.5 - ); - - Ok(()) - } - - #[test] - fn test_parse_decimal_with_underscore() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("1_000.000_6")); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!( - token.token_type, - TokenType::Number(Number::Decimal(Decimal::new(10000006, 4))) // 1000.0006 - ); - - Ok(()) - } - - #[test] - fn test_parse_number_with_symbol() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("10;")); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!(token.token_type, TokenType::Number(Number::Integer(10))); - - let next_char = tokenizer.next_char()?; - - assert_eq!(next_char, Some(';')); - - Ok(()) - } - - #[test] - fn test_string_parse() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from(r#""Hello, World!""#)); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!( - token.token_type, - TokenType::String(String::from("Hello, World!")) - ); - - let mut tokenizer = Tokenizer::from(String::from(r#"'Hello, World!'"#)); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!( - token.token_type, - TokenType::String(String::from("Hello, World!")) - ); - - Ok(()) - } - - #[test] - fn test_symbol_parse() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from( - "^ ! () [] {} , . 
; : + - * / < > = != && || >= <=**%", - )); - - let expected_tokens = vec![ - TokenType::Symbol(Symbol::Caret), - TokenType::Symbol(Symbol::LogicalNot), - TokenType::Symbol(Symbol::LParen), - TokenType::Symbol(Symbol::RParen), - TokenType::Symbol(Symbol::LBracket), - TokenType::Symbol(Symbol::RBracket), - TokenType::Symbol(Symbol::LBrace), - TokenType::Symbol(Symbol::RBrace), - TokenType::Symbol(Symbol::Comma), - TokenType::Symbol(Symbol::Dot), - TokenType::Symbol(Symbol::Semicolon), - TokenType::Symbol(Symbol::Colon), - TokenType::Symbol(Symbol::Plus), - TokenType::Symbol(Symbol::Minus), - TokenType::Symbol(Symbol::Asterisk), - TokenType::Symbol(Symbol::Slash), - TokenType::Symbol(Symbol::LessThan), - TokenType::Symbol(Symbol::GreaterThan), - TokenType::Symbol(Symbol::Assign), - TokenType::Symbol(Symbol::NotEqual), - TokenType::Symbol(Symbol::LogicalAnd), - TokenType::Symbol(Symbol::LogicalOr), - TokenType::Symbol(Symbol::GreaterThanOrEqual), - TokenType::Symbol(Symbol::LessThanOrEqual), - TokenType::Symbol(Symbol::Exp), - TokenType::Symbol(Symbol::Percent), - ]; - - for expected_token in expected_tokens { - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!(token.token_type, expected_token); - } - - Ok(()) - } - - #[test] - fn test_keyword_parse() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from( - "let fn if else return enum continue break const", - )); - - let expected_tokens = vec![ - TokenType::Keyword(Keyword::Let), - TokenType::Keyword(Keyword::Fn), - TokenType::Keyword(Keyword::If), - TokenType::Keyword(Keyword::Else), - TokenType::Keyword(Keyword::Return), - TokenType::Keyword(Keyword::Enum), - TokenType::Keyword(Keyword::Continue), - TokenType::Keyword(Keyword::Break), - TokenType::Keyword(Keyword::Const), - ]; - - for expected_token in expected_tokens { - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!(token.token_type, expected_token); - } - - Ok(()) - } - - #[test] - fn test_identifier_parse() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("fn test fn test_underscores")); - - let token = tokenizer.next_token()?.unwrap(); - assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn)); - let token = tokenizer.next_token()?.unwrap(); - assert_eq!( - token.token_type, - TokenType::Identifier(String::from("test")) - ); - let token = tokenizer.next_token()?.unwrap(); - assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn)); - let token = tokenizer.next_token()?.unwrap(); - assert_eq!( - token.token_type, - TokenType::Identifier(String::from("test_underscores")) - ); - - Ok(()) - } - - #[test] - fn test_boolean_parse() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("true false")); - - let token = tokenizer.next_token()?.unwrap(); - assert_eq!(token.token_type, TokenType::Boolean(true)); - let token = tokenizer.next_token()?.unwrap(); - assert_eq!(token.token_type, TokenType::Boolean(false)); - - Ok(()) - } - - #[test] - fn test_full_source() -> Result<()> { - let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned()); - - let expected_tokens = vec![ - TokenType::Keyword(Keyword::Fn), - TokenType::Identifier(String::from("test")), - TokenType::Symbol(Symbol::LParen), - TokenType::Symbol(Symbol::RParen), - TokenType::Symbol(Symbol::LBrace), - TokenType::Keyword(Keyword::Let), - TokenType::Identifier(String::from("x")), - TokenType::Symbol(Symbol::Assign), - TokenType::Number(Number::Integer(10)), - TokenType::Symbol(Symbol::Semicolon), - TokenType::Keyword(Keyword::Return), - 
TokenType::Identifier(String::from("x")), - TokenType::Symbol(Symbol::Plus), - TokenType::Number(Number::Integer(2)), - TokenType::Symbol(Symbol::Semicolon), - TokenType::Symbol(Symbol::RBrace), - ]; - - for expected_token in expected_tokens { - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!(token.token_type, expected_token); - } - - Ok(()) - } - - #[test] - fn test_peek_next() -> Result<()> { - let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned()); - - let column = tokenizer.column; - let line = tokenizer.line; - - let peeked_token = tokenizer.peek_next()?; - - assert_eq!( - peeked_token.unwrap().token_type, - TokenType::Keyword(Keyword::Fn) - ); - assert_eq!(tokenizer.column, column); - assert_eq!(tokenizer.line, line); - - let next_token = tokenizer.next_token()?; - - assert_eq!( - next_token.unwrap().token_type, - TokenType::Keyword(Keyword::Fn) - ); - assert_ne!(tokenizer.column, column); - assert_ne!(tokenizer.line, line); - - Ok(()) - } - - #[test] - fn test_compact_syntax() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("if(true) while(false)")); - - // if(true) - assert_eq!( - tokenizer.next_token()?.unwrap().token_type, - TokenType::Keyword(Keyword::If) - ); - assert_eq!( - tokenizer.next_token()?.unwrap().token_type, - TokenType::Symbol(Symbol::LParen) - ); - assert_eq!( - tokenizer.next_token()?.unwrap().token_type, - TokenType::Boolean(true) - ); - assert_eq!( - tokenizer.next_token()?.unwrap().token_type, - TokenType::Symbol(Symbol::RParen) - ); - - // while(false) - assert_eq!( - tokenizer.next_token()?.unwrap().token_type, - TokenType::Keyword(Keyword::While) - ); - assert_eq!( - tokenizer.next_token()?.unwrap().token_type, - TokenType::Symbol(Symbol::LParen) - ); - - Ok(()) - } - - #[test] - fn test_identifier_has_correct_length() -> Result<()> { - let mut tokenizer = Tokenizer::from("hello"); - assert_eq!( - tokenizer.next_token()?, - Some(Token { - token_type: TokenType::Identifier("hello".into()), - original_string: Some("hello".into()), - column: 1, - line: 1 - }) - ); - Ok(()) - } - - #[test] - fn test_keyword_token_has_correct_length() -> Result<()> { - let mut tokenizer = Tokenizer::from("while"); - - assert_eq!( - tokenizer.next_token()?, - Some(Token { - token_type: TokenType::Keyword(Keyword::While), - original_string: Some("while".into()), - column: 1, - line: 1 - }) - ); - - Ok(()) - } -} diff --git a/rust_compiler/libs/tokenizer/src/token.rs b/rust_compiler/libs/tokenizer/src/token.rs index 9745ecd..bfda737 100644 --- a/rust_compiler/libs/tokenizer/src/token.rs +++ b/rust_compiler/libs/tokenizer/src/token.rs @@ -1,5 +1,59 @@ +use std::borrow::Cow; + use helpers::prelude::*; +use logos::{Lexer, Logos, Skip, Span}; +use lsp_types::{Diagnostic, DiagnosticSeverity, Position, Range}; use rust_decimal::Decimal; +use thiserror::Error; + +#[derive(Debug, Error, Default, Clone, PartialEq)] +pub enum LexError { + #[error("Attempted to parse an invalid number: {2}")] + NumberParse(usize, Span, String), + + #[error("An invalid character was found in token stream: {2}")] + InvalidInput(usize, Span, String), + + #[default] + #[error("An unknown error occurred")] + Other, +} + +impl From for Diagnostic { + fn from(value: LexError) -> Self { + match value { + LexError::NumberParse(line, col, str) | LexError::InvalidInput(line, col, str) => { + Diagnostic { + range: Range { + start: Position { + character: col.start as u32, + line: line as u32, + }, + end: Position { + line: line as u32, + character: col.end as u32, + }, + }, + severity: 
Some(DiagnosticSeverity::ERROR), + message: str, + ..Default::default() + } + } + _ => todo!(), + } + } +} + +impl LexError { + pub fn from_lexer<'a>(lex: &mut Lexer<'a, TokenType<'a>>) -> Self { + let mut span = lex.span(); + let line = lex.extras.line_count; + span.start -= lex.extras.line_start_index; + span.end -= lex.extras.line_start_index; + + Self::InvalidInput(line, span, lex.slice().chars().as_str().to_string()) + } +} // Define a local macro to consume the list macro_rules! generate_check { @@ -10,29 +64,40 @@ macro_rules! generate_check { } } -#[derive(Debug, PartialEq, Eq, Clone)] -pub struct Token { - /// The type of the token - pub token_type: TokenType, - /// The line where the token was found - pub line: usize, - /// The column where the token was found - pub column: usize, - pub original_string: Option, +#[derive(Default)] +pub struct Extras { + pub line_count: usize, + pub line_start_index: usize, } -impl Token { - pub fn new( - token_type: TokenType, - line: usize, - column: usize, - original: Option, - ) -> Self { +fn update_line_index<'a>(lex: &mut Lexer<'a, TokenType<'a>>) -> Skip { + lex.extras.line_count += 1; + lex.extras.line_start_index = lex.span().end; + Skip +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Token<'a> { + /// The type of the token + pub token_type: TokenType<'a>, + /// The line where the token was found + pub line: usize, + /// The span where the token starts and ends + pub span: Span, +} + +impl<'a> std::fmt::Display for Token<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.token_type) + } +} + +impl<'a> Token<'a> { + pub fn new(token_type: TokenType<'a>, line: usize, span: Span) -> Self { Self { token_type, line, - column, - original_string: original, + span, } } } @@ -79,25 +144,186 @@ impl Temperature { } } -#[derive(Debug, PartialEq, Hash, Eq, Clone)] -pub enum TokenType { +macro_rules! symbol { + ($var:ident) => { + |_| Symbol::$var + }; +} + +macro_rules! 
keyword { + ($var:ident) => { + |_| Keyword::$var + }; +} + +#[derive(Debug, PartialEq, Hash, Eq, Clone, Logos)] +#[logos(skip r"[ \t\f]+")] +#[logos(extras = Extras)] +#[logos(error(LexError, LexError::from_lexer))] +pub enum TokenType<'a> { + #[regex(r"\n", update_line_index)] + Newline, + + // matches strings with double quotes + #[regex(r#""(?:[^"\\]|\\.)*""#, |v| { + let str = v.slice(); + Cow::from(&str[1..str.len() - 1]) + })] + // matches strings with single quotes + #[regex(r#"'(?:[^'\\]|\\.)*'"#, |v| { + let str = v.slice(); + Cow::from(&str[1..str.len() - 1]) + })] /// Represents a string token - String(String), + String(Cow<'a, str>), + + #[regex(r"[0-9][0-9_]*(\.[0-9][0-9_]*)?([cfk])?", parse_number)] /// Represents a number token Number(Number), + + #[token("true", |_| true)] + #[token("false", |_| false)] /// Represents a boolean token Boolean(bool), + + #[token("continue", keyword!(Continue))] + #[token("const", keyword!(Const))] + #[token("let", keyword!(Let))] + #[token("fn", keyword!(Fn))] + #[token("if", keyword!(If))] + #[token("device", keyword!(Device))] + #[token("else", keyword!(Else))] + #[token("return", keyword!(Return))] + #[token("enum", keyword!(Enum))] + #[token("loop", keyword!(Loop))] + #[token("break", keyword!(Break))] + #[token("while", keyword!(While))] /// Represents a keyword token Keyword(Keyword), + + #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |v| Cow::from(v.slice()))] /// Represents an identifier token - Identifier(String), + Identifier(Cow<'a, str>), + + #[token("(", symbol!(LParen))] + #[token(")", symbol!(RParen))] + #[token("{", symbol!(LBrace))] + #[token("}", symbol!(RBrace))] + #[token("[", symbol!(LBracket))] + #[token("]", symbol!(RBracket))] + #[token(";", symbol!(Semicolon))] + #[token(":", symbol!(Colon))] + #[token(",", symbol!(Comma))] + #[token("+", symbol!(Plus))] + #[token("-", symbol!(Minus))] + #[token("*", symbol!(Asterisk))] + #[token("/", symbol!(Slash))] + #[token("<", symbol!(LessThan))] + #[token(">", symbol!(GreaterThan))] + #[token("=", symbol!(Assign))] + #[token("!", symbol!(LogicalNot))] + #[token(".", symbol!(Dot))] + #[token("^", symbol!(Caret))] + #[token("%", symbol!(Percent))] + #[token("==", symbol!(Equal))] + #[token("!=", symbol!(NotEqual))] + #[token("&&", symbol!(LogicalAnd))] + #[token("||", symbol!(LogicalOr))] + #[token("<=", symbol!(LessThanOrEqual))] + #[token(">=", symbol!(GreaterThanOrEqual))] + #[token("**", symbol!(Exp))] /// Represents a symbol token Symbol(Symbol), + + #[token("//", |lex| Comment::Line(read_line(lex)))] + #[token("///", |lex| Comment::Doc(read_line(lex)))] + /// Represents a comment, both a line comment and a doc comment + Comment(Comment<'a>), + + #[end] /// Represents an end of file token EOF, } -impl Documentation for TokenType { +fn read_line<'a>(lexer: &mut Lexer<'a, TokenType<'a>>) -> Cow<'a, str> { + let rem = lexer.remainder(); + let len = rem.find('\n').unwrap_or(rem.len()); + let content = rem[..len].trim().to_string(); + + lexer.bump(len); + Cow::from(content) +} + +#[derive(Hash, Debug, Eq, PartialEq, Clone)] +pub enum Comment<'a> { + Line(Cow<'a, str>), + Doc(Cow<'a, str>), +} + +fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType<'a>>) -> Result { + let slice = lexer.slice(); + let last_char = slice.chars().last().unwrap_or_default(); + let (num_str, suffix) = match last_char { + 'c' | 'k' | 'f' => (&slice[..slice.len() - 1], Some(last_char)), + _ => (slice, None), + }; + + let clean_str = if num_str.contains('_') { + num_str.replace('_', "") + } else { + 
num_str.to_string()
+    };
+
+    let line = lexer.extras.line_count;
+    let mut span = lexer.span();
+    span.end -= lexer.extras.line_start_index;
+    span.start -= lexer.extras.line_start_index;
+
+    let num = if clean_str.contains('.') {
+        Number::Decimal(
+            clean_str
+                .parse::<Decimal>()
+                .map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?,
+        )
+    } else {
+        Number::Integer(
+            clean_str
+                .parse::<i64>()
+                .map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?,
+        )
+    };
+
+    if let Some(suffix) = suffix {
+        Ok(match suffix {
+            'c' => Temperature::Celsius(num),
+            'f' => Temperature::Fahrenheit(num),
+            'k' => Temperature::Kelvin(num),
+            _ => unreachable!(),
+        }
+        .to_kelvin())
+    } else {
+        Ok(num)
+    }
+}
+
+impl<'a> std::fmt::Display for Comment<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Line(c) => write!(f, "// {}", c),
+            Self::Doc(d) => {
+                let lines = d
+                    .split('\n')
+                    .map(|s| format!("/// {s}"))
+                    .collect::<Vec<_>>()
+                    .join("\n");
+
+                write!(f, "{}", lines)
+            }
+        }
+    }
+}
+
+impl<'a> Documentation for TokenType<'a> {
     fn docs(&self) -> String {
         match self {
             Self::Keyword(k) => k.docs(),
@@ -112,7 +338,7 @@ impl Documentation for TokenType {
 
 helpers::with_syscalls!(generate_check);
 
-impl From<TokenType> for u32 {
+impl<'a> From<TokenType<'a>> for u32 {
     fn from(value: TokenType) -> Self {
         match value {
             TokenType::String(_) => 1,
@@ -128,6 +354,7 @@ impl From<TokenType> for u32 {
                 | Keyword::Return => 4,
                 _ => 5,
             },
+            TokenType::Comment(_) => 8,
             TokenType::Identifier(s) => {
                 if is_syscall(&s) {
                     10
@@ -146,12 +373,12 @@ impl From<TokenType> for u32 {
                     7
                 }
             }
-            TokenType::EOF => 0,
+            _ => 0,
         }
     }
 }
 
-impl std::fmt::Display for TokenType {
+impl<'a> std::fmt::Display for TokenType<'a> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             TokenType::String(s) => write!(f, "{}", s),
@@ -160,7 +387,9 @@ impl std::fmt::Display for TokenType {
             TokenType::Keyword(k) => write!(f, "{:?}", k),
             TokenType::Identifier(i) => write!(f, "{}", i),
             TokenType::Symbol(s) => write!(f, "{}", s),
+            TokenType::Comment(c) => write!(f, "{}", c),
             TokenType::EOF => write!(f, "EOF"),
+            _ => write!(f, ""),
         }
     }
 }
diff --git a/rust_compiler/src/ffi/mod.rs b/rust_compiler/src/ffi/mod.rs
index ee31887..9dc8763 100644
--- a/rust_compiler/src/ffi/mod.rs
+++ b/rust_compiler/src/ffi/mod.rs
@@ -96,9 +96,10 @@ pub fn free_docs_vec(v: safer_ffi::Vec) {
 
 #[ffi_export]
 pub fn compile_from_string(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::String {
     let res = std::panic::catch_unwind(|| {
+        let input = String::from_utf16_lossy(input.as_slice());
         let mut writer = BufWriter::new(Vec::new());
 
-        let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice()));
+        let tokenizer = Tokenizer::from(input.as_str());
         let parser = Parser::new(tokenizer);
         let compiler = Compiler::new(parser, &mut writer, None);
 
@@ -120,7 +121,8 @@ pub fn compile_from_string(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::
 #[ffi_export]
 pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec<FfiToken> {
     let res = std::panic::catch_unwind(|| {
-        let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice()));
+        let input = String::from_utf16_lossy(input.as_slice());
+        let tokenizer = Tokenizer::from(input.as_str());
 
         let mut tokens = Vec::new();
 
@@ -136,34 +138,31 @@ pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec<FfiToken> {
+                    use tokenizer::token::LexError;
                     use tokenizer::Error::*;
-                    let (err_str, col, og) = match e {
-                        NumberParseError(_, _, col, og)
-                        |
DecimalParseError(_, _, col, og) - | UnknownSymbolError(_, _, col, og) - | UnknownKeywordOrIdentifierError(_, _, col, og) => { - (e.to_string(), col, og) + let (err_str, _, span) = match e { + LexError(LexError::NumberParse(line, span, err)) + | LexError(LexError::InvalidInput(line, span, err)) => { + (err.to_string(), line, span) } + _ => continue, }; tokens.push(FfiToken { - column: *col as i32, + column: span.start as i32, error: err_str.into(), tooltip: "".into(), - length: og.len() as i32, + length: (span.end - span.start) as i32, token_kind: 0, }) } Ok(Token { - column, - original_string, - token_type, - .. + span, token_type, .. }) => tokens.push(FfiToken { - column: column as i32, + column: span.start as i32, error: "".into(), - length: (original_string.unwrap_or_default().len()) as i32, + length: (span.end - span.start) as i32, tooltip: token_type.docs().into(), token_kind: token_type.into(), }), @@ -179,8 +178,10 @@ pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec) -> safer_ffi::Vec { let res = std::panic::catch_unwind(|| { + let input = String::from_utf16_lossy(input.as_slice()); + let mut writer = BufWriter::new(Vec::new()); - let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice())); + let tokenizer = Tokenizer::from(input.as_str()); let compiler = Compiler::new(Parser::new(tokenizer), &mut writer, None); let diagnosis = compiler.compile(); diff --git a/rust_compiler/src/main.rs b/rust_compiler/src/main.rs index 730b2a2..040f07f 100644 --- a/rust_compiler/src/main.rs +++ b/rust_compiler/src/main.rs @@ -1,8 +1,5 @@ #![allow(clippy::result_large_err)] -#[macro_use] -extern crate quick_error; - use clap::Parser; use compiler::Compiler; use parser::Parser as ASTParser; @@ -11,27 +8,39 @@ use std::{ io::{stderr, BufWriter, Read, Write}, path::PathBuf, }; +use thiserror::Error; use tokenizer::{self, Tokenizer}; -quick_error! 
{ - #[derive(Debug)] - enum StationlangError { - TokenizerError(err: tokenizer::Error) { - from() - display("Tokenizer error: {}", err) - } - ParserError(err: parser::Error) { - from() - display("Parser error: {}", err) - } - CompileError(err: compiler::Error) { - from() - display("Compile error: {}", err) - } - IoError(err: std::io::Error) { - from() - display("IO error: {}", err) - } +#[derive(Error, Debug)] +enum Error<'a> { + #[error(transparent)] + Tokenizer(tokenizer::Error), + + #[error(transparent)] + Parser(parser::Error<'a>), + + #[error(transparent)] + Compile(compiler::Error<'a>), + + #[error(transparent)] + IO(#[from] std::io::Error), +} + +impl<'a> From> for Error<'a> { + fn from(value: parser::Error<'a>) -> Self { + Self::Parser(value) + } +} + +impl<'a> From> for Error<'a> { + fn from(value: compiler::Error<'a>) -> Self { + Self::Compile(value) + } +} + +impl<'a> From for Error<'a> { + fn from(value: tokenizer::Error) -> Self { + Self::Tokenizer(value) } } @@ -46,12 +55,17 @@ struct Args { output_file: Option, } -fn run_logic() -> Result<(), StationlangError> { +fn run_logic<'a>() -> Result<(), Error<'a>> { let args = Args::parse(); let input_file = args.input_file; - let tokenizer: Tokenizer = match input_file { - Some(input_file) => Tokenizer::from_path(&input_file)?, + let input_string = match input_file { + Some(input_path) => { + let mut buf = String::new(); + let mut file = std::fs::File::open(input_path).unwrap(); + file.read_to_string(&mut buf).unwrap(); + buf + } None => { let mut buf = String::new(); let stdin = std::io::stdin(); @@ -62,10 +76,11 @@ fn run_logic() -> Result<(), StationlangError> { return Ok(()); } - Tokenizer::from(buf) + buf } }; + let tokenizer = Tokenizer::from(input_string.as_str()); let parser = ASTParser::new(tokenizer); let mut writer: BufWriter> = match args.output_file { @@ -75,20 +90,17 @@ fn run_logic() -> Result<(), StationlangError> { let compiler = Compiler::new(parser, &mut writer, None); - let mut errors = compiler.compile(); + let errors = compiler.compile(); if !errors.is_empty() { let mut std_error = stderr(); - let last = errors.pop(); - let errors = errors.into_iter().map(StationlangError::from); + let errors = errors.into_iter().map(Error::from); std_error.write_all(b"Compilation error:\n")?; for err in errors { std_error.write_all(format!("{}\n", err).as_bytes())?; } - - return Err(StationlangError::from(last.unwrap())); } writer.flush()?; @@ -96,7 +108,7 @@ fn run_logic() -> Result<(), StationlangError> { Ok(()) } -fn main() -> Result<(), StationlangError> { +fn main() -> anyhow::Result<()> { run_logic()?; Ok(())
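
Note (reviewer sketch, not part of the patch): the rewritten tokenizer follows the usual `logos` recipe — derive `Logos` on the token enum, keep line bookkeeping in `Lexer::extras`, and let a newline callback such as `update_line_index` return `Skip` so positions can be derived from byte spans instead of the old `line`/`column` fields. The snippet below is a minimal, standalone illustration of that pattern using only the `logos` crate; `MiniToken`, `LineTracker`, and `newline` are hypothetical names for this sketch and are not the `TokenType`/`Extras` types introduced above.

```rust
use logos::{Lexer, Logos, Skip};

/// Hypothetical extras type: tracks the current line number and the byte
/// offset where that line starts (same role as the patch's `Extras`).
#[derive(Default)]
struct LineTracker {
    line: usize,
    line_start: usize,
}

/// Newline callback: bump the line counter, remember where the new line
/// begins, and skip the token itself.
fn newline(lex: &mut Lexer<MiniToken>) -> Skip {
    lex.extras.line += 1;
    lex.extras.line_start = lex.span().end;
    Skip
}

/// A deliberately tiny token set, for illustration only.
#[derive(Logos, Debug, PartialEq)]
#[logos(extras = LineTracker)]
#[logos(skip r"[ \t]+")]
enum MiniToken {
    #[regex(r"\n", newline)]
    Newline,

    #[token("let")]
    Let,

    #[regex(r"[A-Za-z_][A-Za-z0-9_]*")]
    Ident,

    #[regex(r"[0-9]+", |lex| lex.slice().parse::<i64>().ok())]
    Number(i64),
}

fn main() {
    let mut lex = MiniToken::lexer("let x\nlet y 42");
    while let Some(result) = lex.next() {
        // Spans are byte offsets into the whole input; subtracting the line
        // start gives a column, which is how the patch derives positions.
        let col = lex.span().start - lex.extras.line_start;
        match result {
            Ok(tok) => println!("{:?} at line {}, col {}", tok, lex.extras.line, col),
            Err(()) => println!("lex error at line {}, col {}", lex.extras.line, col),
        }
    }
}
```

Because the lexer hands back byte spans rather than per-token columns, the C# side now receives zero-based positions directly, which is why the `- 1`/`- 2` adjustments were dropped from `Extensions.cs`.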