From 06a151ab7ee6db9ba656ec71ab086ea2323022f0 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Sun, 30 Nov 2025 20:31:06 -0700 Subject: [PATCH] wip -- lsp mappings to various types --- csharp_mod/FfiGlue.cs | 59 +++- csharp_mod/Formatter.cs | 24 +- csharp_mod/Marshal.cs | 23 -- rust_compiler/Cargo.lock | 44 +++ rust_compiler/Cargo.toml | 2 + rust_compiler/libs/compiler/Cargo.toml | 1 + .../test/declaration_function_invocation.rs | 4 +- rust_compiler/libs/compiler/src/test/mod.rs | 4 +- rust_compiler/libs/compiler/src/v1.rs | 320 +++++++++++++----- rust_compiler/libs/parser/Cargo.toml | 1 + rust_compiler/libs/parser/src/lib.rs | 120 +++++-- rust_compiler/libs/parser/src/tree_node.rs | 31 +- rust_compiler/libs/tokenizer/Cargo.toml | 1 + rust_compiler/libs/tokenizer/src/lib.rs | 33 ++ rust_compiler/src/ffi/mod.rs | 103 ++++++ rust_compiler/src/lib.rs | 106 +----- rust_compiler/src/lsp/mod.rs | 0 rust_compiler/src/main.rs | 19 +- 18 files changed, 640 insertions(+), 255 deletions(-) create mode 100644 rust_compiler/src/ffi/mod.rs create mode 100644 rust_compiler/src/lsp/mod.rs diff --git a/csharp_mod/FfiGlue.cs b/csharp_mod/FfiGlue.cs index 551d531..668bb96 100644 --- a/csharp_mod/FfiGlue.cs +++ b/csharp_mod/FfiGlue.cs @@ -88,6 +88,53 @@ namespace Slang public static extern unsafe Vec_uint8_t compile_from_string(slice_ref_uint16_t input); } + [StructLayout(LayoutKind.Sequential, Size = 16)] + public unsafe struct FfiRange_t + { + public UInt32 start_col; + + public UInt32 end_col; + + public UInt32 start_line; + + public UInt32 end_line; + } + + [StructLayout(LayoutKind.Sequential, Size = 48)] + public unsafe struct FfiDiagnostic_t + { + public Vec_uint8_t message; + + public Int32 severity; + + public FfiRange_t range; + } + + /// + /// Same as [Vec][rust::Vec], but with guaranteed #[repr(C)] layout + /// + [StructLayout(LayoutKind.Sequential, Size = 24)] + public unsafe struct Vec_FfiDiagnostic_t + { + public FfiDiagnostic_t* ptr; + + public UIntPtr len; + + public UIntPtr cap; + } + + public unsafe partial class Ffi + { + [DllImport(RustLib, ExactSpelling = true)] + public static extern unsafe Vec_FfiDiagnostic_t diagnose_source(); + } + + public unsafe partial class Ffi + { + [DllImport(RustLib, ExactSpelling = true)] + public static extern unsafe void free_ffi_diagnostic_vec(Vec_FfiDiagnostic_t v); + } + [StructLayout(LayoutKind.Sequential, Size = 64)] public unsafe struct FfiToken_t { @@ -126,16 +173,4 @@ namespace Slang [DllImport(RustLib, ExactSpelling = true)] public static extern unsafe void free_string(Vec_uint8_t s); } - - public unsafe partial class Ffi - { - /// - /// C# handles strings as UTF16. We do NOT want to allocate that memory in C# because - /// we want to avoid GC. So we pass it to Rust to handle all the memory allocations. - /// This should result in the ability to tokenize many times without triggering frame drops - /// from the GC from a GetBytes() call on a string in C#. - /// - [DllImport(RustLib, ExactSpelling = true)] - public static extern unsafe Vec_FfiToken_t tokenize_line(slice_ref_uint16_t input); - } } /* Slang */ diff --git a/csharp_mod/Formatter.cs b/csharp_mod/Formatter.cs index 973351a..b26d2cf 100644 --- a/csharp_mod/Formatter.cs +++ b/csharp_mod/Formatter.cs @@ -1,15 +1,20 @@ namespace Slang; +using System.Timers; using StationeersIC10Editor; public class SlangFormatter : ICodeFormatter { + private Timer _timer; + public static readonly uint ColorInstruction = ColorFromHTML("#ffff00"); public static readonly uint ColorString = ColorFromHTML("#ce9178"); - public override Line ParseLine(string line) + public SlangFormatter() { - return Marshal.TokenizeLine(line); + _timer = new Timer(250); + + this.OnCodeChanged += HandleCodeChanged; } public override string Compile() @@ -17,4 +22,19 @@ public class SlangFormatter : ICodeFormatter L.Info("ICodeFormatter attempted to compile source code."); return this.Lines.RawText; } + + public override Line ParseLine(string line) + { + return new Line(line); + } + + private void HandleCodeChanged() + { + _timer.Stop(); + _timer.Dispose(); + _timer = new Timer(250); + _timer.Elapsed += (_, _) => HandleLsp(); + } + + private void HandleLsp() { } } diff --git a/csharp_mod/Marshal.cs b/csharp_mod/Marshal.cs index e0b54ab..ffb6a58 100644 --- a/csharp_mod/Marshal.cs +++ b/csharp_mod/Marshal.cs @@ -61,29 +61,6 @@ public static class Marshal } } - public static unsafe Line TokenizeLine(string source) - { - if (String.IsNullOrEmpty(source)) - { - return new Line(source); - } - - if (!EnsureLibLoaded()) - { - return new Line(source); - } - - fixed (char* ptrString = source) - { - var input = new slice_ref_uint16_t - { - ptr = (ushort*)ptrString, - len = (UIntPtr)source.Length, - }; - return Ffi.tokenize_line(input).ToLine(source); - } - } - public static unsafe bool CompileFromString(string inputString, out string compiledString) { if (String.IsNullOrEmpty(inputString)) diff --git a/rust_compiler/Cargo.lock b/rust_compiler/Cargo.lock index 3d8cef8..b42d3c4 100644 --- a/rust_compiler/Cargo.lock +++ b/rust_compiler/Cargo.lock @@ -114,6 +114,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitvec" version = "1.0.1" @@ -247,6 +253,7 @@ version = "0.1.0" dependencies = [ "anyhow", "indoc", + "lsp-types", "parser", "pretty_assertions", "quick-error", @@ -300,6 +307,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320bea982e85d42441eb25c49b41218e7eaa2657e8f90bc4eca7437376751e23" +[[package]] +name = "fluent-uri" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17c704e9dbe1ddd863da1e6ff3567795087b1eb201ce80d8fa81162e1516500d" +dependencies = [ + "bitflags", +] + [[package]] name = "funty" version = "2.0.0" @@ -400,6 +416,19 @@ version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +[[package]] +name = "lsp-types" +version = "0.97.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53353550a17c04ac46c585feb189c2db82154fc84b79c7a66c96c2c644f66071" +dependencies = [ + "bitflags", + "fluent-uri", + "serde", + "serde_json", + "serde_repr", +] + [[package]] name = "macro_rules_attribute" version = "0.1.3" @@ -466,6 +495,7 @@ name = "parser" version = "0.1.0" dependencies = [ "anyhow", + "lsp-types", "quick-error", "tokenizer", ] @@ -732,6 +762,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ "serde_core", + "serde_derive", ] [[package]] @@ -767,6 +798,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "sha2-const-stable" version = "0.1.0" @@ -786,6 +828,7 @@ dependencies = [ "anyhow", "clap", "compiler", + "lsp-types", "parser", "quick-error", "rust_decimal", @@ -882,6 +925,7 @@ name = "tokenizer" version = "0.1.0" dependencies = [ "anyhow", + "lsp-types", "quick-error", "rust_decimal", ] diff --git a/rust_compiler/Cargo.toml b/rust_compiler/Cargo.toml index ae125f6..8286678 100644 --- a/rust_compiler/Cargo.toml +++ b/rust_compiler/Cargo.toml @@ -10,6 +10,7 @@ members = ["libs/*"] quick-error = "2" rust_decimal = "1" safer-ffi = { version = "0.1" } +lsp-types = { version = "0.97" } [features] headers = ["safer-ffi/headers"] @@ -33,6 +34,7 @@ crate-type = ["cdylib", "rlib"] [dependencies] clap = { version = "^4.5", features = ["derive"] } +lsp-types = { workspace = true } quick-error = { workspace = true } rust_decimal = { workspace = true } tokenizer = { path = "libs/tokenizer" } diff --git a/rust_compiler/libs/compiler/Cargo.toml b/rust_compiler/libs/compiler/Cargo.toml index 85434ac..a820e20 100644 --- a/rust_compiler/libs/compiler/Cargo.toml +++ b/rust_compiler/libs/compiler/Cargo.toml @@ -7,6 +7,7 @@ edition = "2024" quick-error = { workspace = true } parser = { path = "../parser" } tokenizer = { path = "../tokenizer" } +lsp-types = { workspace = true } [dev-dependencies] anyhow = { version = "1.0" } diff --git a/rust_compiler/libs/compiler/src/test/declaration_function_invocation.rs b/rust_compiler/libs/compiler/src/test/declaration_function_invocation.rs index 23c8f55..2e0c3c2 100644 --- a/rust_compiler/libs/compiler/src/test/declaration_function_invocation.rs +++ b/rust_compiler/libs/compiler/src/test/declaration_function_invocation.rs @@ -82,8 +82,8 @@ fn incorrect_args_count() -> anyhow::Result<()> { }; assert!(matches!( - compiled, - Err(super::super::Error::AgrumentMismatch(_, _)) + compiled[0], + super::super::Error::AgrumentMismatch(_, _) )); Ok(()) diff --git a/rust_compiler/libs/compiler/src/test/mod.rs b/rust_compiler/libs/compiler/src/test/mod.rs index 6a4ba07..0c8aac1 100644 --- a/rust_compiler/libs/compiler/src/test/mod.rs +++ b/rust_compiler/libs/compiler/src/test/mod.rs @@ -15,7 +15,7 @@ macro_rules! compile { &mut writer, None, ); - compiler.compile()?; + compiler.compile(); output!(writer) }}; @@ -36,7 +36,7 @@ macro_rules! compile { &mut writer, Some(crate::CompilerConfig { debug: true }), ); - compiler.compile()?; + compiler.compile(); output!(writer) }}; } diff --git a/rust_compiler/libs/compiler/src/v1.rs b/rust_compiler/libs/compiler/src/v1.rs index aaefba6..82cb19b 100644 --- a/rust_compiler/libs/compiler/src/v1.rs +++ b/rust_compiler/libs/compiler/src/v1.rs @@ -39,8 +39,8 @@ quick_error! { ParseError(error: parser::Error) { from() } - IoError(error: std::io::Error) { - from() + IoError(error: String) { + display("IO Error: {}", error) } ScopeError(error: variable_manager::Error) { from() @@ -63,6 +63,49 @@ quick_error! { } } +impl From for lsp_types::Diagnostic { + fn from(value: Error) -> Self { + use Error::*; + use lsp_types::*; + match value { + ParseError(e) => e.into(), + IoError(e) => Diagnostic { + message: e.to_string(), + severity: Some(DiagnosticSeverity::ERROR), + ..Default::default() + }, + ScopeError(e) => Diagnostic { + message: e.to_string(), + range: Range::default(), + severity: Some(DiagnosticSeverity::ERROR), + ..Default::default() + }, + DuplicateIdentifier(_, span) + | UnknownIdentifier(_, span) + | InvalidDevice(_, span) + | AgrumentMismatch(_, span) => Diagnostic { + range: span.into(), + message: value.to_string(), + severity: Some(DiagnosticSeverity::ERROR), + ..Default::default() + }, + Unknown(msg, span) => Diagnostic { + message: msg.to_string(), + severity: Some(DiagnosticSeverity::ERROR), + range: span.map(lsp_types::Range::from).unwrap_or_default(), + ..Default::default() + }, + } + } +} + +// Map io::Error to Error manually since we can't clone io::Error +impl From for Error { + fn from(err: std::io::Error) -> Self { + Error::IoError(err.to_string()) + } +} + #[derive(Default)] #[repr(C)] pub struct CompilerConfig { @@ -77,7 +120,7 @@ struct CompilationResult { } pub struct Compiler<'a, W: std::io::Write> { - parser: ASTParser<'a>, + pub parser: ASTParser<'a>, function_locations: HashMap, function_metadata: HashMap>, devices: HashMap, @@ -88,6 +131,7 @@ pub struct Compiler<'a, W: std::io::Write> { temp_counter: usize, label_counter: usize, loop_stack: Vec<(String, String)>, // Stores (start_label, end_label) + pub errors: Vec, } impl<'a, W: std::io::Write> Compiler<'a, W> { @@ -108,19 +152,30 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { temp_counter: 0, label_counter: 0, loop_stack: Vec::new(), + errors: Vec::new(), } } - pub fn compile(mut self) -> Result<(), Error> { - let expr = self.parser.parse_all()?; + pub fn compile(mut self) -> Vec { + let expr = self.parser.parse_all(); - let Some(expr) = expr else { return Ok(()) }; + // Copy errors from parser + for e in std::mem::take(&mut self.parser.errors) { + self.errors.push(Error::ParseError(e)); + } - // Wrap the root expression in a dummy span for consistency, - // since parse_all returns an unspanned Expression (usually Block) - // that contains spanned children. - // We know parse_all returns Expression::Block which has an internal span, - // but for type consistency we wrap it. + // We treat parse_all result as potentially partial + let expr = match expr { + Ok(Some(expr)) => expr, + Ok(None) => return self.errors, + Err(e) => { + // Should be covered by parser.errors, but just in case + self.errors.push(Error::ParseError(e)); + return self.errors; + } + }; + + // Wrap the root expression in a dummy span for consistency let span = if let Expression::Block(ref block) = expr { block.span } else { @@ -134,10 +189,17 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { let spanned_root = Spanned { node: expr, span }; - self.write_output("j main")?; + if let Err(e) = self.write_output("j main") { + self.errors.push(e); + return self.errors; + } + // We ignore the result of the root expression (usually a block) - let _ = self.expression(spanned_root, &mut VariableScope::default())?; - Ok(()) + if let Err(e) = self.expression(spanned_root, &mut VariableScope::default()) { + self.errors.push(e); + } + + self.errors } fn write_output(&mut self, output: impl Into) -> Result<(), Error> { @@ -255,13 +317,20 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { _ => Ok(None), // String literals don't return values in this context typically }, Expression::Variable(name) => { - let loc = scope - .get_location_of(&name.node) - .map_err(|_| Error::UnknownIdentifier(name.node.clone(), name.span))?; - Ok(Some(CompilationResult { - location: loc, - temp_name: None, // User variable, do not free - })) + match scope.get_location_of(&name.node) { + Ok(loc) => Ok(Some(CompilationResult { + location: loc, + temp_name: None, // User variable, do not free + })), + Err(_) => { + self.errors + .push(Error::UnknownIdentifier(name.node.clone(), name.span)); + Ok(Some(CompilationResult { + location: VariableLocation::Temporary(0), + temp_name: None, + })) + } + } } Expression::Priority(inner_expr) => self.expression(*inner_expr, scope), Expression::Negation(inner_expr) => { @@ -428,9 +497,16 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { (var_loc, None) } Expression::Variable(name) => { - let src_loc = scope - .get_location_of(&name.node) - .map_err(|_| Error::UnknownIdentifier(name.node.clone(), name.span))?; + let src_loc_res = scope.get_location_of(&name.node); + + let src_loc = match src_loc_res { + Ok(l) => l, + Err(_) => { + self.errors + .push(Error::UnknownIdentifier(name.node.clone(), name.span)); + VariableLocation::Temporary(0) + } + }; let var_loc = scope.add_variable(&name_str, LocationRequest::Persist)?; @@ -488,9 +564,16 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { expression, } = expr; - let location = scope - .get_location_of(&identifier.node) - .map_err(|_| Error::UnknownIdentifier(identifier.node.clone(), identifier.span))?; + let location = match scope.get_location_of(&identifier.node) { + Ok(l) => l, + Err(_) => { + self.errors.push(Error::UnknownIdentifier( + identifier.node.clone(), + identifier.span, + )); + VariableLocation::Temporary(0) + } + }; let (val_str, cleanup) = self.compile_operand(*expression, scope)?; @@ -533,15 +616,26 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { let InvocationExpression { name, arguments } = invoke_expr.node; if !self.function_locations.contains_key(&name.node) { - return Err(Error::UnknownIdentifier(name.node.clone(), name.span)); + self.errors + .push(Error::UnknownIdentifier(name.node.clone(), name.span)); + // Don't emit call, just pretend we did? + // Actually, we should probably emit a dummy call or just skip to avoid logic errors + // But if we skip, registers might be unbalanced if something expected a return. + // For now, let's just return early. + return Ok(()); } let Some(args) = self.function_metadata.get(&name.node) else { + // Should be covered by check above return Err(Error::UnknownIdentifier(name.node.clone(), name.span)); }; if args.len() != arguments.len() { - return Err(Error::AgrumentMismatch(name.node, name.span)); + self.errors + .push(Error::AgrumentMismatch(name.node.clone(), name.span)); + // Proceed anyway? The assembly will likely crash or act weird. + // Best to skip generation of this call to prevent bad IC10 + return Ok(()); } // backup all used registers to the stack @@ -564,9 +658,14 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { _ => {} }, Expression::Variable(var_name) => { - let loc = stack - .get_location_of(var_name.node.clone()) - .map_err(|_| Error::UnknownIdentifier(var_name.node, var_name.span))?; + let loc = match stack.get_location_of(var_name.node.clone()) { + Ok(l) => l, + Err(_) => { + self.errors + .push(Error::UnknownIdentifier(var_name.node, var_name.span)); + VariableLocation::Temporary(0) + } + }; match loc { VariableLocation::Persistant(reg) | VariableLocation::Temporary(reg) => { @@ -655,7 +754,12 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { span: Span, ) -> Result<(), Error> { if self.devices.contains_key(&expr.name.node) { - return Err(Error::DuplicateIdentifier(expr.name.node, span)); + self.errors + .push(Error::DuplicateIdentifier(expr.name.node.clone(), span)); + // We can overwrite or ignore. Let's ignore new declaration to avoid cascading errors? + // Actually, for recovery, maybe we want to allow it so subsequent uses work? + // But we already have it. + return Ok(()); } self.devices.insert(expr.name.node, expr.device); @@ -832,10 +936,15 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { return Ok((format!("-{}", n), None)); } - let result = self.expression(expr, scope)?.ok_or(Error::Unknown( - "Expression did not return a value".into(), - None, - ))?; + let result_opt = self.expression(expr, scope)?; + + let result = match result_opt { + Some(r) => r, + None => { + // Expression failed or returned void. Recover with dummy. + return Ok(("r0".to_string(), None)); + } + }; match result.location { VariableLocation::Temporary(r) | VariableLocation::Persistant(r) => { @@ -1032,13 +1141,18 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { self.expression_return(*ret_expr, scope)?; } _ => { - let result = self.expression(expr, scope)?; - // If the expression was a statement that returned a temp result (e.g. `1 + 2;` line), - // we must free it to avoid leaking registers. - if let Some(comp_res) = result - && let Some(name) = comp_res.temp_name - { - scope.free_temp(name)?; + // Swallow errors within expressions so block can continue + if let Err(e) = self.expression(expr, scope).and_then(|result| { + // If the expression was a statement that returned a temp result (e.g. `1 + 2;` line), + // we must free it to avoid leaking registers. + if let Some(comp_res) = result + && let Some(name) = comp_res.temp_name + { + scope.free_temp(name)?; + } + Ok(()) + }) { + self.errors.push(e); } } } @@ -1063,27 +1177,33 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { }; match expr.node { - Expression::Variable(var_name) => match scope - .get_location_of(&var_name.node) - .map_err(|_| Error::UnknownIdentifier(var_name.node, var_name.span))? - { - VariableLocation::Temporary(reg) | VariableLocation::Persistant(reg) => { - self.write_output(format!( - "move r{} r{reg} {}", - VariableScope::RETURN_REGISTER, - debug!(self, "#returnValue") - ))?; - } - VariableLocation::Stack(offset) => { - self.write_output(format!( - "sub r{} sp {offset}", - VariableScope::TEMP_STACK_REGISTER - ))?; - self.write_output(format!( - "get r{} db r{}", - VariableScope::RETURN_REGISTER, - VariableScope::TEMP_STACK_REGISTER - ))?; + Expression::Variable(var_name) => match scope.get_location_of(&var_name.node) { + Ok(loc) => match loc { + VariableLocation::Temporary(reg) | VariableLocation::Persistant(reg) => { + self.write_output(format!( + "move r{} r{reg} {}", + VariableScope::RETURN_REGISTER, + debug!(self, "#returnValue") + ))?; + } + VariableLocation::Stack(offset) => { + self.write_output(format!( + "sub r{} sp {offset}", + VariableScope::TEMP_STACK_REGISTER + ))?; + self.write_output(format!( + "get r{} db r{}", + VariableScope::RETURN_REGISTER, + VariableScope::TEMP_STACK_REGISTER + ))?; + } + }, + Err(_) => { + self.errors.push(Error::UnknownIdentifier( + var_name.node.clone(), + var_name.span, + )); + // Proceed with dummy } }, Expression::Literal(spanned_lit) => match spanned_lit.node { @@ -1189,9 +1309,18 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { let device_name = device_spanned.node; - let Some(device_val) = self.devices.get(&device_name) else { - return Err(Error::InvalidDevice(device_name, device_spanned.span)); - }; + if !self.devices.contains_key(&device_name) { + self.errors.push(Error::InvalidDevice( + device_name.clone(), + device_spanned.span, + )); + } + + let device_val = self + .devices + .get(&device_name) + .cloned() + .unwrap_or("d0".to_string()); let Literal::String(logic_type) = logic_type else { return Err(Error::AgrumentMismatch( @@ -1241,9 +1370,18 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { let device_name = device_spanned.node; - let Some(device_val) = self.devices.get(&device_name) else { - return Err(Error::InvalidDevice(device_name, device_spanned.span)); - }; + if !self.devices.contains_key(&device_name) { + self.errors.push(Error::InvalidDevice( + device_name.clone(), + device_spanned.span, + )); + } + + let device_val = self + .devices + .get(&device_name) + .cloned() + .unwrap_or("d0".to_string()); let Literal::String(logic_type) = logic_type else { return Err(Error::AgrumentMismatch( @@ -1286,7 +1424,10 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { } = expr.node; if self.function_locations.contains_key(&name.node) { - return Err(Error::DuplicateIdentifier(name.node.clone(), name.span)); + self.errors + .push(Error::DuplicateIdentifier(name.node.clone(), name.span)); + // Fallthrough to allow compiling the body anyway? + // It might be useful to check body for errors. } self.function_metadata.insert( @@ -1356,26 +1497,33 @@ impl<'a, W: std::io::Write> Compiler<'a, W> { self.expression_return(*ret_expr, &mut block_scope)?; } _ => { - let result = self.expression(expr, &mut block_scope)?; - // Free unused statement results - if let Some(comp_res) = result - && let Some(name) = comp_res.temp_name - { - block_scope.free_temp(name)?; + // Swallow internal errors + if let Err(e) = self.expression(expr, &mut block_scope).and_then(|result| { + if let Some(comp_res) = result + && let Some(name) = comp_res.temp_name + { + block_scope.free_temp(name)?; + } + Ok(()) + }) { + self.errors.push(e); } } } } // Get the saved return address and save it back into `ra` - let VariableLocation::Stack(ra_stack_offset) = block_scope - .get_location_of(format!("{}_ra", name.node)) - .map_err(Error::ScopeError)? - else { - return Err(Error::Unknown( - "Stored return address not in stack as expected".into(), - Some(name.span), - )); + let ra_res = block_scope.get_location_of(format!("{}_ra", name.node)); + let ra_stack_offset = match ra_res { + Ok(VariableLocation::Stack(offset)) => offset, + _ => { + // If we can't find RA, we can't return properly. + // This usually implies a compiler bug or scope tracking error. + return Err(Error::Unknown( + "Stored return address not in stack as expected".into(), + Some(name.span), + )); + } }; self.write_output(format!( diff --git a/rust_compiler/libs/parser/Cargo.toml b/rust_compiler/libs/parser/Cargo.toml index 2d1639a..5ff0cd5 100644 --- a/rust_compiler/libs/parser/Cargo.toml +++ b/rust_compiler/libs/parser/Cargo.toml @@ -6,6 +6,7 @@ edition = "2024" [dependencies] quick-error = { workspace = true } tokenizer = { path = "../tokenizer" } +lsp-types = { workspace = true } [dev-dependencies] diff --git a/rust_compiler/libs/parser/src/lib.rs b/rust_compiler/libs/parser/src/lib.rs index 19ab4ec..85496d9 100644 --- a/rust_compiler/libs/parser/src/lib.rs +++ b/rust_compiler/libs/parser/src/lib.rs @@ -48,6 +48,30 @@ quick_error! { } } +impl From for lsp_types::Diagnostic { + fn from(value: Error) -> Self { + use Error::*; + use lsp_types::*; + match value { + TokenizerError(e) => e.into(), + UnexpectedToken(span, _) + | DuplicateIdentifier(span, _) + | InvalidSyntax(span, _) + | UnsupportedKeyword(span, _) => Diagnostic { + message: value.to_string(), + severity: Some(DiagnosticSeverity::ERROR), + range: span.into(), + ..Default::default() + }, + UnexpectedEOF => Diagnostic { + message: value.to_string(), + severity: Some(DiagnosticSeverity::ERROR), + ..Default::default() + }, + } + } +} + macro_rules! self_matches_peek { ($self:ident, $pattern:pat) => { matches!($self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. })) @@ -84,6 +108,7 @@ macro_rules! self_matches_current { pub struct Parser<'a> { tokenizer: TokenizerBuffer<'a>, current_token: Option, + pub errors: Vec, } impl<'a> Parser<'a> { @@ -91,6 +116,7 @@ impl<'a> Parser<'a> { Parser { tokenizer: TokenizerBuffer::new(tokenizer), current_token: None, + errors: Vec::new(), } } @@ -158,8 +184,45 @@ impl<'a> Parser<'a> { }) } + /// Skips tokens until a statement boundary is found to recover from errors. + fn synchronize(&mut self) -> Result<(), Error> { + // We advance once to consume the error-causing token if we haven't already + // But often the error happens after we consumed something. + // Safe bet: consume current, then look. + + // If we assign next, we might be skipping the very token we want to sync on if the error didn't consume it? + // Usually, in recursive descent, the error is raised when `current` is unexpected. + // We want to discard `current` and move on. + self.assign_next()?; + + while let Some(token) = &self.current_token { + if token.token_type == TokenType::Symbol(Symbol::Semicolon) { + // Consuming the semicolon is a good place to stop and resume parsing next statement + self.assign_next()?; + return Ok(()); + } + + // Check if the token looks like the start of a statement. + // If so, we don't consume it; we return so the loop in parse_all can try to parse it. + match token.token_type { + TokenType::Keyword(Keyword::Fn) + | TokenType::Keyword(Keyword::Let) + | TokenType::Keyword(Keyword::If) + | TokenType::Keyword(Keyword::While) + | TokenType::Keyword(Keyword::Loop) + | TokenType::Keyword(Keyword::Device) + | TokenType::Keyword(Keyword::Return) => return Ok(()), + _ => {} + } + + self.assign_next()?; + } + + Ok(()) + } + pub fn parse_all(&mut self) -> Result, Error> { - let first_token = self.tokenizer.peek()?; + let first_token = self.tokenizer.peek().unwrap_or(None); let (start_line, start_col) = first_token .as_ref() .map(|tok| (tok.line, tok.column)) @@ -167,28 +230,38 @@ impl<'a> Parser<'a> { let mut expressions = Vec::>::new(); - while let Some(expression) = self.parse()? { - expressions.push(expression); + loop { + // Check EOF without unwrapping error + match self.tokenizer.peek() { + Ok(None) => break, + Err(e) => { + self.errors.push(Error::TokenizerError(e)); + break; + } + _ => {} + } + + match self.parse() { + Ok(Some(expression)) => { + expressions.push(expression); + } + Ok(None) => break, + Err(e) => { + self.errors.push(e); + // Recover + if self.synchronize().is_err() { + // If sync failed (e.g. EOF during sync), break + break; + } + } + } } - if expressions.is_empty() { - let span = Span { - start_line, - end_line: start_line, - start_col, - end_col: start_col, - }; - - return Ok(Some(Expression::Block(Spanned { - node: BlockExpression(vec![]), - span, - }))); - } - - self.tokenizer.seek(SeekFrom::Current(-1))?; - - let end_token_opt = self.tokenizer.peek()?; + // Even if we had errors, we return whatever partial AST we managed to build. + // If expressions is empty and we had errors, it's a failed parse, but we return a block. + // Use the last token position for end span, or start if nothing parsed + let end_token_opt = self.tokenizer.peek().unwrap_or(None); let (end_line, end_col) = end_token_opt .map(|tok| { let len = tok.original_string.as_ref().map(|s| s.len()).unwrap_or(0); @@ -211,6 +284,12 @@ impl<'a> Parser<'a> { pub fn parse(&mut self) -> Result>, Error> { self.assign_next()?; + + // If assign_next hit EOF or error? + if self.current_token.is_none() { + return Ok(None); + } + let expr = self.expression()?; if self_matches_peek!(self, TokenType::Symbol(Symbol::Semicolon)) { @@ -1469,4 +1548,3 @@ impl<'a> Parser<'a> { } } } - diff --git a/rust_compiler/libs/parser/src/tree_node.rs b/rust_compiler/libs/parser/src/tree_node.rs index 8133b27..a968ed4 100644 --- a/rust_compiler/libs/parser/src/tree_node.rs +++ b/rust_compiler/libs/parser/src/tree_node.rs @@ -222,6 +222,36 @@ pub struct Span { pub end_col: usize, } +impl From for lsp_types::Range { + fn from(value: Span) -> Self { + Self { + start: lsp_types::Position { + line: value.start_line as u32, + character: value.start_col as u32, + }, + end: lsp_types::Position { + line: value.end_line as u32, + character: value.end_col as u32, + }, + } + } +} + +impl From<&Span> for lsp_types::Range { + fn from(value: &Span) -> Self { + Self { + start: lsp_types::Position { + line: value.start_line as u32, + character: value.start_col as u32, + }, + end: lsp_types::Position { + line: value.end_line as u32, + character: value.end_col as u32, + }, + } + } +} + #[derive(Debug, Clone, PartialEq, Eq)] pub struct Spanned { pub span: Span, @@ -293,4 +323,3 @@ impl std::fmt::Display for Expression { } } } - diff --git a/rust_compiler/libs/tokenizer/Cargo.toml b/rust_compiler/libs/tokenizer/Cargo.toml index 100b2b7..38032f4 100644 --- a/rust_compiler/libs/tokenizer/Cargo.toml +++ b/rust_compiler/libs/tokenizer/Cargo.toml @@ -6,6 +6,7 @@ edition = "2024" [dependencies] rust_decimal = { workspace = true } quick-error = { workspace = true } +lsp-types = { workspace = true } [dev-dependencies] anyhow = { version = "^1" } diff --git a/rust_compiler/libs/tokenizer/src/lib.rs b/rust_compiler/libs/tokenizer/src/lib.rs index 8f2dc41..9434c2d 100644 --- a/rust_compiler/libs/tokenizer/src/lib.rs +++ b/rust_compiler/libs/tokenizer/src/lib.rs @@ -35,6 +35,39 @@ quick_error! { } } +impl From for lsp_types::Diagnostic { + fn from(value: Error) -> Self { + use Error::*; + use lsp_types::*; + + match value { + IOError(e) => Diagnostic { + message: e.to_string(), + severity: Some(DiagnosticSeverity::ERROR), + ..Default::default() + }, + NumberParseError(_, l, c, ref og) + | DecimalParseError(_, l, c, ref og) + | UnknownSymbolError(_, l, c, ref og) + | UnknownKeywordOrIdentifierError(_, l, c, ref og) => Diagnostic { + range: Range { + start: Position { + line: l as u32, + character: c as u32, + }, + end: Position { + line: l as u32, + character: (c + og.len()) as u32, + }, + }, + message: value.to_string(), + severity: Some(DiagnosticSeverity::ERROR), + ..Default::default() + }, + } + } +} + pub trait Tokenize: Read + Seek {} impl Tokenize for T where T: Read + Seek {} diff --git a/rust_compiler/src/ffi/mod.rs b/rust_compiler/src/ffi/mod.rs new file mode 100644 index 0000000..4754a20 --- /dev/null +++ b/rust_compiler/src/ffi/mod.rs @@ -0,0 +1,103 @@ +use compiler::Compiler; +use parser::Parser; +use safer_ffi::prelude::*; +use std::io::BufWriter; +use tokenizer::Tokenizer; + +#[derive_ReprC] +#[repr(C)] +pub struct FfiToken { + pub tooltip: safer_ffi::String, + pub error: safer_ffi::String, + pub column: i32, + pub length: i32, + pub token_kind: u32, +} + +#[derive_ReprC] +#[repr(C)] +pub struct FfiRange { + start_col: u32, + end_col: u32, + start_line: u32, + end_line: u32, +} + +impl From for FfiRange { + fn from(value: lsp_types::Range) -> Self { + Self { + start_col: value.start.character, + end_col: value.end.character, + start_line: value.start.line, + end_line: value.end.line, + } + } +} + +#[derive_ReprC] +#[repr(C)] +pub struct FfiDiagnostic { + message: safer_ffi::String, + severity: i32, + range: FfiRange, +} + +impl From for FfiDiagnostic { + fn from(value: lsp_types::Diagnostic) -> Self { + use lsp_types::*; + Self { + message: value.message.into(), + severity: match value.severity.unwrap_or(DiagnosticSeverity::ERROR) { + DiagnosticSeverity::WARNING => 2, + DiagnosticSeverity::INFORMATION => 3, + DiagnosticSeverity::HINT => 4, + _ => 1, + }, + range: value.range.into(), + } + } +} + +#[ffi_export] +pub fn free_ffi_token_vec(v: safer_ffi::Vec) { + drop(v) +} + +#[ffi_export] +pub fn free_ffi_diagnostic_vec(v: safer_ffi::Vec) { + drop(v) +} + +#[ffi_export] +pub fn free_string(s: safer_ffi::String) { + drop(s) +} + +/// C# handles strings as UTF16. We do NOT want to allocate that memory in C# because +/// we want to avoid GC. So we pass it to Rust to handle all the memory allocations. +/// This should result in the ability to compile many times without triggering frame drops +/// from the GC from a `GetBytes()` call on a string in C#. +#[ffi_export] +pub fn compile_from_string(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::String { + let mut writer = BufWriter::new(Vec::new()); + + let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice())); + let parser = Parser::new(tokenizer); + let compiler = Compiler::new(parser, &mut writer, None); + + if !compiler.compile().is_empty() { + return safer_ffi::String::EMPTY; + } + + let Ok(compiled_vec) = writer.into_inner() else { + return safer_ffi::String::EMPTY; + }; + + // Safety: I know the compiler only outputs valid utf8 + safer_ffi::String::from(unsafe { String::from_utf8_unchecked(compiled_vec) }) +} + +#[ffi_export] +pub fn diagnose_source() -> safer_ffi::Vec { + vec![].into() +} diff --git a/rust_compiler/src/lib.rs b/rust_compiler/src/lib.rs index c171cdd..6ddcd4d 100644 --- a/rust_compiler/src/lib.rs +++ b/rust_compiler/src/lib.rs @@ -1,107 +1,5 @@ -use compiler::Compiler; -use parser::Parser; -use safer_ffi::prelude::*; -use std::io::BufWriter; -use tokenizer::{token::TokenType, Error as TokenizerError, Tokenizer}; - -#[derive_ReprC] -#[repr(C)] -pub struct FfiToken { - pub tooltip: safer_ffi::String, - pub error: safer_ffi::String, - pub column: i32, - pub length: i32, - pub token_kind: u32, -} - -fn map_token_kind(t: &TokenType) -> u32 { - use TokenType::*; - match t { - Keyword(_) => 1, - Identifier(_) => 2, - Number(_) => 3, - String(_) => 4, - Boolean(_) => 5, - Symbol(_) => 6, - _ => 0, - } -} - -/// C# handles strings as UTF16. We do NOT want to allocate that memory in C# because -/// we want to avoid GC. So we pass it to Rust to handle all the memory allocations. -/// This should result in the ability to compile many times without triggering frame drops -/// from the GC from a `GetBytes()` call on a string in C#. -#[ffi_export] -pub fn compile_from_string(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::String { - let mut writer = BufWriter::new(Vec::new()); - - let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice())); - let parser = Parser::new(tokenizer); - let compiler = Compiler::new(parser, &mut writer, None); - - if compiler.compile().is_err() { - return safer_ffi::String::EMPTY; - } - - let Ok(compiled_vec) = writer.into_inner() else { - return safer_ffi::String::EMPTY; - }; - - // Safety: I know the compiler only outputs valid utf8 - safer_ffi::String::from(unsafe { String::from_utf8_unchecked(compiled_vec) }) -} -/// C# handles strings as UTF16. We do NOT want to allocate that memory in C# because -/// we want to avoid GC. So we pass it to Rust to handle all the memory allocations. -/// This should result in the ability to tokenize many times without triggering frame drops -/// from the GC from a `GetBytes()` call on a string in C#. -#[ffi_export] -pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec { - let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice())); - - let mut tokens = Vec::::new(); - - for token in tokenizer { - match token { - Err(TokenizerError::NumberParseError(_, _, col, ref str)) - | Err(TokenizerError::UnknownSymbolError(_, _, col, ref str)) - | Err(TokenizerError::DecimalParseError(_, _, col, ref str)) - | Err(TokenizerError::UnknownKeywordOrIdentifierError(_, _, col, ref str)) => { - tokens.push(FfiToken { - column: col as i32 - 1, - tooltip: "".into(), - length: str.len() as i32, - token_kind: 0, - // Safety: it's okay to unwrap the err here because we are matching on the `Err` variant - error: token.unwrap_err().to_string().into(), - }); - } - Err(_) => return safer_ffi::Vec::EMPTY, - Ok(token) if !matches!(token.token_type, TokenType::EOF) => tokens.push(FfiToken { - tooltip: "".into(), - error: "".into(), - length: token - .original_string - .map(|s| s.len() as i32) - .unwrap_or_default(), - token_kind: map_token_kind(&token.token_type), - column: token.column as i32 - 1, - }), - _ => {} - } - } - - tokens.into() -} - -#[ffi_export] -pub fn free_ffi_token_vec(v: safer_ffi::Vec) { - drop(v) -} - -#[ffi_export] -pub fn free_string(s: safer_ffi::String) { - drop(s) -} +mod ffi; +pub(crate) mod lsp; #[cfg(feature = "headers")] pub fn generate_headers() -> std::io::Result<()> { diff --git a/rust_compiler/src/lsp/mod.rs b/rust_compiler/src/lsp/mod.rs new file mode 100644 index 0000000..e69de29 diff --git a/rust_compiler/src/main.rs b/rust_compiler/src/main.rs index 619d696..730b2a2 100644 --- a/rust_compiler/src/main.rs +++ b/rust_compiler/src/main.rs @@ -8,7 +8,7 @@ use compiler::Compiler; use parser::Parser as ASTParser; use std::{ fs::File, - io::{BufWriter, Read, Write}, + io::{stderr, BufWriter, Read, Write}, path::PathBuf, }; use tokenizer::{self, Tokenizer}; @@ -75,7 +75,22 @@ fn run_logic() -> Result<(), StationlangError> { let compiler = Compiler::new(parser, &mut writer, None); - compiler.compile()?; + let mut errors = compiler.compile(); + + if !errors.is_empty() { + let mut std_error = stderr(); + let last = errors.pop(); + let errors = errors.into_iter().map(StationlangError::from); + + std_error.write_all(b"Compilation error:\n")?; + + for err in errors { + std_error.write_all(format!("{}\n", err).as_bytes())?; + } + + return Err(StationlangError::from(last.unwrap())); + } + writer.flush()?; Ok(())