From b8521917b8b60cc6f1d584eab5738fc6d4c456b8 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Fri, 2 Jan 2026 19:14:58 -0700 Subject: [PATCH] First pass getting user documentation in the IDE --- csharp_mod/Extensions.cs | 30 ++++ csharp_mod/FfiGlue.cs | 51 +++++++ csharp_mod/Formatter.cs | 73 +++++++-- csharp_mod/Marshal.cs | 80 ++++++++++ rust_compiler/libs/compiler/src/symbols.rs | 49 ++++-- rust_compiler/libs/compiler/src/test/mod.rs | 10 ++ .../compiler/src/test/symbol_documentation.rs | 120 +++++++++++++++ rust_compiler/libs/compiler/src/v1.rs | 98 ++++++++++-- rust_compiler/libs/parser/src/lib.rs | 58 ++++++- rust_compiler/libs/tokenizer/src/lib.rs | 24 ++- rust_compiler/src/ffi/mod.rs | 142 +++++++++++++++++- 11 files changed, 687 insertions(+), 48 deletions(-) create mode 100644 rust_compiler/libs/compiler/src/test/symbol_documentation.rs diff --git a/csharp_mod/Extensions.cs b/csharp_mod/Extensions.cs index dfab846..722dbc9 100644 --- a/csharp_mod/Extensions.cs +++ b/csharp_mod/Extensions.cs @@ -207,4 +207,34 @@ public static unsafe class SlangExtensions Ffi.free_docs_vec(vec); return toReturn; } + + public static unsafe List ToList(this Vec_FfiSymbolInfo_t vec) + { + var toReturn = new List((int)vec.len); + + var currentPtr = vec.ptr; + + for (int i = 0; i < (int)vec.len; i++) + { + var item = currentPtr[i]; + + toReturn.Add( + new Slang.Symbol + { + Name = item.name.AsString(), + Kind = (SymbolKind)item.kind_data.kind, + Span = new Slang.Range + { + StartLine = item.span.start_line, + StartCol = item.span.start_col, + EndLine = item.span.end_line, + EndCol = item.span.end_col, + }, + Description = item.description.AsString(), + } + ); + } + + return toReturn; + } } diff --git a/csharp_mod/FfiGlue.cs b/csharp_mod/FfiGlue.cs index c2eb521..d839f4d 100644 --- a/csharp_mod/FfiGlue.cs +++ b/csharp_mod/FfiGlue.cs @@ -147,6 +147,51 @@ public unsafe partial class Ffi { slice_ref_uint16_t input); } +[StructLayout(LayoutKind.Sequential, Size = 12)] +public unsafe struct FfiSymbolKindData_t { + public UInt32 kind; + + public UInt32 arg_count; + + public UInt32 syscall_type; +} + +[StructLayout(LayoutKind.Sequential, Size = 80)] +public unsafe struct FfiSymbolInfo_t { + public Vec_uint8_t name; + + public FfiSymbolKindData_t kind_data; + + public FfiRange_t span; + + public Vec_uint8_t description; +} + +/// +/// Same as [Vec][rust::Vec], but with guaranteed #[repr(C)] layout +/// +[StructLayout(LayoutKind.Sequential, Size = 24)] +public unsafe struct Vec_FfiSymbolInfo_t { + public FfiSymbolInfo_t * ptr; + + public UIntPtr len; + + public UIntPtr cap; +} + +[StructLayout(LayoutKind.Sequential, Size = 48)] +public unsafe struct FfiDiagnosticsAndSymbols_t { + public Vec_FfiDiagnostic_t diagnostics; + + public Vec_FfiSymbolInfo_t symbols; +} + +public unsafe partial class Ffi { + [DllImport(RustLib, ExactSpelling = true)] public static unsafe extern + FfiDiagnosticsAndSymbols_t diagnose_source_with_symbols ( + slice_ref_uint16_t input); +} + [StructLayout(LayoutKind.Sequential, Size = 48)] public unsafe struct FfiDocumentedItem_t { public Vec_uint8_t item_name; @@ -184,6 +229,12 @@ public unsafe partial class Ffi { Vec_FfiDiagnostic_t v); } +public unsafe partial class Ffi { + [DllImport(RustLib, ExactSpelling = true)] public static unsafe extern + void free_ffi_diagnostics_and_symbols ( + FfiDiagnosticsAndSymbols_t v); +} + [StructLayout(LayoutKind.Sequential, Size = 64)] public unsafe struct FfiToken_t { public Vec_uint8_t tooltip; diff --git a/csharp_mod/Formatter.cs 
b/csharp_mod/Formatter.cs index c4eb4e1..414377d 100644 --- a/csharp_mod/Formatter.cs +++ b/csharp_mod/Formatter.cs @@ -171,18 +171,17 @@ public class SlangFormatter : ICodeFormatter return; // Running this potentially CPU intensive work on a background thread. - var dict = await Task.Run( + var (diagnostics, symbols) = await Task.Run( () => { - return Marshal - .DiagnoseSource(inputSrc) - .GroupBy(d => d.Range.StartLine) - .ToDictionary(g => g.Key); + return Marshal.DiagnoseSourceWithSymbols(inputSrc); }, cancellationToken ); - ApplyDiagnostics(dict); + var dict = diagnostics.GroupBy(d => d.Range.StartLine).ToDictionary(g => g.Key); + + ApplyDiagnosticsAndSymbols(dict, symbols); // If we have valid code, update the IC10 output if (dict.Count > 0) @@ -266,11 +265,11 @@ public class SlangFormatter : ICodeFormatter } /// - /// Takes diagnostics from the Rust FFI compiler and applies it as semantic tokens to the + /// Takes diagnostics and symbols from the Rust FFI compiler and applies them as semantic tokens to the /// source in this editor. /// This runs on the Main Thread /// - private void ApplyDiagnostics(Dictionary> dict) + private void ApplyDiagnosticsAndSymbols(Dictionary> dict, List symbols) { HashSet linesToRefresh; @@ -289,6 +288,12 @@ public class SlangFormatter : ICodeFormatter { linesToRefresh = new HashSet(dict.Keys); linesToRefresh.UnionWith(_linesWithErrors); + + // Also add lines with symbols that may have been modified + foreach (var symbol in symbols) + { + linesToRefresh.Add(symbol.Span.StartLine); + } } _lastLineCount = this.Lines.Count; @@ -328,9 +333,49 @@ public class SlangFormatter : ICodeFormatter } } + // 3. Add symbol tooltips for symbols on this line + foreach (var symbol in symbols) + { + if (symbol.Span.StartLine == lineIndex) + { + var column = (int)symbol.Span.StartCol; + var length = Math.Max(1, (int)(symbol.Span.EndCol - symbol.Span.StartCol)); + + // If there's already a token at this position (from syntax highlighting), use it + // Otherwise, create a new token for the symbol + if (allTokensDict.ContainsKey(column)) + { + // Update existing token with symbol tooltip + var existingToken = allTokensDict[column]; + allTokensDict[column] = new SemanticToken( + line: existingToken.Line, + column: existingToken.Column, + length: existingToken.Length, + type: existingToken.Type, + style: existingToken.Style, + data: symbol.Description, // Use symbol description as tooltip + isError: existingToken.IsError + ); + } + else + { + // Create new token for symbol + allTokensDict[column] = new SemanticToken( + line: (int)lineIndex, + column, + length, + type: 0, + style: ColorIdentifier, + data: symbol.Description, + isError: false + ); + } + } + } + var allTokens = allTokensDict.Values.ToList(); - // 3. Update the line (this clears existing tokens and uses the list we just built) + // 4. Update the line (this clears existing tokens and uses the list we just built) line.Update(allTokens); ReattachMetadata(line, allTokens); @@ -339,6 +384,16 @@ public class SlangFormatter : ICodeFormatter _linesWithErrors = new HashSet(dict.Keys); } + /// + /// Takes diagnostics from the Rust FFI compiler and applies it as semantic tokens to the + /// source in this editor. 
+ /// This runs on the Main Thread + /// + private void ApplyDiagnostics(Dictionary> dict) + { + ApplyDiagnosticsAndSymbols(dict, new List()); + } + // Helper to map SemanticToken data (tooltips/errors) back to the tokens in the line private void ReattachMetadata(StyledLine line, List semanticTokens) { diff --git a/csharp_mod/Marshal.cs b/csharp_mod/Marshal.cs index 672f1ac..1f44090 100644 --- a/csharp_mod/Marshal.cs +++ b/csharp_mod/Marshal.cs @@ -47,6 +47,33 @@ public struct SourceMapEntry } } +public struct Symbol +{ + public string Name; + public Range Span; + public SymbolKind Kind; + public string Description; + + public override string ToString() + { + return $"{Kind}: {Name} at {Span}"; + } +} + +public enum SymbolKind +{ + Function = 0, + Syscall = 1, + Variable = 2, +} + +public struct SymbolData +{ + public uint Kind; + public uint ArgCount; + public uint SyscallType; // 0=System, 1=Math +} + public static class Marshal { private static IntPtr _libraryHandle = IntPtr.Zero; @@ -164,6 +191,59 @@ public static class Marshal } } + public static unsafe (List, List) DiagnoseSourceWithSymbols(string inputString) + { + if (string.IsNullOrEmpty(inputString) || !EnsureLibLoaded()) + { + return (new(), new()); + } + + fixed (char* ptrInput = inputString) + { + var input = new slice_ref_uint16_t + { + ptr = (ushort*)ptrInput, + len = (UIntPtr)inputString.Length, + }; + + var result = Ffi.diagnose_source_with_symbols(input); + + // Convert diagnostics + var diagnostics = result.diagnostics.ToList(); + + // Convert symbols + var symbols = new List(); + var symbolPtr = result.symbols.ptr; + var symbolCount = (int)result.symbols.len; + + for (int i = 0; i < symbolCount; i++) + { + var ffiSymbol = symbolPtr[i]; + var kind = (SymbolKind)ffiSymbol.kind_data.kind; + + // Use the actual description from the FFI (includes doc comments and syscall docs) + var description = ffiSymbol.description.AsString(); + + symbols.Add(new Symbol + { + Name = ffiSymbol.name.AsString(), + Kind = kind, + Span = new Range( + ffiSymbol.span.start_line, + ffiSymbol.span.start_col, + ffiSymbol.span.end_line, + ffiSymbol.span.end_col + ), + Description = description, + }); + } + + Ffi.free_ffi_diagnostics_and_symbols(result); + + return (diagnostics, symbols); + } + } + public static unsafe List TokenizeLine(string inputString) { if (string.IsNullOrEmpty(inputString) || !EnsureLibLoaded()) diff --git a/rust_compiler/libs/compiler/src/symbols.rs b/rust_compiler/libs/compiler/src/symbols.rs index 3dc4207..5fde9f6 100644 --- a/rust_compiler/libs/compiler/src/symbols.rs +++ b/rust_compiler/libs/compiler/src/symbols.rs @@ -162,6 +162,17 @@ impl<'a> CompilationMetadata<'a> { name: Cow<'a, str>, parameters: Vec>, span: Option, + ) { + self.add_function_with_doc(name, parameters, span, None); + } + + /// Adds a function symbol with optional doc comment. + pub fn add_function_with_doc( + &mut self, + name: Cow<'a, str>, + parameters: Vec>, + span: Option, + description: Option>, ) { self.add_symbol(SymbolInfo { name, @@ -170,7 +181,7 @@ impl<'a> CompilationMetadata<'a> { return_type: None, }, span, - description: None, + description, }); } @@ -181,6 +192,18 @@ impl<'a> CompilationMetadata<'a> { syscall_type: SyscallType, argument_count: usize, span: Option, + ) { + self.add_syscall_with_doc(name, syscall_type, argument_count, span, None); + } + + /// Adds a syscall symbol with optional doc comment. 
+ pub fn add_syscall_with_doc( + &mut self, + name: Cow<'a, str>, + syscall_type: SyscallType, + argument_count: usize, + span: Option, + description: Option>, ) { self.add_symbol(SymbolInfo { name, @@ -189,17 +212,27 @@ impl<'a> CompilationMetadata<'a> { argument_count, }, span, - description: None, + description, }); } /// Adds a variable symbol. pub fn add_variable(&mut self, name: Cow<'a, str>, span: Option) { + self.add_variable_with_doc(name, span, None); + } + + /// Adds a variable symbol with optional doc comment. + pub fn add_variable_with_doc( + &mut self, + name: Cow<'a, str>, + span: Option, + description: Option>, + ) { self.add_symbol(SymbolInfo { name, kind: SymbolKind::Variable { type_hint: None }, span, - description: None, + description, }); } @@ -276,16 +309,6 @@ mod tests { assert_eq!(variables.len(), 1); } - #[test] - #[ignore] // Requires complex Uri construction - fn test_lsp_symbol_conversion() { - let mut metadata = CompilationMetadata::new(); - metadata.add_function("test_func".into(), vec!["a".into(), "b".into()], None); - - // In real usage with LSP, Uri would be passed from the server - // This test demonstrates the conversion method exists and is type-safe - } - #[test] fn test_lsp_completion_items() { let mut metadata = CompilationMetadata::new(); diff --git a/rust_compiler/libs/compiler/src/test/mod.rs b/rust_compiler/libs/compiler/src/test/mod.rs index c732971..f34bd34 100644 --- a/rust_compiler/libs/compiler/src/test/mod.rs +++ b/rust_compiler/libs/compiler/src/test/mod.rs @@ -47,6 +47,15 @@ macro_rules! compile { output, } }}; + + (metadata $source:expr) => {{ + let compiler = crate::Compiler::new( + parser::Parser::new(tokenizer::Tokenizer::from($source)), + None, + ); + let res = compiler.compile(); + res.metadata + }}; } mod binary_expression; mod branching; @@ -61,5 +70,6 @@ mod loops; mod math_syscall; mod negation_priority; mod scoping; +mod symbol_documentation; mod syscall; mod tuple_literals; diff --git a/rust_compiler/libs/compiler/src/test/symbol_documentation.rs b/rust_compiler/libs/compiler/src/test/symbol_documentation.rs new file mode 100644 index 0000000..3ba830e --- /dev/null +++ b/rust_compiler/libs/compiler/src/test/symbol_documentation.rs @@ -0,0 +1,120 @@ +#[cfg(test)] +mod test { + use anyhow::Result; + + #[test] + fn test_variable_doc_comment() -> Result<()> { + let metadata = compile!(metadata "/// this is a documented variable\nlet myVar = 42;"); + + let var_symbol = metadata + .symbols + .iter() + .find(|s| s.name == "myVar") + .expect("myVar symbol not found"); + + assert_eq!( + var_symbol.description.as_ref().map(|d| d.as_ref()), + Some("this is a documented variable") + ); + Ok(()) + } + + #[test] + fn test_const_doc_comment() -> Result<()> { + let metadata = compile!(metadata "/// const documentation\nconst myConst = 100;"); + + let const_symbol = metadata + .symbols + .iter() + .find(|s| s.name == "myConst") + .expect("myConst symbol not found"); + + assert_eq!( + const_symbol.description.as_ref().map(|d| d.as_ref()), + Some("const documentation") + ); + Ok(()) + } + + #[test] + fn test_device_doc_comment() -> Result<()> { + let metadata = compile!(metadata "/// device documentation\ndevice myDevice = \"d0\";"); + + let device_symbol = metadata + .symbols + .iter() + .find(|s| s.name == "myDevice") + .expect("myDevice symbol not found"); + + assert_eq!( + device_symbol.description.as_ref().map(|d| d.as_ref()), + Some("device documentation") + ); + Ok(()) + } + + #[test] + fn test_function_doc_comment() -> Result<()> { + let 
metadata = compile!(metadata "/// function documentation\nfn test() { }"); + + let fn_symbol = metadata + .symbols + .iter() + .find(|s| s.name == "test") + .expect("test symbol not found"); + + assert_eq!( + fn_symbol.description.as_ref().map(|d| d.as_ref()), + Some("function documentation") + ); + Ok(()) + } + + #[test] + fn test_syscall_documentation() -> Result<()> { + let metadata = compile!(metadata "fn test() { clr(d0); }"); + + let clr_symbol = metadata + .symbols + .iter() + .find(|s| s.name == "clr") + .expect("clr syscall not found"); + + // clr should have its built-in documentation + assert!(clr_symbol.description.is_some()); + assert!(!clr_symbol.description.as_ref().unwrap().is_empty()); + Ok(()) + } + + #[test] + fn test_variable_references_have_tooltips() -> Result<()> { + let metadata = compile!(metadata "/// documented variable\nlet myVar = 5;\nlet x = myVar + 2;\nmyVar = 10;"); + + // Count how many times 'myVar' appears in symbols + let myvar_symbols: Vec<_> = metadata + .symbols + .iter() + .filter(|s| s.name == "myVar") + .collect(); + + // We should have at least 2: declaration + 1 reference (in myVar + 2) + // The assignment `myVar = 10` is a write, not a read, so doesn't create a reference + assert!( + myvar_symbols.len() >= 2, + "Expected at least 2 'myVar' symbols (declaration + reference), got {}", + myvar_symbols.len() + ); + + // All should have the same description + let expected_desc = "documented variable"; + for sym in &myvar_symbols { + assert_eq!( + sym.description.as_ref().map(|d| d.as_ref()), + Some(expected_desc), + "Symbol description mismatch at {:?}", + sym.span + ); + } + Ok(()) + } +} diff --git a/rust_compiler/libs/compiler/src/v1.rs b/rust_compiler/libs/compiler/src/v1.rs index 3aba4d4..66e6958 100644 --- a/rust_compiler/libs/compiler/src/v1.rs +++ b/rust_compiler/libs/compiler/src/v1.rs @@ -465,10 +465,23 @@ impl<'a> Compiler<'a> { }, Expression::Variable(name) => { match scope.get_location_of(&name.node, Some(name.span)) { - Ok(loc) => Ok(Some(CompileLocation { - location: loc, - temp_name: None, // User variable, do not free - })), + Ok(loc) => { + // Track this variable reference in metadata (for tooltips on all usages, not just declaration) + let doc_comment: Option> = self + .parser + .get_declaration_doc(name.node.as_ref()) + .map(|s| Cow::Owned(s) as Cow<'a, str>); + self.metadata.add_variable_with_doc( + name.node.clone(), + Some(name.span), + doc_comment, + ); + + Ok(Some(CompileLocation { + location: loc, + temp_name: None, // User variable, do not free + })) + } Err(_) => { // fallback, check devices if let Some(device) = self.devices.get(&name.node) { @@ -652,6 +665,14 @@ impl<'a> Compiler<'a> { if let Expression::Variable(ref name) = expr.node && let Some(device_id) = self.devices.get(&name.node) { + // Track this device reference in metadata (for tooltips on all usages, not just declaration) + let doc_comment = self + .parser + .get_declaration_doc(name.node.as_ref()) + .map(Cow::Owned); + self.metadata + .add_variable_with_doc(name.node.clone(), Some(expr.span), doc_comment); + return Ok((Operand::Device(device_id.clone()), None)); } @@ -705,8 +726,12 @@ impl<'a> Compiler<'a> { let name_span = var_name.span; // Track the variable in metadata + let doc_comment = self + .parser + .get_declaration_doc(name_str.as_ref()) + .map(Cow::Owned); self.metadata - .add_variable(name_str.clone(), Some(name_span)); + .add_variable_with_doc(name_str.clone(), Some(name_span), doc_comment); // optimization. 
Check for a negated numeric literal (including nested negations) // e.g., -5, -(-5), -(-(5)), etc. @@ -1068,8 +1093,15 @@ impl<'a> Compiler<'a> { } = expr; // Track the const variable in metadata - self.metadata - .add_variable(const_name.node.clone(), Some(const_name.span)); + let doc_comment = self + .parser + .get_declaration_doc(const_name.node.as_ref()) + .map(Cow::Owned); + self.metadata.add_variable_with_doc( + const_name.node.clone(), + Some(const_name.span), + doc_comment, + ); // check for a hash expression or a literal let value = match const_value { @@ -1495,10 +1527,25 @@ impl<'a> Compiler<'a> { let TupleDeclarationExpression { names, value } = tuple_decl; // Track each variable in the tuple declaration - for name_spanned in &names { + // Get doc for the first variable + let first_var_name = names + .iter() + .find(|n| n.node.as_ref() != "_") + .map(|n| n.node.to_string()); + let doc_comment = first_var_name + .as_ref() + .and_then(|name| self.parser.get_declaration_doc(name)) + .map(Cow::Owned); + + for (i, name_spanned) in names.iter().enumerate() { if name_spanned.node.as_ref() != "_" { - self.metadata - .add_variable(name_spanned.node.clone(), Some(name_spanned.span)); + // Only attach doc comment to the first variable + let comment = if i == 0 { doc_comment.clone() } else { None }; + self.metadata.add_variable_with_doc( + name_spanned.node.clone(), + Some(name_spanned.span), + comment, + ); } } @@ -1941,8 +1988,15 @@ impl<'a> Compiler<'a> { expr: DeviceDeclarationExpression<'a>, ) -> Result<(), Error<'a>> { // Track the device declaration in metadata - self.metadata - .add_variable(expr.name.node.clone(), Some(expr.name.span)); + let doc_comment = self + .parser + .get_declaration_doc(expr.name.node.as_ref()) + .map(Cow::Owned); + self.metadata.add_variable_with_doc( + expr.name.node.clone(), + Some(expr.name.span), + doc_comment, + ); if self.devices.contains_key(&expr.name.node) { self.errors.push(Error::DuplicateIdentifier( @@ -2950,11 +3004,13 @@ impl<'a> Compiler<'a> { ) -> Result>, Error<'a>> { // Track the syscall in metadata let syscall_name = expr.name(); - self.metadata.add_syscall( + let doc = expr.docs().into(); + self.metadata.add_syscall_with_doc( Cow::Borrowed(syscall_name), crate::SyscallType::System, expr.arg_count(), Some(span), + Some(doc), ); macro_rules! cleanup { @@ -3356,11 +3412,13 @@ impl<'a> Compiler<'a> { ) -> Result>, Error<'a>> { // Track the syscall in metadata let syscall_name = expr.name(); - self.metadata.add_syscall( + let doc = expr.docs().into(); + self.metadata.add_syscall_with_doc( Cow::Borrowed(syscall_name), crate::SyscallType::Math, expr.arg_count(), Some(span), + Some(doc), ); macro_rules! 
cleanup { @@ -3625,8 +3683,16 @@ impl<'a> Compiler<'a> { // Track the function definition in metadata let param_names: Vec> = arguments.iter().map(|a| a.node.clone()).collect(); - self.metadata - .add_function(name.node.clone(), param_names, Some(name.span)); + let doc_comment = self + .parser + .get_declaration_doc(name.node.as_ref()) + .map(Cow::Owned); + self.metadata.add_function_with_doc( + name.node.clone(), + param_names, + Some(name.span), + doc_comment, + ); if self.function_meta.locations.contains_key(&name.node) { self.errors diff --git a/rust_compiler/libs/parser/src/lib.rs b/rust_compiler/libs/parser/src/lib.rs index a6634f9..373eca2 100644 --- a/rust_compiler/libs/parser/src/lib.rs +++ b/rust_compiler/libs/parser/src/lib.rs @@ -117,6 +117,10 @@ pub struct Parser<'a> { current_token: Option>, last_token_span: Option, pub errors: Vec>, + /// Caches the most recent doc comment for attaching to the next declaration + cached_doc_comment: Option, + /// Maps variable/declaration names to their doc comments + pub declaration_docs: std::collections::HashMap, } impl<'a> Parser<'a> { @@ -126,6 +130,8 @@ impl<'a> Parser<'a> { current_token: None, last_token_span: None, errors: Vec::new(), + cached_doc_comment: None, + declaration_docs: std::collections::HashMap::new(), } } @@ -151,6 +157,26 @@ impl<'a> Parser<'a> { }) } + /// Pops and returns the cached doc comment, if any + pub fn pop_doc_comment(&mut self) -> Option { + self.cached_doc_comment.take() + } + + /// Caches a doc comment for attachment to the next declaration + pub fn cache_doc_comment(&mut self, comment: String) { + self.cached_doc_comment = Some(comment); + } + + /// Stores a doc comment for a declaration (by name) + pub fn store_declaration_doc(&mut self, name: String, doc: String) { + self.declaration_docs.insert(name, doc); + } + + /// Retrieves and removes a doc comment for a declaration + pub fn get_declaration_doc(&mut self, name: &str) -> Option { + self.declaration_docs.get(name).cloned() + } + fn unexpected_eof(&self) -> Error<'a> { Error::UnexpectedEOF(self.last_token_span) } @@ -288,7 +314,36 @@ impl<'a> Parser<'a> { if let Some(token) = &self.current_token { self.last_token_span = Some(Self::token_to_span(token)); } - self.current_token = self.tokenizer.next_token()?; + + // Keep reading tokens, caching doc comments and skipping them + loop { + self.current_token = self.tokenizer.next_token_with_comments()?; + + match &self.current_token { + Some(token) => { + if let TokenType::Comment(comment) = &token.token_type { + // Cache doc comments for attachment to the next declaration + if let tokenizer::token::Comment::Doc(doc_text) = comment { + self.cache_doc_comment(doc_text.to_string()); + } + // Skip all comments (both doc and regular) + continue; + } + + // If we have a cached doc comment and encounter an identifier, associate them + if let TokenType::Identifier(ref id) = token.token_type { + if let Some(doc) = self.cached_doc_comment.take() { + self.store_declaration_doc(id.to_string(), doc); + } + } + + // Non-comment token, use it as current + break; + } + None => break, // EOF + } + } + Ok(()) } @@ -511,7 +566,6 @@ impl<'a> Parser<'a> { TokenType::Keyword(Keyword::Const) => { let spanned_const = self.spanned(|p| p.const_declaration())?; - Some(Spanned { span: spanned_const.span, node: Expression::ConstDeclaration(spanned_const), diff --git a/rust_compiler/libs/tokenizer/src/lib.rs b/rust_compiler/libs/tokenizer/src/lib.rs index 97a794b..50a05a4 100644 --- a/rust_compiler/libs/tokenizer/src/lib.rs +++ 
b/rust_compiler/libs/tokenizer/src/lib.rs @@ -68,6 +68,12 @@ impl<'a> Tokenizer<'a> { Ok(current.map(|t| t.map(|t| self.get_token(t)))?) } + + /// Returns the next token, including comments. Used to preserve doc comments. + pub fn next_token_with_comments(&mut self) -> Result>, Error> { + let current = self.lexer.next().transpose(); + Ok(current.map(|t| t.map(|t| self.get_token(t)))?) + } } // ... Iterator and TokenizerBuffer implementations remain unchanged ... @@ -127,12 +133,28 @@ impl<'a> TokenizerBuffer<'a> { self.index += 1; Ok(token) } + + pub fn next_token_with_comments(&mut self) -> Result>, Error> { + if let Some(token) = self.buffer.pop_front() { + self.history.push_back(token.clone()); + self.index += 1; + return Ok(Some(token)); + } + let token = self.tokenizer.next_token_with_comments()?; + + if let Some(ref token) = token { + self.history.push_back(token.clone()); + } + + self.index += 1; + Ok(token) + } pub fn peek(&mut self) -> Result>, Error> { if let Some(token) = self.buffer.front() { return Ok(Some(token.clone())); } - let Some(new_token) = self.tokenizer.next_token()? else { + let Some(new_token) = self.tokenizer.next_token_with_comments()? else { return Ok(None); }; self.buffer.push_front(new_token.clone()); diff --git a/rust_compiler/src/ffi/mod.rs b/rust_compiler/src/ffi/mod.rs index 76195a3..996a621 100644 --- a/rust_compiler/src/ffi/mod.rs +++ b/rust_compiler/src/ffi/mod.rs @@ -94,6 +94,30 @@ impl From for FfiDiagnostic { } } +#[derive_ReprC] +#[repr(C)] +pub struct FfiSymbolKindData { + pub kind: u32, // 0=Function, 1=Syscall, 2=Variable + pub arg_count: u32, + pub syscall_type: u32, // 0=System, 1=Math (only for Syscall kind) +} + +#[derive_ReprC] +#[repr(C)] +pub struct FfiSymbolInfo { + pub name: safer_ffi::String, + pub kind_data: FfiSymbolKindData, + pub span: FfiRange, + pub description: safer_ffi::String, +} + +#[derive_ReprC] +#[repr(C)] +pub struct FfiDiagnosticsAndSymbols { + pub diagnostics: safer_ffi::Vec, + pub symbols: safer_ffi::Vec, +} + #[ffi_export] pub fn free_ffi_compilation_result(input: FfiCompilationResult) { drop(input) @@ -109,6 +133,11 @@ pub fn free_ffi_diagnostic_vec(v: safer_ffi::Vec) { drop(v) } +#[ffi_export] +pub fn free_ffi_diagnostics_and_symbols(v: FfiDiagnosticsAndSymbols) { + drop(v) +} + #[ffi_export] pub fn free_string(s: safer_ffi::String) { drop(s) @@ -182,6 +211,10 @@ pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec = + SysCall::get_all_documentation().into_iter().collect(); + let mut tokens = Vec::new(); for token in tokenizer { @@ -217,13 +250,26 @@ pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec tokens.push(FfiToken { - column: span.start as i32, - error: "".into(), - length: (span.end - span.start) as i32, - tooltip: token_type.docs().into(), - token_kind: token_type.into(), - }), + }) => { + let mut tooltip = token_type.docs(); + + // If no docs from token type, check if it's a syscall + if tooltip.is_empty() { + if let TokenType::Identifier(id) = &token_type { + if let Some(doc) = syscall_docs.get(id.as_ref()) { + tooltip = doc.clone(); + } + } + } + + tokens.push(FfiToken { + column: span.start as i32, + error: "".into(), + length: (span.end - span.start) as i32, + tooltip: tooltip.into(), + token_kind: token_type.into(), + }) + } } } @@ -257,6 +303,88 @@ pub fn diagnose_source(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec< res.unwrap_or(vec![].into()) } +#[ffi_export] +pub fn diagnose_source_with_symbols( + input: safer_ffi::slice::Ref<'_, 
u16>, +) -> FfiDiagnosticsAndSymbols { + let res = std::panic::catch_unwind(|| { + let input = String::from_utf16_lossy(input.as_slice()); + + let tokenizer = Tokenizer::from(input.as_str()); + let compiler = Compiler::new(Parser::new(tokenizer), None); + + let CompilationResult { + errors: diagnosis, + metadata, + .. + } = compiler.compile(); + + // Convert diagnostics + let mut diagnostics_vec: Vec = Vec::with_capacity(diagnosis.len()); + for err in diagnosis { + diagnostics_vec.push(lsp_types::Diagnostic::from(err).into()); + } + + // Convert symbols + let mut symbols_vec: Vec = Vec::with_capacity(metadata.symbols.len()); + for symbol in &metadata.symbols { + let (kind, arg_count, syscall_type) = match &symbol.kind { + compiler::SymbolKind::Function { parameters, .. } => { + (0, parameters.len() as u32, 0) + } + compiler::SymbolKind::Syscall { + syscall_type, + argument_count, + } => { + let sc_type = match syscall_type { + compiler::SyscallType::System => 0, + compiler::SyscallType::Math => 1, + }; + (1, *argument_count as u32, sc_type) + } + compiler::SymbolKind::Variable { .. } => (2, 0, 0), + }; + + let span = symbol + .span + .as_ref() + .map(|s| (*s).into()) + .unwrap_or(FfiRange { + start_line: 0, + end_line: 0, + start_col: 0, + end_col: 0, + }); + + symbols_vec.push(FfiSymbolInfo { + name: symbol.name.to_string().into(), + kind_data: FfiSymbolKindData { + kind, + arg_count, + syscall_type, + }, + span, + description: symbol + .description + .as_ref() + .map(|d| d.to_string()) + .unwrap_or_default() + .into(), + }); + } + + FfiDiagnosticsAndSymbols { + diagnostics: diagnostics_vec.into(), + symbols: symbols_vec.into(), + } + }); + + res.unwrap_or(FfiDiagnosticsAndSymbols { + diagnostics: vec![].into(), + symbols: vec![].into(), + }) +} + #[ffi_export] pub fn get_docs() -> safer_ffi::Vec { let res = std::panic::catch_unwind(|| {
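
Usage sketch (not part of the patch): the managed entry point added in Marshal.cs can be driven end to end from C# roughly as below. The Slang snippet, variable names, and console output are made up for illustration; the deconstructed tuple matches the DiagnoseSourceWithSymbols return shape in this patch, and Symbol.ToString() prints "{Kind}: {Name} at {Span}".

    // Hypothetical driver; assumes the mod's namespace (Marshal, Symbol, SymbolKind) is in scope.
    var src = "/// target temperature in kelvin\nlet targetTemp = 293;\nlet warning = targetTemp + 5;";

    // Runs the Rust compiler over the source and returns managed copies of the results.
    var (diagnostics, symbols) = Marshal.DiagnoseSourceWithSymbols(src);

    Console.WriteLine($"{diagnostics.Count} diagnostic(s)");
    foreach (var symbol in symbols)
    {
        // Description carries the /// doc comment (or built-in syscall docs) surfaced by the compiler.
        Console.WriteLine($"{symbol} :: {symbol.Description}");
    }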
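The numeric encoding carried by FfiSymbolKindData (kind: 0=Function, 1=Syscall, 2=Variable; syscall_type: 0=System, 1=Math, per the comments in ffi/mod.rs and Marshal.cs) can be turned into a display label with a small helper. This is a sketch only; DescribeKind is a hypothetical name, not something the patch adds.

    // Hypothetical helper for turning raw FFI kind data into a human-readable label.
    static string DescribeKind(FfiSymbolKindData_t kindData)
    {
        if (kindData.kind == 0)
            return $"function with {kindData.arg_count} parameter(s)";

        if (kindData.kind == 1)
        {
            // syscall_type is only meaningful for syscalls: 0 = System, 1 = Math.
            var family = kindData.syscall_type == 1 ? "math" : "system";
            return $"{family} syscall taking {kindData.arg_count} argument(s)";
        }

        if (kindData.kind == 2)
            return "variable";

        return "unknown symbol kind";
    }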
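Callers that use the raw FFI surface instead of Marshal.DiagnoseSourceWithSymbols own the returned vectors and must hand them back to Rust via free_ffi_diagnostics_and_symbols, exactly as Marshal.cs does. A minimal sketch of that contract, assuming an unsafe context and a pinned UTF-16 buffer:

    // Sketch of the raw call/free pattern; Marshal.DiagnoseSourceWithSymbols wraps the same steps.
    static unsafe void PrintSymbolDocs(string src)
    {
        fixed (char* p = src)
        {
            var input = new slice_ref_uint16_t { ptr = (ushort*)p, len = (UIntPtr)src.Length };
            var result = Ffi.diagnose_source_with_symbols(input);
            try
            {
                for (int i = 0; i < (int)result.symbols.len; i++)
                {
                    var info = result.symbols.ptr[i];
                    Console.WriteLine($"{info.name.AsString()}: {info.description.AsString()}");
                }
            }
            finally
            {
                // The vectors were allocated on the Rust side; freeing them here avoids a leak.
                Ffi.free_ffi_diagnostics_and_symbols(result);
            }
        }
    }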