From 18fbf26dae7d9b5b7b7b2d172776948b8ed00944 Mon Sep 17 00:00:00 2001
From: Devin Bidwell
Date: Sat, 29 Nov 2025 12:42:07 -0700
Subject: [PATCH] refactor mod to account for changes in the IC10Editor mod
 interface

---
 build.sh                                |   2 +
 csharp_mod/Extensions.cs                | 137 ++++++++-----
 csharp_mod/FfiGlue.cs                   |  10 +-
 csharp_mod/Formatter.cs                 |  20 +-
 csharp_mod/Marshal.cs                   | 182 +++++++++++++----
 csharp_mod/Plugin.cs                    |  51 ++---
 rust_compiler/libs/tokenizer/src/lib.rs | 261 +++++++++++-------------
 rust_compiler/src/lib.rs                |  43 ++--
 8 files changed, 409 insertions(+), 297 deletions(-)

diff --git a/build.sh b/build.sh
index ba83ba6..533eb3f 100755
--- a/build.sh
+++ b/build.sh
@@ -39,6 +39,7 @@ echo "--------------------"
 RUST_WIN_EXE="$RUST_DIR/target/x86_64-pc-windows-gnu/release/slang.exe"
 RUST_LINUX_BIN="$RUST_DIR/target/x86_64-unknown-linux-gnu/release/slang"
 CHARP_DLL="$CSHARP_DIR/bin/Release/net46/StationeersSlang.dll"
+CHARP_PDB="$CSHARP_DIR/bin/Release/net46/StationeersSlang.pdb"
 
 # Check if the release dir exists, if not: create it.
 if [[ ! -d "$RELEASE_DIR" ]]; then
@@ -48,3 +49,4 @@ fi
 cp "$RUST_WIN_EXE" "$RELEASE_DIR/slang.exe"
 cp "$RUST_LINUX_BIN" "$RELEASE_DIR/slang"
 cp "$CHARP_DLL" "$RELEASE_DIR/StationeersSlang.dll"
+cp "$CHARP_PDB" "$RELEASE_DIR/StationeersSlang.pdb"

diff --git a/csharp_mod/Extensions.cs b/csharp_mod/Extensions.cs
index e957a88..764b6c9 100644
--- a/csharp_mod/Extensions.cs
+++ b/csharp_mod/Extensions.cs
@@ -1,66 +1,103 @@
+namespace Slang;
+
 using System;
 using System.Text;
 using StationeersIC10Editor;
 
-namespace Slang
+public static unsafe class SlangExtensions
 {
-    public static unsafe class SlangExtensions
+    /**
+     *
+     * This is a helper method to convert a Rust struct for a string pointer
+     * into a C# style string.
+     *
+     */
+    public static string AsString(this Vec_uint8_t vec)
     {
-        /**
-         *
-         * This is a helper method to convert a Rust struct for a string pointer
-         * into a C# style string.
-         *
-         */
-        public static string AsString(this Vec_uint8_t vec)
+        if (vec.ptr == null || vec.len == UIntPtr.Zero)
         {
-            if (vec.ptr == null || vec.len == UIntPtr.Zero)
-            {
-                return string.Empty;
-            }
-
-            // Rust strings are UTF-8. Read bytes from raw pointer.
-            var toReturn = Encoding.UTF8.GetString(vec.ptr, (int)vec.len);
-
-            return toReturn;
+            return string.Empty;
         }
 
-        /**
-         * This will free a Rust string struct. Because this is a pointer to a struct, this memory
-         * is managed by Rust, therefor it must be freed by Rust
-         *
-         */
-        public static void Drop(this Vec_uint8_t vec)
+        // Rust strings are UTF-8. Read bytes from raw pointer.
+        var toReturn = Encoding.UTF8.GetString(vec.ptr, (int)vec.len);
+
+        return toReturn;
+    }
+
+    /**
+     * This will free a Rust string struct. Because this is a pointer to a struct, this memory
+     * is managed by Rust, therefore it must be freed by Rust.
+     *
+     */
+    public static void Drop(this Vec_uint8_t vec)
     {
-        Ffi.free_string(vec);
+        Ffi.free_string(vec);
    }
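+
+    // Usage sketch (hypothetical caller, not part of this patch): AsString
+    // copies the bytes into a managed string but does not release the Rust
+    // allocation, so Drop must still be called afterwards:
+    //
+    //     var vec = Ffi.compile_from_string(input);
+    //     try { text = vec.AsString(); } finally { vec.Drop(); }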
+
+    /**
+     * This helper converts a Rust vec of FFI tokens into an editor Line. It
+     * handles freeing the Rust allocation after the Line is created, so there
+     * is no need to Drop this memory.
+     *
+     */
+    public static Line ToLine(this Vec_FfiToken_t vec, string sourceText)
+    {
+        var list = new Line(sourceText);
+
+        var currentPtr = vec.ptr;
+
+        // Iterate through the raw memory array
+        for (int i = 0; i < (int)vec.len; i++)
         {
-            Ffi.free_string(vec);
+            var token = currentPtr[i];
+
+            var color = GetColorForKind(token.token_kind);
+
+            int colIndex = token.column;
+            if (colIndex < 0)
+                colIndex = 0;
+
+            var semanticToken = new SemanticToken(
+                0,
+                colIndex,
+                token.length,
+                color,
+                token.token_kind
+            );
+
+            string errMsg = token.error.AsString();
+            if (!string.IsNullOrEmpty(errMsg))
+            {
+                semanticToken.IsError = true;
+                semanticToken.Data = errMsg;
+                semanticToken.Color = ICodeFormatter.ColorError;
+            }
+            list.AddToken(semanticToken);
         }
 
-        /**
-         * This helper converts a Rust vec to a C# List. This handles freeing the
-         * Rust allocation after the List is created, there is no need to Drop this memory.
-         *
-         */
-        public static Line AsList(this Vec_FfiToken_t vec)
+        Ffi.free_ffi_token_vec(vec);
+
+        return list;
+    }
+
+    private static uint GetColorForKind(uint kind)
+    {
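+        // These numeric kinds mirror map_token_kind in rust_compiler/src/lib.rs
+        // (1 = Keyword, 2 = Identifier, 3 = Number, 4 = String, 5 = Boolean,
+        // 6 = Symbol, 0 = unknown/error); the two tables must change together.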
+        switch (kind)
         {
-            var list = new Line();
-            list.Capacity = (int)vec.len;
-
-            var currentPtr = vec.ptr;
-
-            // Iterate through the raw memory array
-            for (int i = 0; i < (int)vec.len; i++)
-            {
-                // Dereference pointer to get the struct at index i
-                FfiToken_t token = currentPtr[i];
-
-                var newToken = new Token(token.text.AsString(), token.column);
-
-                list.Add(newToken);
-            }
-
-            Ffi.free_ffi_token_vec(vec);
-
-            return list;
+            case 1:
+                return SlangFormatter.ColorInstruction; // Keyword
+            case 2:
+                return SlangFormatter.ColorDefault; // Identifier
+            case 3:
+                return SlangFormatter.ColorNumber; // Number
+            case 4:
+                return SlangFormatter.ColorString; // String
+            case 5:
+                return SlangFormatter.ColorInstruction; // Boolean
+            case 6:
+                return SlangFormatter.ColorDefault; // Symbol
+            default:
+                return SlangFormatter.ColorDefault;
         }
     }
 }

diff --git a/csharp_mod/FfiGlue.cs b/csharp_mod/FfiGlue.cs
index da8fd3b..400805d 100644
--- a/csharp_mod/FfiGlue.cs
+++ b/csharp_mod/FfiGlue.cs
@@ -83,17 +83,17 @@ public unsafe partial class Ffi {
         slice_ref_uint16_t input);
 }
 
-[StructLayout(LayoutKind.Sequential, Size = 104)]
+[StructLayout(LayoutKind.Sequential, Size = 64)]
 public unsafe struct FfiToken_t
 {
-    public Vec_uint8_t text;
-
     public Vec_uint8_t tooltip;
     public Vec_uint8_t error;
-    public Vec_uint8_t status;
-
     public Int32 column;
+
+    public Int32 length;
+
+    public UInt32 token_kind;
 }
 
 ///

diff --git a/csharp_mod/Formatter.cs b/csharp_mod/Formatter.cs
index 6bd38fd..29c1822 100644
--- a/csharp_mod/Formatter.cs
+++ b/csharp_mod/Formatter.cs
@@ -1,12 +1,24 @@
+namespace Slang;
+
 using StationeersIC10Editor;
 
-namespace Slang
+public class SlangFormatter : ICodeFormatter
 {
-    public class SlangFormatter : ICodeFormatter
+    public static readonly uint ColorInstruction = ColorFromHTML("#ffff00");
+    public static readonly uint ColorString = ColorFromHTML("#ce9178");
+
+    public override Line ParseLine(string line)
     {
-        public override Line ParseLine(string line)
+        return Marshal.TokenizeLine(line);
+    }
+
+    public override string Compile()
+    {
+        if (Marshal.CompileFromString(this.Lines.RawText, out string compiled))
         {
-            return Marshal.TokenizeLine(line);
+            return compiled;
         }
+
+        return string.Empty;
     }
 }

diff --git a/csharp_mod/Marshal.cs b/csharp_mod/Marshal.cs
index bc52a99..dc80d9d 100644
--- a/csharp_mod/Marshal.cs
+++ b/csharp_mod/Marshal.cs
@@ -1,59 +1,157 @@
+namespace Slang;
+
 using System;
+using System.IO;
+using System.Reflection;
+using System.Runtime.InteropServices;
 using StationeersIC10Editor;
 
-namespace Slang
+public static class Marshal
 {
-    public static class Marshal
-    {
-        public static unsafe Line TokenizeLine(string source)
-        {
-            if (String.IsNullOrEmpty(source))
-            {
-                return new Line();
-            }
+    private static IntPtr _libraryHandle = IntPtr.Zero;
 
-            fixed (char* ptrString = source)
-            {
-                var input = new slice_ref_uint16_t
-                {
-                    ptr = (ushort*)ptrString,
-                    len = (UIntPtr)source.Length,
-                };
-                return Ffi.tokenize_line(input).AsList();
-            }
+    [DllImport("kernel32", SetLastError = true, CharSet = CharSet.Ansi)]
+    private static extern IntPtr LoadLibrary([MarshalAs(UnmanagedType.LPStr)] string lpFileName);
+
+    [DllImport("kernel32", SetLastError = true)]
+    private static extern bool FreeLibrary(IntPtr hModule);
+
+    private static bool EnsureLibLoaded()
+    {
+        if (_libraryHandle != IntPtr.Zero)
+        {
+            return true;
         }
 
-        public static unsafe bool CompileFromString(string inputString, out string compiledString)
+        try
         {
-            if (String.IsNullOrEmpty(inputString))
+            _libraryHandle = LoadLibrary(ExtractNativeLibrary(Ffi.RustLib));
+            // LoadLibrary signals failure with a null handle rather than an
+            // exception, so check explicitly before reporting success.
+            if (_libraryHandle == IntPtr.Zero)
+            {
+                L.Error("Failed to init slang compiler: could not load the native library.");
+                return false;
+            }
+            CodeFormatters.RegisterFormatter("Slang", () => new SlangFormatter(), true);
+            return true;
+        }
+        catch (Exception ex)
+        {
+            L.Error($"Failed to init slang compiler: {ex.Message}");
+            return false;
+        }
+    }
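+
+    // The handle is loaded lazily: TokenizeLine and CompileFromString both call
+    // EnsureLibLoaded before touching the FFI, so Init() below is just an eager
+    // warm-up invoked once from Plugin.Awake().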
+
+    public static bool Init()
+    {
+        return EnsureLibLoaded();
+    }
+
+    public static bool Destroy()
+    {
+        if (_libraryHandle == IntPtr.Zero)
+        {
+            return true;
+        }
+
+        try
+        {
+            FreeLibrary(_libraryHandle);
+            _libraryHandle = IntPtr.Zero;
+            return true;
+        }
+        catch (Exception ex)
+        {
+            L.Warning($"Unable to free handle to slang compiler's dll. {ex.Message}");
+            return false;
+        }
+    }
+
+    public static unsafe Line TokenizeLine(string source)
+    {
+        if (String.IsNullOrEmpty(source))
+        {
+            return new Line(source);
+        }
+
+        if (!EnsureLibLoaded())
+        {
+            return new Line(source);
+        }
+
+        fixed (char* ptrString = source)
+        {
+            var input = new slice_ref_uint16_t
             {
-                compiledString = String.Empty;
-                return false;
+                ptr = (ushort*)ptrString,
+                len = (UIntPtr)source.Length,
+            };
+            return Ffi.tokenize_line(input).ToLine(source);
+        }
+    }
+
+    public static unsafe bool CompileFromString(string inputString, out string compiledString)
+    {
+        if (String.IsNullOrEmpty(inputString))
+        {
+            compiledString = String.Empty;
+            return false;
+        }
+
+        if (!EnsureLibLoaded())
+        {
+            compiledString = String.Empty;
+            return false;
+        }
+
+        fixed (char* ptrString = inputString)
+        {
+            var input = new slice_ref_uint16_t
+            {
+                ptr = (ushort*)ptrString,
+                len = (UIntPtr)inputString.Length,
+            };
+
+            var result = Ffi.compile_from_string(input);
+            try
+            {
+                if ((ulong)result.len < 1)
+                {
+                    compiledString = String.Empty;
+                    return false;
+                }
+                compiledString = result.AsString();
+                return true;
+            }
+            finally
+            {
+                result.Drop();
+            }
+        }
+    }
+
+    private static string ExtractNativeLibrary(string libName)
+    {
+        string destinationPath = Path.Combine(Path.GetTempPath(), libName);
+
+        Assembly assembly = Assembly.GetExecutingAssembly();
+
+        using (Stream stream = assembly.GetManifestResourceStream(libName))
+        {
+            if (stream == null)
+            {
+                L.Error(
+                    $"{libName} not found. This means it was not embedded in the mod. Please contact the mod author!"
+                );
+                return "";
             }
 
-            fixed (char* ptrString = inputString)
+            try
             {
-                var input = new slice_ref_uint16_t
+                using (FileStream fileStream = new FileStream(destinationPath, FileMode.Create))
                 {
-                    ptr = (ushort*)ptrString,
-                    len = (UIntPtr)inputString.Length,
-                };
-
-                var result = Ffi.compile_from_string(input);
-                try
-                {
-                    if ((ulong)result.len < 1)
-                    {
-                        compiledString = String.Empty;
-                        return false;
-                    }
-                    compiledString = result.AsString();
-                    return true;
-                }
-                finally
-                {
-                    result.Drop();
+                    stream.CopyTo(fileStream);
                 }
+                return destinationPath;
+            }
+            catch (IOException e)
+            {
+                L.Warning($"Could not overwrite {libName} (it might be in use): {e.Message}");
+                return "";
             }
         }
     }
 }

diff --git a/csharp_mod/Plugin.cs b/csharp_mod/Plugin.cs
index b559d1f..09bf66e 100644
--- a/csharp_mod/Plugin.cs
+++ b/csharp_mod/Plugin.cs
@@ -1,9 +1,6 @@
-using System.IO;
-using System.Reflection;
 using System.Text.RegularExpressions;
 using BepInEx;
 using HarmonyLib;
-using StationeersIC10Editor;
 
 namespace Slang
 {
@@ -44,6 +41,8 @@ namespace Slang
         public const string PluginGuid = "com.biddydev.slang";
         public const string PluginName = "Slang";
 
+        private Harmony? _harmony;
+
         private static Regex? _slangSourceCheck = null;
 
         private static Regex SlangSourceCheck
@@ -89,44 +88,28 @@ namespace Slang
         private void Awake()
         {
             L.SetLogger(Logger);
+            this._harmony = new Harmony(PluginGuid);
+
             L.Info("slang loaded");
 
-            if (ExtractNativeDll(Ffi.RustLib))
+            // If the compiler failed to load, skip applying the Harmony patches;
+            // they are useless without it.
+            if (!Marshal.Init())
             {
-                var harmony = new Harmony(PluginGuid);
-                harmony.PatchAll();
-                CodeFormatters.RegisterFormatter("slang", () => new SlangFormatter(), true);
+                return;
             }
+
+            this._harmony.PatchAll();
         }
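+
+        // OnDestroy is Unity's teardown hook for the plugin; unpatching and
+        // releasing the native handle here keeps a reload from leaking them.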
 
-        private bool ExtractNativeDll(string fileName)
+        private void OnDestroy()
         {
-            string destinationPath = Path.Combine(Path.GetDirectoryName(Info.Location), fileName);
-
-            Assembly assembly = Assembly.GetExecutingAssembly();
-
-            using (Stream stream = assembly.GetManifestResourceStream(fileName))
+            if (Marshal.Destroy())
             {
-                if (stream == null)
-                {
-                    L.Error(
-                        $"{Ffi.RustLib} not found. This means it was not embedded in the mod. Please contact the mod author!"
-                    );
-                    return false;
-                }
-
-                try
-                {
-                    using (FileStream fileStream = new FileStream(destinationPath, FileMode.Create))
-                    {
-                        stream.CopyTo(fileStream);
-                    }
-                    return true;
-                }
-                catch (IOException e)
-                {
-                    L.Warning($"Could not overwrite {fileName} (it might be in use): {e.Message}");
-                    return false;
-                }
+                L.Info("FFI references cleaned up.");
+            }
+            if (this._harmony is not null)
+            {
+                this._harmony.UnpatchSelf();
             }
         }
     }
 }

diff --git a/rust_compiler/libs/tokenizer/src/lib.rs b/rust_compiler/libs/tokenizer/src/lib.rs
index 8909d5b..8f2dc41 100644
--- a/rust_compiler/libs/tokenizer/src/lib.rs
+++ b/rust_compiler/libs/tokenizer/src/lib.rs
@@ -56,7 +56,7 @@ impl<'a> Tokenizer<'a> {
         Ok(Self {
             reader,
             line: 1,
-            column: 1,
+            column: 0, // Start at 0 so the first char becomes column 1
             char_buffer: [0],
             returned_eof: false,
             string_buffer: String::new(),
@@ -71,7 +71,7 @@ impl<'a> From<BufReader<Box<dyn Read + 'a>>> for Tokenizer<'a> {
         Self {
             reader,
             line: 1,
-            column: 1,
+            column: 0,
             char_buffer: [0],
             returned_eof: false,
             string_buffer: String::new(),
@@ -84,7 +84,7 @@ impl<'a> From<&'a str> for Tokenizer<'a> {
         Self {
             reader: BufReader::new(Box::new(Cursor::new(value)) as Box<dyn Read + 'a>),
             char_buffer: [0],
-            column: 1,
+            column: 0,
             line: 1,
             returned_eof: false,
             string_buffer: String::new(),
         }
     }
 }
 
 impl<'a> Tokenizer<'a> {
-    /// Consumes the tokenizer and returns the next token in the stream
-    /// If there are no more tokens in the stream, this function returns None
-    /// If there is an error reading the stream, this function returns an error
-    ///
-    /// # Important
-    /// This function will increment the line and column counters
     fn next_char(&mut self) -> Result<Option<char>, Error> {
         let bytes_read = self.reader.read(&mut self.char_buffer)?;
 
         if bytes_read == 0 {
             return Ok(None);
         }
 
-        // Safety: The buffer is guaranteed to have 1 value as it is initialized with a size of 1
         let c = self.char_buffer[0] as char;
         if c == '\n' {
             self.line += 1;
             self.column = 0;
         } else {
             self.column += 1;
         }
 
         Ok(Some(c))
     }
 
-    /// Peeks the next character in the stream without consuming it
-    ///
-    /// # Important
-    /// This does not increment the line or column counters
     fn peek_next_char(&mut self) -> Result<Option<char>, Error> {
         let current_pos = self.reader.stream_position()?;
-
         let to_return = if self.reader.read(&mut self.char_buffer)? == 0 {
             None
         } else {
             self.reader.seek(SeekFrom::Start(current_pos))?;
-
-            // Safety: The buffer is guaranteed to have 1 value as it is initialized with a size of 1
             Some(self.char_buffer[0] as char)
         };
-
         Ok(to_return)
     }
 
-    /// Skips the current line in the stream.
-    /// Useful for skipping comments or empty lines
-    ///
-    /// # Important
-    /// This function will increment the line and column counters
     fn skip_line(&mut self) -> Result<(), Error> {
         while let Some(next_char) = self.next_char()? {
             if next_char == '\n' {
                 break;
             }
         }
         Ok(())
     }
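+
+    // string_buffer accumulates the raw characters consumed for the current
+    // token (taken into Token::original_string below); next_token clears it
+    // before and between tokens so stale text never leaks into the next one.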
 
-    /// Consumes the tokenizer and returns the next token in the stream
-    /// If there are no more tokens in the stream, this function returns None
     pub fn next_token(&mut self) -> Result<Option<Token>, Error> {
+        self.string_buffer.clear();
+
         while let Some(next_char) = self.next_char()? {
-            // skip whitespace
             if next_char.is_whitespace() {
+                self.string_buffer.clear();
                 continue;
             }
 
-            // skip comments
             if next_char == '/' && self.peek_next_char()? == Some('/') {
                 self.skip_line()?;
+                self.string_buffer.clear();
                 continue;
             }
 
+            // Capture start position before delegating
+            let start_line = self.line;
+            let start_col = self.column;
+
             match next_char {
-                // numbers
                 '0'..='9' => {
-                    return self.tokenize_number(next_char).map(Some);
+                    return self
+                        .tokenize_number(next_char, start_line, start_col)
+                        .map(Some);
+                }
+                '"' | '\'' => {
+                    return self
+                        .tokenize_string(next_char, start_line, start_col)
+                        .map(Some);
                 }
-                // strings
-                '"' | '\'' => return self.tokenize_string(next_char).map(Some),
-                // symbols excluding `"` and `'`
                 char if !char.is_alphanumeric() && char != '"' && char != '\'' => {
-                    return self.tokenize_symbol(next_char).map(Some);
+                    return self
+                        .tokenize_symbol(next_char, start_line, start_col)
+                        .map(Some);
                 }
-                // keywords and identifiers
                 char if char.is_alphabetic() => {
-                    return self.tokenize_keyword_or_identifier(next_char).map(Some);
+                    return self
+                        .tokenize_keyword_or_identifier(next_char, start_line, start_col)
+                        .map(Some);
                 }
                 _ => {
                     return Err(Error::UnknownSymbolError(
                         next_char,
-                        self.line,
-                        self.column,
+                        start_line,
+                        start_col,
                         std::mem::take(&mut self.string_buffer),
                     ));
                 }
             }
         }
     }
 
-    /// Peeks the next token in the stream without consuming it
-    /// If there are no more tokens in the stream, this function returns None
     pub fn peek_next(&mut self) -> Result<Option<Token>, Error> {
         let current_pos = self.reader.stream_position()?;
         let column = self.column;
         let line = self.line;
-
         let token = self.next_token()?;
         self.reader.seek(SeekFrom::Start(current_pos))?;
         self.column = column;
         self.line = line;
         Ok(token)
     }
 
-    /// Tokenizes a symbol
-    fn tokenize_symbol(&mut self, first_symbol: char) -> Result<Token, Error> {
-        /// Helper macro to create a symbol token
+    // Helper functions below take the token's start position explicitly so
+    // multi-character tokens report where they began, not where they ended.
+
+    fn tokenize_symbol(
+        &mut self,
+        first_symbol: char,
+        line: usize,
+        col: usize,
+    ) -> Result<Token, Error> {
         macro_rules! symbol {
             ($symbol:ident) => {
                 Ok(Token::new(
                     TokenType::Symbol(Symbol::$symbol),
-                    self.line,
-                    self.column,
+                    line,
+                    col,
                     Some(std::mem::take(&mut self.string_buffer)),
                 ))
             };
         }
 
         match first_symbol {
-            // single character symbols
             '(' => symbol!(LParen),
             ')' => symbol!(RParen),
             '{' => symbol!(LBrace),
             '}' => symbol!(RBrace),
             ',' => symbol!(Comma),
             ':' => symbol!(Colon),
             ';' => symbol!(Semicolon),
             '+' => symbol!(Plus),
             '-' => symbol!(Minus),
             '/' => symbol!(Slash),
-            '.' => symbol!(Dot),
             '^' => symbol!(Caret),
             '%' => symbol!(Percent),
-
-            // multi-character symbols
             '<' if self.peek_next_char()? == Some('=') => {
                 self.next_char()?;
                 symbol!(LessThanOrEqual)
             }
             '<' => symbol!(LessThan),
-
             '>' if self.peek_next_char()? == Some('=') => {
                 self.next_char()?;
                 symbol!(GreaterThanOrEqual)
             }
             '>' => symbol!(GreaterThan),
-
             '=' if self.peek_next_char()? == Some('=') => {
                 self.next_char()?;
                 symbol!(Equal)
             }
             '=' => symbol!(Assign),
-
             '!' if self.peek_next_char()? == Some('=') => {
                 self.next_char()?;
                 symbol!(NotEqual)
             }
             '!' => symbol!(LogicalNot),
-
             '*' if self.peek_next_char()? == Some('*') => {
                 self.next_char()?;
                 symbol!(Exp)
             }
             '*' => symbol!(Asterisk),
-
             '&' if self.peek_next_char()? == Some('&') => {
                 self.next_char()?;
                 symbol!(LogicalAnd)
             }
             '|' if self.peek_next_char()? == Some('|') => {
                 self.next_char()?;
                 symbol!(LogicalOr)
             }
-
             _ => Err(Error::UnknownSymbolError(
                 first_symbol,
-                self.line,
-                self.column,
+                line,
+                col,
                 std::mem::take(&mut self.string_buffer),
             )),
         }
     }
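+
+    // Number scanning sketch: "12.5" is read as primary = "12" plus
+    // decimal = Some("5"), re-joined as the i128 value 125 with scale 1,
+    // and Decimal::try_from_i128_with_scale(125, 1) yields 12.5 again.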
-    /// Tokenizes a number literal. Also handles temperatures with a suffix of `c`, `f`, or `k`.
-    fn tokenize_number(&mut self, first_char: char) -> Result<Token, Error> {
+    fn tokenize_number(
+        &mut self,
+        first_char: char,
+        line: usize,
+        col: usize,
+    ) -> Result<Token, Error> {
         let mut primary = String::with_capacity(16);
         let mut decimal: Option<String> = None;
         let mut reading_decimal = false;
-
-        let column = self.column;
-        let line = self.line;
-
         primary.push(first_char);
 
         while let Some(next_char) = self.peek_next_char()? {
             if next_char.is_whitespace() {
                 break;
             }
-
             if next_char == '.' {
                 reading_decimal = true;
                 self.next_char()?;
                 continue;
             }
-
-            // support underscores in numbers for readability
             if next_char == '_' {
                 self.next_char()?;
                 continue;
             }
-
-            // This is for the times when we have a number followed by a symbol (like a semicolon or =)
             if !next_char.is_numeric() {
                 break;
             }

         let number: Number = if let Some(decimal) = decimal {
             let decimal_scale = decimal.len() as u32;
-            let number = format!("{}{}", primary, decimal)
-                .parse::<i128>()
-                .map_err(|e| {
-                    Error::NumberParseError(
-                        e,
-                        self.line,
-                        self.column,
-                        std::mem::take(&mut self.string_buffer),
-                    )
-                })?;
+            let number_str = format!("{}{}", primary, decimal);
+            let number = number_str.parse::<i128>().map_err(|e| {
+                Error::NumberParseError(e, line, col, std::mem::take(&mut self.string_buffer))
+            })?;
 
             Number::Decimal(
                 Decimal::try_from_i128_with_scale(number, decimal_scale).map_err(|e| {
-                    Error::DecimalParseError(
-                        e,
-                        line,
-                        column,
-                        std::mem::take(&mut self.string_buffer),
-                    )
+                    Error::DecimalParseError(e, line, col, std::mem::take(&mut self.string_buffer))
                 })?,
             )
         } else {
             Number::Integer(primary.parse().map_err(|e| {
-                Error::NumberParseError(e, line, column, std::mem::take(&mut self.string_buffer))
+                Error::NumberParseError(e, line, col, std::mem::take(&mut self.string_buffer))
             })?)
         };
 
-        // check if the next char is a temperature suffix
         if let Some(next_char) = self.peek_next_char()? {
             let temperature = match next_char {
                 'c' => Temperature::Celsius(number),
                 'f' => Temperature::Fahrenheit(number),
                 'k' => Temperature::Kelvin(number),
                 _ => {
                     return Ok(Token::new(
                         TokenType::Number(number),
                         line,
-                        column,
+                        col,
                         Some(std::mem::take(&mut self.string_buffer)),
                     ));
                 }
             };
             self.next_char()?;
 
             Ok(Token::new(
                 TokenType::Number(temperature),
                 line,
-                column,
+                col,
                 Some(std::mem::take(&mut self.string_buffer)),
             ))
         } else {
             Ok(Token::new(
                 TokenType::Number(number),
                 line,
-                column,
+                col,
                 Some(std::mem::take(&mut self.string_buffer)),
             ))
         }
     }
 
-    /// Tokenizes a string literal
-    fn tokenize_string(&mut self, beginning_quote: char) -> Result<Token, Error> {
+    fn tokenize_string(
+        &mut self,
+        beginning_quote: char,
+        line: usize,
+        col: usize,
+    ) -> Result<Token, Error> {
         let mut buffer = String::with_capacity(16);
-
-        let column = self.column;
-        let line = self.line;
-
         while let Some(next_char) = self.next_char()? {
             if next_char == beginning_quote {
                 break;
             }
-
             buffer.push(next_char);
         }
-
         Ok(Token::new(
             TokenType::String(buffer),
             line,
-            column,
+            col,
             Some(std::mem::take(&mut self.string_buffer)),
         ))
     }
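+
+    // Note: the loop above copies characters verbatim until the matching
+    // quote, so escape sequences such as \" are not interpreted inside
+    // string literals as written.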
-    /// Tokenizes a keyword or an identifier. Also handles boolean literals
-    fn tokenize_keyword_or_identifier(&mut self, first_char: char) -> Result<Token, Error> {
+    fn tokenize_keyword_or_identifier(
+        &mut self,
+        first_char: char,
+        line: usize,
+        col: usize,
+    ) -> Result<Token, Error> {
         macro_rules! keyword {
             ($keyword:ident) => {{
                 return Ok(Token::new(
                     TokenType::Keyword(Keyword::$keyword),
-                    self.line,
-                    self.column,
+                    line,
+                    col,
                     Some(std::mem::take(&mut self.string_buffer)),
                 ));
             }};
         }
-
-        /// Helper macro to check if the next character is whitespace or not alphanumeric
         macro_rules! next_ws {
-            () => {
-                matches!(self.peek_next_char()?, Some(x) if x.is_whitespace() || !x.is_alphanumeric()) || self.peek_next_char()?.is_none()
-            };
+            () => { matches!(self.peek_next_char()?, Some(x) if x.is_whitespace() || !x.is_alphanumeric()) || self.peek_next_char()?.is_none() };
         }
 
         let mut buffer = String::with_capacity(16);
-        let line = self.line;
-        let column = self.column;
-
         let mut looped_char = Some(first_char);
 
         while let Some(next_char) = looped_char {
-            if next_char.is_whitespace() {
-                break;
-            }
-
-            if !next_char.is_alphanumeric() {
+            if next_char.is_whitespace() || !next_char.is_alphanumeric() {
                 break;
             }
             buffer.push(next_char);
@@ -474,51 +430,47 @@ impl<'a> Tokenizer<'a> {
                 "break" if next_ws!() => keyword!(Break),
                 "while" if next_ws!() => keyword!(While),
                 "continue" if next_ws!() => keyword!(Continue),
-
-                // boolean literals
                 "true" if next_ws!() => {
                     return Ok(Token::new(
                         TokenType::Boolean(true),
-                        self.line,
-                        self.column,
+                        line,
+                        col,
                         Some(std::mem::take(&mut self.string_buffer)),
                     ));
                 }
                 "false" if next_ws!() => {
                     return Ok(Token::new(
                         TokenType::Boolean(false),
-                        self.line,
-                        self.column,
+                        line,
+                        col,
                         Some(std::mem::take(&mut self.string_buffer)),
                     ));
                 }
-                // if the next character is whitespace or not alphanumeric, then we have an identifier
-                // this is because keywords are checked first
                 val if next_ws!() => {
                     return Ok(Token::new(
                         TokenType::Identifier(val.to_string()),
                         line,
-                        column,
+                        col,
                         Some(std::mem::take(&mut self.string_buffer)),
                     ));
                 }
                 _ => {}
             }
-
             looped_char = self.next_char()?;
         }
 
         Err(Error::UnknownKeywordOrIdentifierError(
             buffer,
             line,
-            column,
+            col,
             std::mem::take(&mut self.string_buffer),
         ))
     }
 }
 
 impl<'a> Iterator for Tokenizer<'a> {
     type Item = Result<Token, Error>;
-
     fn next(&mut self) -> Option<Self::Item> {
         match self.next_token() {
             Ok(Some(tok)) => Some(Ok(tok)),
             Ok(None) => None,
             Err(e) => Some(Err(e)),
         }
     }
 }
@@ -542,38 +494,26 @@
 impl<'a> TokenizerBuffer<'a> {
     pub fn new(tokenizer: Tokenizer<'a>) -> Self {
         Self {
             tokenizer,
             buffer: VecDeque::new(),
             history: VecDeque::with_capacity(128),
         }
     }
-
-    /// Reads the next token from the tokenizer, pushing the value to the back of the history
-    /// and returning the token
     pub fn next_token(&mut self) -> Result<Option<Token>, Error> {
         if let Some(token) = self.buffer.pop_front() {
             self.history.push_back(token.clone());
             return Ok(Some(token));
         }
-
         let token = self.tokenizer.next_token()?;
         if let Some(ref token) = token {
             self.history.push_back(token.clone());
         }
         Ok(token)
     }
-
-    /// Peeks the next token in the stream without adding to the history stack
     pub fn peek(&mut self) -> Result<Option<Token>, Error> {
         if let Some(token) = self.buffer.front() {
             return Ok(Some(token.clone()));
         }
-
         let token = self.tokenizer.peek_next()?;
         Ok(token)
     }
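+
+    // Rewind sketch: seek(SeekFrom::Current(-2)) moves two tokens back by
+    // popping them off `history` onto the front of `buffer`, so the next two
+    // next_token() calls replay them in order before reading new input.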
-
     fn seek_from_current(&mut self, seek_to: i64) -> Result<(), Error> {
         use Ordering::*;
-        // if seek_to > 0 then we need to check if the buffer has enough tokens to pop, otherwise we need to read from the tokenizer
-        // if seek_to < 0 then we need to pop from the history and push to the front of the buffer. If not enough, then we throw (we reached the front of the history)
-        // if seek_to == 0 then we don't need to do anything
-
         match seek_to.cmp(&0) {
             Greater => {
                 let mut tokens = Vec::with_capacity(seek_to as usize);
@@ -606,18 +546,13 @@ impl<'a> TokenizerBuffer<'a> {
             }
             _ => {}
         }
-
         Ok(())
     }
-
-    /// Adds to or removes from the History stack, allowing the user to move back and forth in the stream
     pub fn seek(&mut self, from: SeekFrom) -> Result<(), Error> {
         match from {
             SeekFrom::Current(seek_to) => self.seek_from_current(seek_to)?,
-            SeekFrom::End(_) => unimplemented!("SeekFrom::End will not be implemented"),
-            SeekFrom::Start(_) => unimplemented!("SeekFrom::Start will not be implemented"),
+            _ => unimplemented!("SeekFrom::End/Start not implemented"),
         }
-
         Ok(())
     }
 }
@@ -682,7 +617,7 @@ mod tests {
 
         assert_eq!(char, Some('f'));
         assert_eq!(tokenizer.line, 1);
-        assert_eq!(tokenizer.column, 2);
+        assert_eq!(tokenizer.column, 1);
 
         Ok(())
     }
@@ -695,7 +630,7 @@ mod tests {
 
         assert_eq!(char, Some('\n'));
         assert_eq!(tokenizer.line, 1);
-        assert_eq!(tokenizer.column, 1);
+        assert_eq!(tokenizer.column, 0);
 
         let char = tokenizer.next_char()?;
         assert_eq!(char, Some('\n'));
@@ -1010,4 +945,36 @@ mod tests {
 
         Ok(())
     }
+
+    #[test]
+    fn test_identifier_has_correct_length() -> Result<()> {
+        let mut tokenizer = Tokenizer::from("hello");
+        assert_eq!(
+            tokenizer.next_token()?,
+            Some(Token {
+                token_type: TokenType::Identifier("hello".into()),
+                original_string: Some("hello".into()),
+                column: 1,
+                line: 1
+            })
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_keyword_token_has_correct_length() -> Result<()> {
+        let mut tokenizer = Tokenizer::from("while");
+
+        assert_eq!(
+            tokenizer.next_token()?,
+            Some(Token {
+                token_type: TokenType::Keyword(Keyword::While),
+                original_string: Some("while".into()),
+                column: 1,
+                line: 1
+            })
+        );
+
+        Ok(())
+    }
 }

diff --git a/rust_compiler/src/lib.rs b/rust_compiler/src/lib.rs
index 056f3fc..c171cdd 100644
--- a/rust_compiler/src/lib.rs
+++ b/rust_compiler/src/lib.rs
@@ -7,11 +7,24 @@ use tokenizer::{token::TokenType, Error as TokenizerError, Tokenizer};
 #[derive_ReprC]
 #[repr(C)]
 pub struct FfiToken {
-    pub text: safer_ffi::String,
     pub tooltip: safer_ffi::String,
     pub error: safer_ffi::String,
-    pub status: safer_ffi::String,
     pub column: i32,
+    pub length: i32,
+    pub token_kind: u32,
 }
+
+fn map_token_kind(t: &TokenType) -> u32 {
+    use TokenType::*;
+    match t {
+        Keyword(_) => 1,
+        Identifier(_) => 2,
+        Number(_) => 3,
+        String(_) => 4,
+        Boolean(_) => 5,
+        Symbol(_) => 6,
+        _ => 0,
+    }
+}
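+
+// NOTE: FfiToken's layout is mirrored by FfiToken_t in csharp_mod/FfiGlue.cs
+// (StructLayout Sequential, Size = 64); any field added or reordered here has
+// to be applied to both sides of the FFI boundary together.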
 
 /// C# handles strings as UTF16. We do NOT want to allocate that memory in C# because
@@ -49,29 +62,29 @@ pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec<FfiToken> {
+            Err(TokenizerError::NumberParseError(_, _, col, ref str))
+            | Err(TokenizerError::UnknownSymbolError(_, _, col, ref str))
+            | Err(TokenizerError::DecimalParseError(_, _, col, ref str))
+            | Err(TokenizerError::UnknownKeywordOrIdentifierError(_, _, col, ref str)) => {
                 tokens.push(FfiToken {
-                    column: col as i32,
-                    text: original.to_string().into(),
+                    column: col as i32 - 1,
                     tooltip: "".into(),
+                    length: str.len() as i32,
+                    token_kind: 0,
                     // Safety: it's okay to unwrap the err here because we are matching on the `Err` variant
                     error: token.unwrap_err().to_string().into(),
-                    status: "".into(),
                 });
             }
             Err(_) => return safer_ffi::Vec::EMPTY,
             Ok(token) if !matches!(token.token_type, TokenType::EOF) => tokens.push(FfiToken {
-                text: token
-                    .original_string
-                    .unwrap_or(token.token_type.to_string())
-                    .into(),
                 tooltip: "".into(),
                 error: "".into(),
-                status: "".into(),
-                column: token.column as i32,
+                length: token
+                    .original_string
+                    .map(|s| s.len() as i32)
+                    .unwrap_or_default(),
+                token_kind: map_token_kind(&token.token_type),
+                column: token.column as i32 - 1,
             }),
             _ => {}
         }
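+        // The tokenizer reports 1-based columns (the tests above assert
+        // column: 1 for a line's first token), while the editor expects
+        // 0-based ones; hence the `col as i32 - 1` conversions here, with
+        // the C# side clamping any negative result back to zero.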