Merge pull request #7 from dbidwell94/tokenization-refactor

Tokenization refactor

build.sh

@@ -39,6 +39,7 @@ echo "--------------------"
 RUST_WIN_EXE="$RUST_DIR/target/x86_64-pc-windows-gnu/release/slang.exe"
 RUST_LINUX_BIN="$RUST_DIR/target/x86_64-unknown-linux-gnu/release/slang"
 CHARP_DLL="$CSHARP_DIR/bin/Release/net46/StationeersSlang.dll"
+CHARP_PDB="$CSHARP_DIR/bin/Release/net46/StationeersSlang.pdb"
 
 # Check if the release dir exists, if not: create it.
 if [[ ! -d "$RELEASE_DIR" ]]; then
@@ -48,3 +49,4 @@ fi
 cp "$RUST_WIN_EXE" "$RELEASE_DIR/slang.exe"
 cp "$RUST_LINUX_BIN" "$RELEASE_DIR/slang"
 cp "$CHARP_DLL" "$RELEASE_DIR/StationeersSlang.dll"
+cp "$CHARP_PDB" "$RELEASE_DIR/StationeersSlang.pdb"

@@ -1,9 +1,9 @@
+namespace Slang;
+
 using System;
 using System.Text;
 using StationeersIC10Editor;
 
-namespace Slang
-{
 public static unsafe class SlangExtensions
 {
     /**
@@ -40,27 +40,64 @@ namespace Slang
      * Rust allocation after the List is created, there is no need to Drop this memory.
      * </summary>
      */
-    public static Line AsList(this Vec_FfiToken_t vec)
+    public static Line ToLine(this Vec_FfiToken_t vec, string sourceText)
     {
-        var list = new Line();
-        list.Capacity = (int)vec.len;
+        var list = new Line(sourceText);
 
         var currentPtr = vec.ptr;
 
         // Iterate through the raw memory array
         for (int i = 0; i < (int)vec.len; i++)
         {
-            // Dereference pointer to get the struct at index i
-            FfiToken_t token = currentPtr[i];
+            var token = currentPtr[i];
 
-            var newToken = new Token(token.text.AsString(), token.column);
+            var color = GetColorForKind(token.token_kind);
 
-            list.Add(newToken);
+            int colIndex = token.column;
+            if (colIndex < 0)
+                colIndex = 0;
+
+            var semanticToken = new SemanticToken(
+                0,
+                colIndex,
+                token.length,
+                color,
+                token.token_kind
+            );
+
+            string errMsg = token.error.AsString();
+            if (!string.IsNullOrEmpty(errMsg))
+            {
+                semanticToken.IsError = true;
+                semanticToken.Data = errMsg;
+                semanticToken.Color = ICodeFormatter.ColorError;
+            }
+            list.AddToken(semanticToken);
         }
 
         Ffi.free_ffi_token_vec(vec);
 
         return list;
     }
+
+    private static uint GetColorForKind(uint kind)
+    {
+        switch (kind)
+        {
+            case 1:
+                return SlangFormatter.ColorInstruction; // Keyword
+            case 2:
+                return SlangFormatter.ColorDefault; // Identifier
+            case 3:
+                return SlangFormatter.ColorNumber; // Number
+            case 4:
+                return SlangFormatter.ColorString; // String
+            case 5:
+                return SlangFormatter.ColorInstruction; // Boolean
+            case 6:
+                return SlangFormatter.ColorDefault; // Symbol
+            default:
+                return SlangFormatter.ColorDefault;
+        }
+    }
 }
 }

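Note: the case values in GetColorForKind are a cross-language contract; they must stay in sync with the numbers produced by map_token_kind on the Rust side, added later in this diff. A sketch of a Rust test that would pin the mapping down (the test itself is illustrative, not part of this PR):

    #[test]
    fn token_kind_contract() {
        // 1 = Keyword, 2 = Identifier, 3 = Number, 4 = String, 5 = Boolean, 6 = Symbol
        assert_eq!(map_token_kind(&TokenType::Keyword(Keyword::While)), 1);
        assert_eq!(map_token_kind(&TokenType::Identifier("x".into())), 2);
    }
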
@@ -83,17 +83,17 @@ public unsafe partial class Ffi {
         slice_ref_uint16_t input);
 }
 
-[StructLayout(LayoutKind.Sequential, Size = 104)]
+[StructLayout(LayoutKind.Sequential, Size = 64)]
 public unsafe struct FfiToken_t {
-    public Vec_uint8_t text;
-
     public Vec_uint8_t tooltip;
 
     public Vec_uint8_t error;
 
-    public Vec_uint8_t status;
-
     public Int32 column;
+
+    public Int32 length;
+
+    public UInt32 token_kind;
 }
 
 /// <summary>

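Note: the Size drop from 104 to 64 is consistent with the field changes, assuming safer_ffi's vector/string repr marshals as { ptr, len, cap } = 24 bytes on x86_64: the old layout was 4 × 24 + 4 = 100 bytes, padded to 104; the new one is 2 × 24 + 3 × 4 = 60, padded to 64. A compile-time check one could add on the Rust side (illustrative, not part of this PR):

    // Keeps the Rust FfiToken in sync with the C# Size = 64 declaration.
    const _: () = assert!(std::mem::size_of::<FfiToken>() == 64);
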
@@ -1,12 +1,24 @@
+namespace Slang;
+
 using StationeersIC10Editor;
 
-namespace Slang
-{
 public class SlangFormatter : ICodeFormatter
 {
+    public static readonly uint ColorInstruction = ColorFromHTML("#ffff00");
+    public static readonly uint ColorString = ColorFromHTML("#ce9178");
+
     public override Line ParseLine(string line)
     {
         return Marshal.TokenizeLine(line);
     }
+
+    public override string Compile()
+    {
+        if (Marshal.CompileFromString(this.Lines.RawText, out string compiled))
+        {
+            return compiled;
+        }
+
+        return string.Empty;
+    }
 }
 }

@@ -1,15 +1,76 @@
+namespace Slang;
+
 using System;
+using System.IO;
+using System.Reflection;
+using System.Runtime.InteropServices;
 using StationeersIC10Editor;
 
-namespace Slang
-{
 public static class Marshal
 {
+    private static IntPtr _libraryHandle = IntPtr.Zero;
+
+    [DllImport("kernel32", SetLastError = true, CharSet = CharSet.Ansi)]
+    private static extern IntPtr LoadLibrary([MarshalAs(UnmanagedType.LPStr)] string lpFileName);
+
+    [DllImport("kernel32", SetLastError = true)]
+    private static extern bool FreeLibrary(IntPtr hModule);
+
+    private static bool EnsureLibLoaded()
+    {
+        if (_libraryHandle != IntPtr.Zero)
+        {
+            return true;
+        }
+
+        try
+        {
+            _libraryHandle = LoadLibrary(ExtractNativeLibrary(Ffi.RustLib));
+            CodeFormatters.RegisterFormatter("Slang", typeof(SlangFormatter), true);
+            return true;
+        }
+        catch (Exception ex)
+        {
+            L.Error($"Failed to init slang compiler: {ex.Message}");
+            return false;
+        }
+    }
+
+    public static bool Init()
+    {
+        return EnsureLibLoaded();
+    }
+
+    public static bool Destroy()
+    {
+        if (_libraryHandle == IntPtr.Zero)
+        {
+            return true;
+        }
+
+        try
+        {
+            FreeLibrary(_libraryHandle);
+            _libraryHandle = IntPtr.Zero;
+            return true;
+        }
+        catch (Exception ex)
+        {
+            L.Warning($"Unable to free handle to slang compiler's dll. {ex.Message}");
+            return false;
+        }
+    }
+
     public static unsafe Line TokenizeLine(string source)
     {
         if (String.IsNullOrEmpty(source))
         {
-            return new Line();
+            return new Line(source);
+        }
+
+        if (!EnsureLibLoaded())
+        {
+            return new Line(source);
         }
 
         fixed (char* ptrString = source)
@@ -19,7 +80,7 @@ namespace Slang
                 ptr = (ushort*)ptrString,
                 len = (UIntPtr)source.Length,
             };
-            return Ffi.tokenize_line(input).AsList();
+            return Ffi.tokenize_line(input).ToLine(source);
         }
     }
 
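Note: TokenizeLine pins the managed string with fixed and hands Rust a raw (pointer, length) UTF-16 slice, so no intermediate string is allocated on the C# side. A sketch of the receiving end (the signature is taken from this diff; the body is illustrative):

    pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec<FfiToken> {
        // Borrow the pinned UTF-16 buffer and decode it locally; C# still owns it.
        let source = String::from_utf16_lossy(&input);
        // ... tokenize `source` and build the FfiToken vec ...
        todo!()
    }
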
@@ -31,6 +92,12 @@ namespace Slang
             return false;
         }
 
+        if (!EnsureLibLoaded())
+        {
+            compiledString = String.Empty;
+            return false;
+        }
+
         fixed (char* ptrString = inputString)
         {
             var input = new slice_ref_uint16_t
@@ -56,5 +123,36 @@ namespace Slang
             }
         }
     }
+
+    private static string ExtractNativeLibrary(string libName)
+    {
+        string destinationPath = Path.Combine(Path.GetTempPath(), libName);
+
+        Assembly assembly = Assembly.GetExecutingAssembly();
+
+        using (Stream stream = assembly.GetManifestResourceStream(libName))
+        {
+            if (stream == null)
+            {
+                L.Error(
+                    $"{libName} not found. This means it was not embedded in the mod. Please contact the mod author!"
+                );
+                return "";
+            }
+
+            try
+            {
+                using (FileStream fileStream = new FileStream(destinationPath, FileMode.Create))
+                {
+                    stream.CopyTo(fileStream);
+                }
+                return destinationPath;
+            }
+            catch (IOException e)
+            {
+                L.Warning($"Could not overwrite {libName} (it might be in use): {e.Message}");
+                return "";
+            }
+        }
+    }
 }
 }

@@ -1,9 +1,6 @@
-using System.IO;
-using System.Reflection;
 using System.Text.RegularExpressions;
 using BepInEx;
 using HarmonyLib;
-using StationeersIC10Editor;
 
 namespace Slang
 {
@@ -44,6 +41,8 @@ namespace Slang
         public const string PluginGuid = "com.biddydev.slang";
         public const string PluginName = "Slang";
 
+        private Harmony? _harmony;
+
         private static Regex? _slangSourceCheck = null;
 
         private static Regex SlangSourceCheck
@@ -89,44 +88,28 @@ namespace Slang
         private void Awake()
         {
             L.SetLogger(Logger);
+            this._harmony = new Harmony(PluginGuid);
+            L.Info("slang loaded");
 
-            if (ExtractNativeDll(Ffi.RustLib))
+            // If we failed to load the compiler, bail from the rest of the patches. It won't matter,
+            // as the compiler itself has failed to load.
+            if (!Marshal.Init())
             {
-                var harmony = new Harmony(PluginGuid);
-                harmony.PatchAll();
-                CodeFormatters.RegisterFormatter("slang", () => new SlangFormatter(), true);
+                return;
             }
+
+            this._harmony.PatchAll();
         }
 
-        private bool ExtractNativeDll(string fileName)
+        private void OnDestroy()
         {
-            string destinationPath = Path.Combine(Path.GetDirectoryName(Info.Location), fileName);
-
-            Assembly assembly = Assembly.GetExecutingAssembly();
-
-            using (Stream stream = assembly.GetManifestResourceStream(fileName))
+            if (Marshal.Destroy())
             {
-                if (stream == null)
-                {
-                    L.Error(
-                        $"{Ffi.RustLib} not found. This means it was not embedded in the mod. Please contact the mod author!"
-                    );
-                    return false;
-                }
-
-                try
-                {
-                    using (FileStream fileStream = new FileStream(destinationPath, FileMode.Create))
-                    {
-                        stream.CopyTo(fileStream);
-                    }
-                    return true;
-                }
-                catch (IOException e)
-                {
-                    L.Warning($"Could not overwrite {fileName} (it might be in use): {e.Message}");
-                    return false;
-                }
+                L.Info("FFI references cleaned up.");
+            }
+            if (this._harmony is not null)
+            {
+                this._harmony.UnpatchSelf();
             }
         }
     }

@@ -56,7 +56,7 @@ impl<'a> Tokenizer<'a> {
         Ok(Self {
             reader,
             line: 1,
-            column: 1,
+            column: 0, // Start at 0 so first char becomes 1
            char_buffer: [0],
            returned_eof: false,
            string_buffer: String::new(),
@@ -71,7 +71,7 @@ impl<'a> From<String> for Tokenizer<'a> {
         Self {
             reader,
             line: 1,
-            column: 1,
+            column: 0,
             char_buffer: [0],
             returned_eof: false,
             string_buffer: String::new(),
@@ -84,7 +84,7 @@ impl<'a> From<&'a str> for Tokenizer<'a> {
         Self {
             reader: BufReader::new(Box::new(Cursor::new(value)) as Box<dyn Tokenize>),
             char_buffer: [0],
-            column: 1,
+            column: 0,
             line: 1,
             returned_eof: false,
             string_buffer: String::new(),
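Note: next_char() increments self.column before the caller sees the character, so seeding column with 0 makes the first character of a line report column 1; the old seed of 1 made it report 2, which is the off-by-one the updated tests below pin down. A minimal standalone illustration of the increment order (not the Tokenizer itself):

    let mut column = 0;
    for _c in "foo".chars() {
        column += 1; // bump on consume, as next_char does
    }
    assert_eq!(column, 3); // 'f' was column 1, then 2, then 3
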
@@ -93,12 +93,6 @@ impl<'a> From<&'a str> for Tokenizer<'a> {
 }
 
 impl<'a> Tokenizer<'a> {
-    /// Consumes the tokenizer and returns the next token in the stream
-    /// If there are no more tokens in the stream, this function returns None
-    /// If there is an error reading the stream, this function returns an error
-    ///
-    /// # Important
-    /// This function will increment the line and column counters
     fn next_char(&mut self) -> Result<Option<char>, Error> {
         let bytes_read = self.reader.read(&mut self.char_buffer)?;
 
@@ -106,7 +100,6 @@ impl<'a> Tokenizer<'a> {
             return Ok(None);
         }
 
-        // Safety: The buffer is guaranteed to have 1 value as it is initialized with a size of 1
         let c = self.char_buffer[0] as char;
         if c == '\n' {
             self.line += 1;
@@ -119,30 +112,17 @@ impl<'a> Tokenizer<'a> {
         Ok(Some(c))
     }
 
-    /// Peeks the next character in the stream without consuming it
-    ///
-    /// # Important
-    /// This does not increment the line or column counters
     fn peek_next_char(&mut self) -> Result<Option<char>, Error> {
         let current_pos = self.reader.stream_position()?;
-
         let to_return = if self.reader.read(&mut self.char_buffer)? == 0 {
             None
         } else {
             self.reader.seek(SeekFrom::Start(current_pos))?;
-
-            // Safety: The buffer is guaranteed to have 1 value as it is initialized with a size of 1
             Some(self.char_buffer[0] as char)
         };
-
         Ok(to_return)
     }
 
-    /// Skips the current line in the stream.
-    /// Useful for skipping comments or empty lines
-    ///
-    /// # Important
-    /// This function will increment the line and column counters
     fn skip_line(&mut self) -> Result<(), Error> {
         while let Some(next_char) = self.next_char()? {
             if next_char == '\n' {
@@ -152,40 +132,50 @@ impl<'a> Tokenizer<'a> {
         Ok(())
     }
 
-    /// Consumes the tokenizer and returns the next token in the stream
-    /// If there are no more tokens in the stream, this function returns None
     pub fn next_token(&mut self) -> Result<Option<Token>, Error> {
+        self.string_buffer.clear();
+
         while let Some(next_char) = self.next_char()? {
-            // skip whitespace
             if next_char.is_whitespace() {
+                self.string_buffer.clear();
                 continue;
             }
-            // skip comments
             if next_char == '/' && self.peek_next_char()? == Some('/') {
                 self.skip_line()?;
+                self.string_buffer.clear();
                 continue;
             }
 
+            // Capture start position before delegating
+            let start_line = self.line;
+            let start_col = self.column;
+
             match next_char {
-                // numbers
                 '0'..='9' => {
-                    return self.tokenize_number(next_char).map(Some);
+                    return self
+                        .tokenize_number(next_char, start_line, start_col)
+                        .map(Some);
+                }
+                '"' | '\'' => {
+                    return self
+                        .tokenize_string(next_char, start_line, start_col)
+                        .map(Some);
                 }
-                // strings
-                '"' | '\'' => return self.tokenize_string(next_char).map(Some),
-                // symbols excluding `"` and `'`
                 char if !char.is_alphanumeric() && char != '"' && char != '\'' => {
-                    return self.tokenize_symbol(next_char).map(Some);
+                    return self
+                        .tokenize_symbol(next_char, start_line, start_col)
+                        .map(Some);
                 }
-                // keywords and identifiers
                 char if char.is_alphabetic() => {
-                    return self.tokenize_keyword_or_identifier(next_char).map(Some);
+                    return self
+                        .tokenize_keyword_or_identifier(next_char, start_line, start_col)
+                        .map(Some);
                 }
                 _ => {
                     return Err(Error::UnknownSymbolError(
                         next_char,
-                        self.line,
-                        self.column,
+                        start_line,
+                        start_col,
                         std::mem::take(&mut self.string_buffer),
                     ));
                 }
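Note: the start coordinates are captured before delegating because the helper tokenizers consume characters as they scan, so self.column already points past the token by the time Token::new runs. An illustrative trace:

    // Tokenizing "123;" whose first digit sits at column 1:
    //   next_char -> '1'   (self.column = 1; start_col captured = 1)
    //   tokenize_number consumes '2', '3'   (self.column = 3)
    //   Token::new(..., start_line, start_col, ...) still reports column 1
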
@@ -204,13 +194,10 @@ impl<'a> Tokenizer<'a> {
             }
         }
     }
 
-    /// Peeks the next token in the stream without consuming it
-    /// If there are no more tokens in the stream, this function returns None
     pub fn peek_next(&mut self) -> Result<Option<Token>, Error> {
         let current_pos = self.reader.stream_position()?;
         let column = self.column;
         let line = self.line;
-
         let token = self.next_token()?;
         self.reader.seek(SeekFrom::Start(current_pos))?;
         self.column = column;
@@ -218,22 +205,26 @@ impl<'a> Tokenizer<'a> {
         Ok(token)
     }
 
-    /// Tokenizes a symbol
-    fn tokenize_symbol(&mut self, first_symbol: char) -> Result<Token, Error> {
-        /// Helper macro to create a symbol token
+    // Updated helper functions to accept start_line and start_col
+    fn tokenize_symbol(
+        &mut self,
+        first_symbol: char,
+        line: usize,
+        col: usize,
+    ) -> Result<Token, Error> {
         macro_rules! symbol {
             ($symbol:ident) => {
                 Ok(Token::new(
                     TokenType::Symbol(Symbol::$symbol),
-                    self.line,
-                    self.column,
+                    line,
+                    col,
                     Some(std::mem::take(&mut self.string_buffer)),
                 ))
             };
         }
 
         match first_symbol {
-            // single character symbols
             '(' => symbol!(LParen),
             ')' => symbol!(RParen),
             '{' => symbol!(LBrace),
@@ -246,42 +237,34 @@ impl<'a> Tokenizer<'a> {
             '+' => symbol!(Plus),
             '-' => symbol!(Minus),
             '/' => symbol!(Slash),
-
             '.' => symbol!(Dot),
             '^' => symbol!(Caret),
             '%' => symbol!(Percent),
-
-            // multi-character symbols
             '<' if self.peek_next_char()? == Some('=') => {
                 self.next_char()?;
                 symbol!(LessThanOrEqual)
             }
             '<' => symbol!(LessThan),
-
             '>' if self.peek_next_char()? == Some('=') => {
                 self.next_char()?;
                 symbol!(GreaterThanOrEqual)
             }
             '>' => symbol!(GreaterThan),
-
             '=' if self.peek_next_char()? == Some('=') => {
                 self.next_char()?;
                 symbol!(Equal)
             }
             '=' => symbol!(Assign),
-
             '!' if self.peek_next_char()? == Some('=') => {
                 self.next_char()?;
                 symbol!(NotEqual)
             }
             '!' => symbol!(LogicalNot),
-
             '*' if self.peek_next_char()? == Some('*') => {
                 self.next_char()?;
                 symbol!(Exp)
             }
             '*' => symbol!(Asterisk),
-
             '&' if self.peek_next_char()? == Some('&') => {
                 self.next_char()?;
                 symbol!(LogicalAnd)
@@ -290,45 +273,39 @@ impl<'a> Tokenizer<'a> {
                 self.next_char()?;
                 symbol!(LogicalOr)
             }
 
             _ => Err(Error::UnknownSymbolError(
                 first_symbol,
-                self.line,
-                self.column,
+                line,
+                col,
                 std::mem::take(&mut self.string_buffer),
             )),
         }
     }
 
-    /// Tokenizes a number literal. Also handles temperatures with a suffix of `c`, `f`, or `k`.
-    fn tokenize_number(&mut self, first_char: char) -> Result<Token, Error> {
+    fn tokenize_number(
+        &mut self,
+        first_char: char,
+        line: usize,
+        col: usize,
+    ) -> Result<Token, Error> {
         let mut primary = String::with_capacity(16);
         let mut decimal: Option<String> = None;
         let mut reading_decimal = false;
-
-        let column = self.column;
-        let line = self.line;
-
         primary.push(first_char);
-
         while let Some(next_char) = self.peek_next_char()? {
             if next_char.is_whitespace() {
                 break;
             }
-
             if next_char == '.' {
                 reading_decimal = true;
                 self.next_char()?;
                 continue;
             }
-
-            // support underscores in numbers for readability
             if next_char == '_' {
                 self.next_char()?;
                 continue;
             }
-
-            // This is for the times when we have a number followed by a symbol (like a semicolon or =)
             if !next_char.is_numeric() {
                 break;
             }
@@ -343,33 +320,21 @@ impl<'a> Tokenizer<'a> {
 
         let number: Number = if let Some(decimal) = decimal {
             let decimal_scale = decimal.len() as u32;
-            let number = format!("{}{}", primary, decimal)
-                .parse::<i128>()
-                .map_err(|e| {
-                    Error::NumberParseError(
-                        e,
-                        self.line,
-                        self.column,
-                        std::mem::take(&mut self.string_buffer),
-                    )
-                })?;
+            let number_str = format!("{}{}", primary, decimal);
+            let number = number_str.parse::<i128>().map_err(|e| {
+                Error::NumberParseError(e, line, col, std::mem::take(&mut self.string_buffer))
+            })?;
             Number::Decimal(
                 Decimal::try_from_i128_with_scale(number, decimal_scale).map_err(|e| {
-                    Error::DecimalParseError(
-                        e,
-                        line,
-                        column,
-                        std::mem::take(&mut self.string_buffer),
-                    )
+                    Error::DecimalParseError(e, line, col, std::mem::take(&mut self.string_buffer))
                 })?,
             )
         } else {
             Number::Integer(primary.parse().map_err(|e| {
-                Error::NumberParseError(e, line, column, std::mem::take(&mut self.string_buffer))
+                Error::NumberParseError(e, line, col, std::mem::take(&mut self.string_buffer))
             })?)
         };
 
-        // check if the next char is a temperature suffix
         if let Some(next_char) = self.peek_next_char()? {
             let temperature = match next_char {
                 'c' => Temperature::Celsius(number),
@@ -379,7 +344,7 @@ impl<'a> Tokenizer<'a> {
                 return Ok(Token::new(
                     TokenType::Number(number),
                     line,
-                    column,
+                    col,
                     Some(std::mem::take(&mut self.string_buffer)),
                 ));
             }
@@ -390,74 +355,65 @@ impl<'a> Tokenizer<'a> {
             Ok(Token::new(
                 TokenType::Number(temperature),
                 line,
-                column,
+                col,
                 Some(std::mem::take(&mut self.string_buffer)),
             ))
         } else {
             Ok(Token::new(
                 TokenType::Number(number),
                 line,
-                column,
+                col,
                 Some(std::mem::take(&mut self.string_buffer)),
             ))
         }
     }
 
-    /// Tokenizes a string literal
-    fn tokenize_string(&mut self, beginning_quote: char) -> Result<Token, Error> {
+    fn tokenize_string(
+        &mut self,
+        beginning_quote: char,
+        line: usize,
+        col: usize,
+    ) -> Result<Token, Error> {
         let mut buffer = String::with_capacity(16);
-
-        let column = self.column;
-        let line = self.line;
-
         while let Some(next_char) = self.next_char()? {
             if next_char == beginning_quote {
                 break;
             }
-
             buffer.push(next_char);
         }
 
         Ok(Token::new(
             TokenType::String(buffer),
             line,
-            column,
+            col,
             Some(std::mem::take(&mut self.string_buffer)),
         ))
     }
 
-    /// Tokenizes a keyword or an identifier. Also handles boolean literals
-    fn tokenize_keyword_or_identifier(&mut self, first_char: char) -> Result<Token, Error> {
+    fn tokenize_keyword_or_identifier(
+        &mut self,
+        first_char: char,
+        line: usize,
+        col: usize,
+    ) -> Result<Token, Error> {
         macro_rules! keyword {
             ($keyword:ident) => {{
                 return Ok(Token::new(
                     TokenType::Keyword(Keyword::$keyword),
-                    self.line,
-                    self.column,
+                    line,
+                    col,
                     Some(std::mem::take(&mut self.string_buffer)),
                 ));
             }};
         }
 
-        /// Helper macro to check if the next character is whitespace or not alphanumeric
         macro_rules! next_ws {
-            () => {
-                matches!(self.peek_next_char()?, Some(x) if x.is_whitespace() || !x.is_alphanumeric()) || self.peek_next_char()?.is_none()
-            };
+            () => { matches!(self.peek_next_char()?, Some(x) if x.is_whitespace() || !x.is_alphanumeric()) || self.peek_next_char()?.is_none() };
         }
 
         let mut buffer = String::with_capacity(16);
-        let line = self.line;
-        let column = self.column;
-
         let mut looped_char = Some(first_char);
-
         while let Some(next_char) = looped_char {
-            if next_char.is_whitespace() {
-                break;
-            }
-
-            if !next_char.is_alphanumeric() {
+            if next_char.is_whitespace() || !next_char.is_alphanumeric() {
                 break;
             }
             buffer.push(next_char);
@@ -474,51 +430,47 @@ impl<'a> Tokenizer<'a> {
             "break" if next_ws!() => keyword!(Break),
             "while" if next_ws!() => keyword!(While),
             "continue" if next_ws!() => keyword!(Continue),
-
-            // boolean literals
             "true" if next_ws!() => {
                 return Ok(Token::new(
                     TokenType::Boolean(true),
-                    self.line,
-                    self.column,
+                    line,
+                    col,
                     Some(std::mem::take(&mut self.string_buffer)),
                 ));
             }
             "false" if next_ws!() => {
                 return Ok(Token::new(
                     TokenType::Boolean(false),
-                    self.line,
-                    self.column,
+                    line,
+                    col,
                     Some(std::mem::take(&mut self.string_buffer)),
                 ));
             }
-            // if the next character is whitespace or not alphanumeric, then we have an identifier
-            // this is because keywords are checked first
            val if next_ws!() => {
                 return Ok(Token::new(
                     TokenType::Identifier(val.to_string()),
                     line,
-                    column,
+                    col,
                     Some(std::mem::take(&mut self.string_buffer)),
                 ));
             }
             _ => {}
         }
-
            looped_char = self.next_char()?;
        }
        Err(Error::UnknownKeywordOrIdentifierError(
            buffer,
            line,
-            column,
+            col,
            std::mem::take(&mut self.string_buffer),
        ))
    }
 }
 
+// ... Iterator and TokenizerBuffer implementations remain unchanged ...
+// They just call the methods above which now use the passed-in start coordinates.
 impl<'a> Iterator for Tokenizer<'a> {
     type Item = Result<Token, Error>;
 
     fn next(&mut self) -> Option<Self::Item> {
         match self.next_token() {
             Ok(Some(tok)) => Some(Ok(tok)),
@@ -542,38 +494,26 @@ impl<'a> TokenizerBuffer<'a> {
             history: VecDeque::with_capacity(128),
         }
     }
-
-    /// Reads the next token from the tokenizer, pushing the value to the back of the history
-    /// and returning the token
     pub fn next_token(&mut self) -> Result<Option<Token>, Error> {
         if let Some(token) = self.buffer.pop_front() {
             self.history.push_back(token.clone());
             return Ok(Some(token));
         }
-
         let token = self.tokenizer.next_token()?;
         if let Some(ref token) = token {
             self.history.push_back(token.clone());
         }
         Ok(token)
     }
-
-    /// Peeks the next token in the stream without adding to the history stack
     pub fn peek(&mut self) -> Result<Option<Token>, Error> {
         if let Some(token) = self.buffer.front() {
             return Ok(Some(token.clone()));
         }
-
         let token = self.tokenizer.peek_next()?;
         Ok(token)
     }
-
     fn seek_from_current(&mut self, seek_to: i64) -> Result<(), Error> {
         use Ordering::*;
-        // if seek_to > 0 then we need to check if the buffer has enough tokens to pop, otherwise we need to read from the tokenizer
-        // if seek_to < 0 then we need to pop from the history and push to the front of the buffer. If not enough, then we throw (we reached the front of the history)
-        // if seek_to == 0 then we don't need to do anything
-
         match seek_to.cmp(&0) {
             Greater => {
                 let mut tokens = Vec::with_capacity(seek_to as usize);
@@ -606,18 +546,13 @@ impl<'a> TokenizerBuffer<'a> {
             }
             _ => {}
         }
-
         Ok(())
     }
-
-    /// Adds to or removes from the History stack, allowing the user to move back and forth in the stream
     pub fn seek(&mut self, from: SeekFrom) -> Result<(), Error> {
         match from {
             SeekFrom::Current(seek_to) => self.seek_from_current(seek_to)?,
-            SeekFrom::End(_) => unimplemented!("SeekFrom::End will not be implemented"),
-            SeekFrom::Start(_) => unimplemented!("SeekFrom::Start will not be implemented"),
+            _ => unimplemented!("SeekFrom::End/Start not implemented"),
         }
-
         Ok(())
     }
 }
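Note: collapsing the two unimplemented! arms into a wildcard keeps the behavior: only relative seeks are supported, forward through the tokenizer and backward through the history stack. Usage stays of the form (illustrative):

    // Rewind two tokens through the buffer's history.
    tokenizer_buffer.seek(SeekFrom::Current(-2))?;
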
@@ -682,7 +617,7 @@ mod tests {
 
         assert_eq!(char, Some('f'));
         assert_eq!(tokenizer.line, 1);
-        assert_eq!(tokenizer.column, 2);
+        assert_eq!(tokenizer.column, 1);
 
         Ok(())
     }
@@ -695,7 +630,7 @@ mod tests {
 
         assert_eq!(char, Some('\n'));
         assert_eq!(tokenizer.line, 1);
-        assert_eq!(tokenizer.column, 1);
+        assert_eq!(tokenizer.column, 0);
 
         let char = tokenizer.next_char()?;
         assert_eq!(char, Some('\n'));
@@ -1010,4 +945,36 @@ mod tests {
 
         Ok(())
     }
+
+    #[test]
+    fn test_identifier_has_correct_length() -> Result<()> {
+        let mut tokenizer = Tokenizer::from("hello");
+        assert_eq!(
+            tokenizer.next_token()?,
+            Some(Token {
+                token_type: TokenType::Identifier("hello".into()),
+                original_string: Some("hello".into()),
+                column: 1,
+                line: 1
+            })
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_keyword_token_has_correct_length() -> Result<()> {
+        let mut tokenizer = Tokenizer::from("while");
+
+        assert_eq!(
+            tokenizer.next_token()?,
+            Some(Token {
+                token_type: TokenType::Keyword(Keyword::While),
+                original_string: Some("while".into()),
+                column: 1,
+                line: 1
+            })
+        );
+
+        Ok(())
+    }
 }

@@ -7,11 +7,24 @@ use tokenizer::{token::TokenType, Error as TokenizerError, Tokenizer};
 #[derive_ReprC]
 #[repr(C)]
 pub struct FfiToken {
-    pub text: safer_ffi::String,
     pub tooltip: safer_ffi::String,
     pub error: safer_ffi::String,
-    pub status: safer_ffi::String,
     pub column: i32,
+    pub length: i32,
+    pub token_kind: u32,
+}
+
+fn map_token_kind(t: &TokenType) -> u32 {
+    use TokenType::*;
+    match t {
+        Keyword(_) => 1,
+        Identifier(_) => 2,
+        Number(_) => 3,
+        String(_) => 4,
+        Boolean(_) => 5,
+        Symbol(_) => 6,
+        _ => 0,
+    }
 }
 
 /// C# handles strings as UTF16. We do NOT want to allocate that memory in C# because
@@ -49,29 +62,29 @@ pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec<FfiToken> {
 
     for token in tokenizer {
         match token {
-            Err(TokenizerError::NumberParseError(_, _, col, ref original))
-            | Err(TokenizerError::UnknownSymbolError(_, _, col, ref original))
-            | Err(TokenizerError::DecimalParseError(_, _, col, ref original))
-            | Err(TokenizerError::UnknownKeywordOrIdentifierError(_, _, col, ref original)) => {
+            Err(TokenizerError::NumberParseError(_, _, col, ref str))
+            | Err(TokenizerError::UnknownSymbolError(_, _, col, ref str))
+            | Err(TokenizerError::DecimalParseError(_, _, col, ref str))
+            | Err(TokenizerError::UnknownKeywordOrIdentifierError(_, _, col, ref str)) => {
                 tokens.push(FfiToken {
-                    column: col as i32,
-                    text: original.to_string().into(),
+                    column: col as i32 - 1,
                     tooltip: "".into(),
+                    length: str.len() as i32,
+                    token_kind: 0,
                     // Safety: it's okay to unwrap the err here because we are matching on the `Err` variant
                     error: token.unwrap_err().to_string().into(),
-                    status: "".into(),
                 });
             }
             Err(_) => return safer_ffi::Vec::EMPTY,
             Ok(token) if !matches!(token.token_type, TokenType::EOF) => tokens.push(FfiToken {
-                text: token
-                    .original_string
-                    .unwrap_or(token.token_type.to_string())
-                    .into(),
                 tooltip: "".into(),
                 error: "".into(),
-                status: "".into(),
-                column: token.column as i32,
+                length: token
+                    .original_string
+                    .map(|s| s.len() as i32)
+                    .unwrap_or_default(),
+                token_kind: map_token_kind(&token.token_type),
+                column: token.column as i32 - 1,
             }),
             _ => {}
         }
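Note: the tokenizer reports 1-based columns while the editor's SemanticToken indexes from 0, hence the col as i32 - 1 here and the defensive clamp of negative values back to 0 in SlangExtensions.ToLine. The conversion both sides agree on amounts to (helper name hypothetical):

    fn to_editor_column(tokenizer_col: i32) -> i32 {
        (tokenizer_col - 1).max(0)
    }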