From 115a57128caa81e4e8698e282a0132052461913c Mon Sep 17 00:00:00 2001
From: Devin Bidwell <dbidwell94@biddydev.com>
Date: Mon, 8 Dec 2025 22:50:20 -0700
Subject: [PATCH] Before error type refactor

---
 rust_compiler/Cargo.lock                  |  22 +
 rust_compiler/libs/parser/Cargo.toml      |   1 +
 rust_compiler/libs/parser/src/lib.rs      |   7 +-
 rust_compiler/libs/tokenizer/Cargo.toml   |   1 +
 rust_compiler/libs/tokenizer/src/lib.rs   | 927 +---------------------
 rust_compiler/libs/tokenizer/src/token.rs | 174 ++--
 6 files changed, 195 insertions(+), 937 deletions(-)
diff --git a/rust_compiler/Cargo.lock b/rust_compiler/Cargo.lock
index d5e79e8..c255c15 100644
--- a/rust_compiler/Cargo.lock
+++ b/rust_compiler/Cargo.lock
@@ -572,6 +572,7 @@ dependencies = [
  "lsp-types",
  "pretty_assertions",
  "quick-error",
+ "thiserror",
  "tokenizer",
 ]
 
@@ -998,6 +999,26 @@ version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
 
+[[package]]
+name = "thiserror"
+version = "2.0.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8"
+dependencies = [
+ "thiserror-impl",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "2.0.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.111",
+]
+
 [[package]]
 name = "tinyvec"
 version = "1.10.0"
@@ -1023,6 +1044,7 @@ dependencies = [
  "lsp-types",
  "quick-error",
  "rust_decimal",
+ "thiserror",
 ]
 
 [[package]]
diff --git a/rust_compiler/libs/parser/Cargo.toml b/rust_compiler/libs/parser/Cargo.toml
index 336b498..e3c304b 100644
--- a/rust_compiler/libs/parser/Cargo.toml
+++ b/rust_compiler/libs/parser/Cargo.toml
@@ -8,6 +8,7 @@ quick-error = { workspace = true }
 tokenizer = { path = "../tokenizer" }
 helpers = { path = "../helpers" }
 lsp-types = { workspace = true }
+thiserror = "2"
 
 
 [dev-dependencies]
diff --git a/rust_compiler/libs/parser/src/lib.rs b/rust_compiler/libs/parser/src/lib.rs
index 2011ebb..f00150d 100644
--- a/rust_compiler/libs/parser/src/lib.rs
+++ b/rust_compiler/libs/parser/src/lib.rs
@@ -111,7 +111,7 @@ macro_rules! self_matches_current {
 
 pub struct Parser<'a> {
     tokenizer: TokenizerBuffer<'a>,
-    current_token: Option<Token>,
+    current_token: Option<Token<'a>>,
     pub errors: Vec<Error>,
 }
 
@@ -126,12 +126,11 @@ impl<'a> Parser<'a> {
 
     /// Calculates a Span from a given Token reference.
     fn token_to_span(t: &Token) -> Span {
-        let len = t.original_string.as_ref().map(|s| s.len()).unwrap_or(0);
         Span {
             start_line: t.line,
-            start_col: t.column,
+            start_col: t.span.start,
             end_line: t.line,
-            end_col: t.column + len,
+            end_col: t.span.end,
         }
     }
 
diff --git a/rust_compiler/libs/tokenizer/Cargo.toml b/rust_compiler/libs/tokenizer/Cargo.toml
index 37b5611..9d50066 100644
--- a/rust_compiler/libs/tokenizer/Cargo.toml
+++ b/rust_compiler/libs/tokenizer/Cargo.toml
@@ -9,6 +9,7 @@ quick-error = { workspace = true }
 lsp-types = { workspace = true }
 helpers = { path = "../helpers" }
 logos = "0.16"
+thiserror = "2"
 
 [dev-dependencies]
 anyhow = { version = "^1" }
diff --git a/rust_compiler/libs/tokenizer/src/lib.rs b/rust_compiler/libs/tokenizer/src/lib.rs
index 3d8dabb..44b2223 100644
--- a/rust_compiler/libs/tokenizer/src/lib.rs
+++ b/rust_compiler/libs/tokenizer/src/lib.rs
@@ -1,14 +1,15 @@
 pub mod token;
 
+use logos::{Lexer, Logos};
 use quick_error::quick_error;
-use rust_decimal::Decimal;
 use std::{
     cmp::Ordering,
     collections::VecDeque,
     io::{BufReader, Cursor, Read, Seek, SeekFrom},
+    iter::Peekable,
     path::PathBuf,
 };
-use token::{Keyword, Number, Symbol, Temperature, Token, TokenType};
+use token::{Token, TokenType};
 
 quick_error! {
     #[derive(Debug)]
@@ -18,19 +19,8 @@ quick_error! {
             display("IO Error: {}", err)
             source(err)
         }
-        NumberParseError(err: std::num::ParseIntError, line: usize, column: usize, original: String) {
-            display("Number Parse Error: {}", err)
-            source(err)
-        }
-        DecimalParseError(err: rust_decimal::Error, line: usize, column: usize, original: String) {
-            display("Decimal Parse Error: {}", err)
-            source(err)
-        }
-        UnknownSymbolError(char: char, line: usize, column: usize, original: String) {
-            display("Unknown Symbol: {}", char)
-        }
-        UnknownKeywordOrIdentifierError(val: String, line: usize, column: usize, original: String) {
-            display("Unknown Keyword or Identifier: {}", val)
+        LexError(err: token::LexError) {
+            from()
         }
     }
 }
@@ -46,24 +36,7 @@ impl From<Error> for lsp_types::Diagnostic {
                 severity: Some(DiagnosticSeverity::ERROR),
                 ..Default::default()
             },
-            NumberParseError(_, l, c, ref og)
-            | DecimalParseError(_, l, c, ref og)
-            | UnknownSymbolError(_, l, c, ref og)
-            | UnknownKeywordOrIdentifierError(_, l, c, ref og) => Diagnostic {
-                range: Range {
-                    start: Position {
-                        line: l as u32,
-                        character: c as u32,
-                    },
-                    end: Position {
-                        line: l as u32,
-                        character: (c + og.len()) as u32,
-                    },
-                },
-                message: value.to_string(),
-                severity: Some(DiagnosticSeverity::ERROR),
-                ..Default::default()
-            },
+            LexError(e) => e.into(),
         }
     }
 }
@@ -73,452 +46,74 @@ pub trait Tokenize: Read + Seek {}
 impl<T> Tokenize for T where T: Read + Seek {}
 
 pub struct Tokenizer<'a> {
-    reader: BufReader<Box<dyn Tokenize + 'a>>,
-    char_buffer: [u8; 1],
-    line: usize,
-    column: usize,
+    lexer: Lexer<'a, TokenType<'a>>,
     returned_eof: bool,
-    string_buffer: String,
-}
-
-impl<'a> Tokenizer<'a> {
-    pub fn from_path(input_file: impl Into<PathBuf>) -> Result<Self, Error> {
-        let file = std::fs::File::open(input_file.into())?;
-        let reader = BufReader::new(Box::new(file) as Box<dyn Tokenize>);
-
-        Ok(Self {
-            reader,
-            line: 1,
-            column: 0, // Start at 0 so first char becomes 1
-            char_buffer: [0],
-            returned_eof: false,
-            string_buffer: String::new(),
-        })
-    }
-}
-
-impl<'a> From<String> for Tokenizer<'a> {
-    fn from(input: String) -> Self {
-        let reader = BufReader::new(Box::new(Cursor::new(input)) as Box<dyn Tokenize>);
-
-        Self {
-            reader,
-            line: 1,
-            column: 0,
-            char_buffer: [0],
-            returned_eof: false,
-            string_buffer: String::new(),
-        }
-    }
 }
 
 impl<'a> From<&'a str> for Tokenizer<'a> {
     fn from(value: &'a str) -> Self {
         Self {
-            reader: BufReader::new(Box::new(Cursor::new(value)) as Box<dyn Tokenize>),
-            char_buffer: [0],
-            column: 0,
-            line: 1,
+            lexer: TokenType::lexer(value),
             returned_eof: false,
-            string_buffer: String::new(),
         }
     }
 }
 
 impl<'a> Tokenizer<'a> {
-    fn next_char(&mut self) -> Result<Option<char>, Error> {
-        let bytes_read = self.reader.read(&mut self.char_buffer)?;
-
-        if bytes_read == 0 {
-            return Ok(None);
-        }
-
-        let c = self.char_buffer[0] as char;
-        if c == '\n' {
-            self.line += 1;
-            self.column = 1;
-        } else {
-            self.column += 1;
-        }
-
-        self.string_buffer.push(c);
-        Ok(Some(c))
+    /// Wraps `t_type` in a `Token` at `extras.line_count`, shifting the lexer span by `extras.line_start_index`.
+    fn to_token(&mut self, t_type: TokenType<'a>) -> Token<'a> {
+        let (raw, extras) = (self.lexer.span(), &self.lexer.extras);
+        let span = (raw.start - extras.line_start_index)..(raw.end - extras.line_start_index);
+        Token::new(t_type, extras.line_count, span)
     }
 
-    fn peek_next_char(&mut self) -> Result<Option<char>, Error> {
-        let current_pos = self.reader.stream_position()?;
-        let to_return = if self.reader.read(&mut self.char_buffer)? == 0 {
-            None
-        } else {
-            self.reader.seek(SeekFrom::Start(current_pos))?;
-            Some(self.char_buffer[0] as char)
-        };
+    pub fn next_token(&mut self) -> Result<Option<Token<'a>>, Error> {
+        // At end of input, yield one EOF token (as `Iterator::next` does), then `None`.
+        let to_return = match self.lexer.next().transpose()? {
+            Some(t_type) => Some(self.to_token(t_type)),
+            None if std::mem::replace(&mut self.returned_eof, true) => None, // EOF already sent
+            None => Some(self.to_token(TokenType::EOF)),
+        };
         Ok(to_return)
     }
 
-    fn skip_line(&mut self) -> Result<(), Error> {
-        while let Some(next_char) = self.next_char()? {
-            if next_char == '\n' {
-                break;
-            }
-        }
-        Ok(())
-    }
-
-    pub fn next_token(&mut self) -> Result<Option<Token>, Error> {
-        self.string_buffer.clear();
-
-        while let Some(next_char) = self.next_char()? {
-            if next_char.is_whitespace() {
-                self.string_buffer.clear();
-                continue;
-            }
-            if next_char == '/' && self.peek_next_char()? == Some('/') {
-                self.skip_line()?;
-                self.string_buffer.clear();
-                continue;
-            }
-
-            // Capture start position before delegating
-            let start_line = self.line;
-            let start_col = self.column;
-
-            match next_char {
-                '0'..='9' => {
-                    return self
-                        .tokenize_number(next_char, start_line, start_col)
-                        .map(Some);
-                }
-                '"' | '\'' => {
-                    return self
-                        .tokenize_string(next_char, start_line, start_col)
-                        .map(Some);
-                }
-                char if !char.is_alphanumeric() && char != '"' && char != '\'' => {
-                    return self
-                        .tokenize_symbol(next_char, start_line, start_col)
-                        .map(Some);
-                }
-                char if char.is_alphabetic() || char == '_' => {
-                    return self
-                        .tokenize_keyword_or_identifier(next_char, start_line, start_col)
-                        .map(Some);
-                }
-                _ => {
-                    return Err(Error::UnknownSymbolError(
-                        next_char,
-                        start_line,
-                        start_col,
-                        std::mem::take(&mut self.string_buffer),
-                    ));
-                }
-            }
-        }
-        if self.returned_eof {
-            Ok(None)
-        } else {
-            self.returned_eof = true;
-            Ok(Some(Token::new(
-                TokenType::EOF,
-                self.line,
-                self.column,
-                Some(std::mem::take(&mut self.string_buffer)),
-            )))
-        }
-    }
-
-    pub fn peek_next(&mut self) -> Result<Option<Token>, Error> {
-        let current_pos = self.reader.stream_position()?;
-        let column = self.column;
-        let line = self.line;
-        let token = self.next_token()?;
-        self.reader.seek(SeekFrom::Start(current_pos))?;
-        self.column = column;
-        self.line = line;
-        Ok(token)
-    }
-
-    // Updated helper functions to accept start_line and start_col
-
-    fn tokenize_symbol(
-        &mut self,
-        first_symbol: char,
-        line: usize,
-        col: usize,
-    ) -> Result<Token, Error> {
-        macro_rules! symbol {
-            ($symbol:ident) => {
-                Ok(Token::new(
-                    TokenType::Symbol(Symbol::$symbol),
-                    line,
-                    col,
-                    Some(std::mem::take(&mut self.string_buffer)),
-                ))
-            };
-        }
-
-        match first_symbol {
-            '(' => symbol!(LParen),
-            ')' => symbol!(RParen),
-            '{' => symbol!(LBrace),
-            '}' => symbol!(RBrace),
-            '[' => symbol!(LBracket),
-            ']' => symbol!(RBracket),
-            ';' => symbol!(Semicolon),
-            ':' => symbol!(Colon),
-            ',' => symbol!(Comma),
-            '+' => symbol!(Plus),
-            '-' => symbol!(Minus),
-            '/' => symbol!(Slash),
-            '.' => symbol!(Dot),
-            '^' => symbol!(Caret),
-            '%' => symbol!(Percent),
-            '<' if self.peek_next_char()? == Some('=') => {
-                self.next_char()?;
-                symbol!(LessThanOrEqual)
-            }
-            '<' => symbol!(LessThan),
-            '>' if self.peek_next_char()? == Some('=') => {
-                self.next_char()?;
-                symbol!(GreaterThanOrEqual)
-            }
-            '>' => symbol!(GreaterThan),
-            '=' if self.peek_next_char()? == Some('=') => {
-                self.next_char()?;
-                symbol!(Equal)
-            }
-            '=' => symbol!(Assign),
-            '!' if self.peek_next_char()? == Some('=') => {
-                self.next_char()?;
-                symbol!(NotEqual)
-            }
-            '!' => symbol!(LogicalNot),
-            '*' if self.peek_next_char()? == Some('*') => {
-                self.next_char()?;
-                symbol!(Exp)
-            }
-            '*' => symbol!(Asterisk),
-            '&' if self.peek_next_char()? == Some('&') => {
-                self.next_char()?;
-                symbol!(LogicalAnd)
-            }
-            '|' if self.peek_next_char()? == Some('|') => {
-                self.next_char()?;
-                symbol!(LogicalOr)
-            }
-            _ => Err(Error::UnknownSymbolError(
-                first_symbol,
-                line,
-                col,
-                std::mem::take(&mut self.string_buffer),
-            )),
-        }
-    }
-
-    fn tokenize_number(
-        &mut self,
-        first_char: char,
-        line: usize,
-        col: usize,
-    ) -> Result<Token, Error> {
-        let mut primary = String::with_capacity(16);
-        let mut decimal: Option<String> = None;
-        let mut reading_decimal = false;
-        primary.push(first_char);
-
-        while let Some(next_char) = self.peek_next_char()? {
-            if next_char.is_whitespace() {
-                break;
-            }
-            if next_char == '.' {
-                reading_decimal = true;
-                self.next_char()?;
-                continue;
-            }
-            if next_char == '_' {
-                self.next_char()?;
-                continue;
-            }
-            if !next_char.is_numeric() {
-                break;
-            }
-
-            if reading_decimal {
-                decimal.get_or_insert_with(String::new).push(next_char);
-            } else {
-                primary.push(next_char);
-            }
-            self.next_char()?;
-        }
-
-        let number: Number = if let Some(decimal) = decimal {
-            let decimal_scale = decimal.len() as u32;
-            let number_str = format!("{}{}", primary, decimal);
-            let number = number_str.parse::<i128>().map_err(|e| {
-                Error::NumberParseError(e, line, col, std::mem::take(&mut self.string_buffer))
-            })?;
-            Number::Decimal(
-                Decimal::try_from_i128_with_scale(number, decimal_scale).map_err(|e| {
-                    Error::DecimalParseError(e, line, col, std::mem::take(&mut self.string_buffer))
-                })?,
-            )
-        } else {
-            Number::Integer(primary.parse().map_err(|e| {
-                Error::NumberParseError(e, line, col, std::mem::take(&mut self.string_buffer))
-            })?)
-        };
-
-        if let Some(next_char) = self.peek_next_char()? {
-            let temperature = match next_char {
-                'c' => Temperature::Celsius(number),
-                'f' => Temperature::Fahrenheit(number),
-                'k' => Temperature::Kelvin(number),
-                _ => {
-                    return Ok(Token::new(
-                        TokenType::Number(number),
-                        line,
-                        col,
-                        Some(std::mem::take(&mut self.string_buffer)),
-                    ));
-                }
-            }
-            .to_kelvin();
-
-            self.next_char()?;
-            Ok(Token::new(
-                TokenType::Number(temperature),
-                line,
-                col,
-                Some(std::mem::take(&mut self.string_buffer)),
-            ))
-        } else {
-            Ok(Token::new(
-                TokenType::Number(number),
-                line,
-                col,
-                Some(std::mem::take(&mut self.string_buffer)),
-            ))
-        }
-    }
-
-    fn tokenize_string(
-        &mut self,
-        beginning_quote: char,
-        line: usize,
-        col: usize,
-    ) -> Result<Token, Error> {
-        let mut buffer = String::with_capacity(16);
-        while let Some(next_char) = self.next_char()? {
-            if next_char == beginning_quote {
-                break;
-            }
-            buffer.push(next_char);
-        }
-        Ok(Token::new(
-            TokenType::String(buffer),
-            line,
-            col,
-            Some(std::mem::take(&mut self.string_buffer)),
-        ))
-    }
-
-    fn tokenize_keyword_or_identifier(
-        &mut self,
-        first_char: char,
-        line: usize,
-        col: usize,
-    ) -> Result<Token, Error> {
-        macro_rules! keyword {
-            ($keyword:ident) => {{
-                return Ok(Token::new(
-                    TokenType::Keyword(Keyword::$keyword),
-                    line,
-                    col,
-                    Some(std::mem::take(&mut self.string_buffer)),
-                ));
-            }};
-        }
-        macro_rules! next_ws {
-            () => { matches!(self.peek_next_char()?, Some(x) if x.is_whitespace() || (!x.is_alphanumeric()) && x != '_') || self.peek_next_char()?.is_none() };
-        }
-
-        let mut buffer = String::with_capacity(16);
-        let mut looped_char = Some(first_char);
-
-        while let Some(next_char) = looped_char {
-            // allow UNDERSCORE_IDENTS
-            if next_char.is_whitespace() || (!next_char.is_alphanumeric() && next_char != '_') {
-                break;
-            }
-            buffer.push(next_char);
-
-            match buffer.as_str() {
-                "let" if next_ws!() => keyword!(Let),
-                "fn" if next_ws!() => keyword!(Fn),
-                "if" if next_ws!() => keyword!(If),
-                "else" if next_ws!() => keyword!(Else),
-                "return" if next_ws!() => keyword!(Return),
-                "enum" if next_ws!() => keyword!(Enum),
-                "device" if next_ws!() => keyword!(Device),
-                "loop" if next_ws!() => keyword!(Loop),
-                "break" if next_ws!() => keyword!(Break),
-                "while" if next_ws!() => keyword!(While),
-                "continue" if next_ws!() => keyword!(Continue),
-                "const" if next_ws!() => keyword!(Const),
-                "true" if next_ws!() => {
-                    return Ok(Token::new(
-                        TokenType::Boolean(true),
-                        line,
-                        col,
-                        Some(std::mem::take(&mut self.string_buffer)),
-                    ));
-                }
-                "false" if next_ws!() => {
-                    return Ok(Token::new(
-                        TokenType::Boolean(false),
-                        line,
-                        col,
-                        Some(std::mem::take(&mut self.string_buffer)),
-                    ));
-                }
-                val if next_ws!() => {
-                    return Ok(Token::new(
-                        TokenType::Identifier(val.to_string()),
-                        line,
-                        col,
-                        Some(std::mem::take(&mut self.string_buffer)),
-                    ));
-                }
-                _ => {}
-            }
-            looped_char = self.next_char()?;
-        }
-        Err(Error::UnknownKeywordOrIdentifierError(
-            buffer,
-            line,
-            col,
-            std::mem::take(&mut self.string_buffer),
-        ))
+    pub fn peek_next(&mut self) -> Result<Option<Token<'a>>, Error> {
+        todo!() // Stub: the previous reader-based peek was removed; calling this panics.
     }
 }
 
 // ... Iterator and TokenizerBuffer implementations remain unchanged ...
 // They just call the methods above which now use the passed-in start coordinates.
 impl<'a> Iterator for Tokenizer<'a> {
-    type Item = Result<Token, Error>;
+    type Item = Result<Token<'a>, Error>;
     fn next(&mut self) -> Option<Self::Item> {
-        match self.next_token() {
-            Ok(Some(tok)) => Some(Ok(tok)),
-            Ok(None) => None,
-            Err(e) => Some(Err(e)),
+        match self.lexer.next() {
+            None => {
+                if self.returned_eof {
+                    None
+                } else {
+                    self.returned_eof = true;
+                    // Emit exactly one EOF token. Build it with `to_token` so its
+                    // span gets the same `line_start_index` adjustment as every
+                    // other token's span.
+                    Some(Ok(self.to_token(TokenType::EOF)))
+                }
+            }
+            Some(t) => match t {
+                Err(e) => {
+                    // Yield the lex error to the caller instead of panicking.
+                    Some(Err(Error::from(e)))
+                }
+                Ok(t) => Some(Ok(self.to_token(t))),
+            },
         }
     }
 }
 
 pub struct TokenizerBuffer<'a> {
     tokenizer: Tokenizer<'a>,
-    buffer: VecDeque<Token>,
-    history: VecDeque<Token>,
+    buffer: VecDeque<Token<'a>>,
+    history: VecDeque<Token<'a>>,
     index: i64,
 }
 
@@ -601,437 +196,3 @@ impl<'a> TokenizerBuffer<'a> {
         Ok(())
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use anyhow::Result;
-    use rust_decimal::Decimal;
-
-    const TEST_FILE: &str = "tests/file.stlg";
-
-    const TEST_STRING: &str = r#"
-        fn test() {
-            let x = 10;
-            return x + 2;
-        }
-    "#;
-
-    #[test]
-    fn test_seek_from_current() -> Result<()> {
-        let tokenizer = Tokenizer::from(TEST_STRING.to_owned());
-        let mut buffer = TokenizerBuffer::new(tokenizer);
-
-        let token = buffer.next_token()?.unwrap();
-        assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn));
-
-        buffer.seek(SeekFrom::Current(1))?;
-
-        let token = buffer.next_token()?.unwrap();
-
-        assert_eq!(token.token_type, TokenType::Symbol(Symbol::LParen));
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_tokenizer_from_path_ok() {
-        let tokenizer = Tokenizer::from_path(TEST_FILE);
-        assert!(tokenizer.is_ok());
-    }
-
-    #[test]
-    fn test_tokenizer_from_path_err() {
-        let tokenizer = Tokenizer::from_path("non_existent_file.stlg");
-        assert!(tokenizer.is_err());
-    }
-
-    #[test]
-    fn test_next_char() -> Result<()> {
-        let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());
-
-        let char = tokenizer.next_char()?;
-
-        assert_eq!(char, Some('\n'));
-        assert_eq!(tokenizer.line, 2);
-        assert_eq!(tokenizer.column, 1);
-
-        let mut tokenizer = Tokenizer::from(String::from("fn"));
-
-        let char = tokenizer.next_char()?;
-
-        assert_eq!(char, Some('f'));
-        assert_eq!(tokenizer.line, 1);
-        assert_eq!(tokenizer.column, 1);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_peek_next_char() -> Result<()> {
-        let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());
-
-        let char = tokenizer.peek_next_char()?;
-
-        assert_eq!(char, Some('\n'));
-        assert_eq!(tokenizer.line, 1);
-        assert_eq!(tokenizer.column, 0);
-
-        let char = tokenizer.next_char()?;
-        assert_eq!(char, Some('\n'));
-        assert_eq!(tokenizer.line, 2);
-        assert_eq!(tokenizer.column, 1);
-
-        let char = tokenizer.peek_next_char()?;
-        assert_eq!(char, Some(' '));
-        assert_eq!(tokenizer.line, 2);
-        assert_eq!(tokenizer.column, 1);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_temperature_unit() -> Result<()> {
-        let mut tokenizer = Tokenizer::from(String::from("10c 14f 10k"));
-
-        let token = tokenizer.next_token()?.unwrap();
-
-        assert_eq!(
-            token.token_type,
-            TokenType::Number(Number::Decimal(Decimal::new(28315, 2)))
-        );
-
-        let token = tokenizer.next_token()?.unwrap();
-
-        assert_eq!(
-            token.token_type,
-            TokenType::Number(Number::Decimal(Decimal::new(26315, 2)))
-        );
-
-        let token = tokenizer.next_token()?.unwrap();
-
-        assert_eq!(token.token_type, TokenType::Number(Number::Integer(10)));
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_parse_integer() -> Result<()> {
-        let mut tokenizer = Tokenizer::from(String::from("10"));
-
-        let token = tokenizer.next_token()?.unwrap();
-
-        assert_eq!(token.token_type, TokenType::Number(Number::Integer(10)));
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_parse_integer_with_underscore() -> Result<()> {
-        let mut tokenizer = Tokenizer::from(String::from("1_000"));
-
-        let token = tokenizer.next_token()?.unwrap();
-
-        assert_eq!(token.token_type, TokenType::Number(Number::Integer(1000)));
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_parse_decimal() -> Result<()> {
-        let mut tokenizer = Tokenizer::from(String::from("10.5"));
-
-        let token = tokenizer.next_token()?.unwrap();
-
-        assert_eq!(
-            token.token_type,
-            TokenType::Number(Number::Decimal(Decimal::new(105, 1))) // 10.5
-        );
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_parse_decimal_with_underscore() -> Result<()> {
-        let mut tokenizer = Tokenizer::from(String::from("1_000.000_6"));
-
-        let token = tokenizer.next_token()?.unwrap();
-
-        assert_eq!(
-            token.token_type,
-            TokenType::Number(Number::Decimal(Decimal::new(10000006, 4))) // 1000.0006
-        );
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_parse_number_with_symbol() -> Result<()> {
-        let mut tokenizer = Tokenizer::from(String::from("10;"));
-
-        let token = tokenizer.next_token()?.unwrap();
-
-        assert_eq!(token.token_type, TokenType::Number(Number::Integer(10)));
-
-        let next_char = tokenizer.next_char()?;
-
-        assert_eq!(next_char, Some(';'));
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_string_parse() -> Result<()> {
-        let mut tokenizer = Tokenizer::from(String::from(r#""Hello, World!""#));
-
-        let token = tokenizer.next_token()?.unwrap();
-
-        assert_eq!(
-            token.token_type,
-            TokenType::String(String::from("Hello, World!"))
-        );
-
-        let mut tokenizer = Tokenizer::from(String::from(r#"'Hello, World!'"#));
-
-        let token = tokenizer.next_token()?.unwrap();
-
-        assert_eq!(
-            token.token_type,
-            TokenType::String(String::from("Hello, World!"))
-        );
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_symbol_parse() -> Result<()> {
-        let mut tokenizer = Tokenizer::from(String::from(
-            "^ ! () [] {} , . ; : + - * / < > = != && || >= <=**%",
-        ));
-
-        let expected_tokens = vec![
-            TokenType::Symbol(Symbol::Caret),
-            TokenType::Symbol(Symbol::LogicalNot),
-            TokenType::Symbol(Symbol::LParen),
-            TokenType::Symbol(Symbol::RParen),
-            TokenType::Symbol(Symbol::LBracket),
-            TokenType::Symbol(Symbol::RBracket),
-            TokenType::Symbol(Symbol::LBrace),
-            TokenType::Symbol(Symbol::RBrace),
-            TokenType::Symbol(Symbol::Comma),
-            TokenType::Symbol(Symbol::Dot),
-            TokenType::Symbol(Symbol::Semicolon),
-            TokenType::Symbol(Symbol::Colon),
-            TokenType::Symbol(Symbol::Plus),
-            TokenType::Symbol(Symbol::Minus),
-            TokenType::Symbol(Symbol::Asterisk),
-            TokenType::Symbol(Symbol::Slash),
-            TokenType::Symbol(Symbol::LessThan),
-            TokenType::Symbol(Symbol::GreaterThan),
-            TokenType::Symbol(Symbol::Assign),
-            TokenType::Symbol(Symbol::NotEqual),
-            TokenType::Symbol(Symbol::LogicalAnd),
-            TokenType::Symbol(Symbol::LogicalOr),
-            TokenType::Symbol(Symbol::GreaterThanOrEqual),
-            TokenType::Symbol(Symbol::LessThanOrEqual),
-            TokenType::Symbol(Symbol::Exp),
-            TokenType::Symbol(Symbol::Percent),
-        ];
-
-        for expected_token in expected_tokens {
-            let token = tokenizer.next_token()?.unwrap();
-
-            assert_eq!(token.token_type, expected_token);
-        }
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_keyword_parse() -> Result<()> {
-        let mut tokenizer = Tokenizer::from(String::from(
-            "let fn if else return enum continue break const",
-        ));
-
-        let expected_tokens = vec![
-            TokenType::Keyword(Keyword::Let),
-            TokenType::Keyword(Keyword::Fn),
-            TokenType::Keyword(Keyword::If),
-            TokenType::Keyword(Keyword::Else),
-            TokenType::Keyword(Keyword::Return),
-            TokenType::Keyword(Keyword::Enum),
-            TokenType::Keyword(Keyword::Continue),
-            TokenType::Keyword(Keyword::Break),
-            TokenType::Keyword(Keyword::Const),
-        ];
-
-        for expected_token in expected_tokens {
-            let token = tokenizer.next_token()?.unwrap();
-
-            assert_eq!(token.token_type, expected_token);
-        }
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_identifier_parse() -> Result<()> {
-        let mut tokenizer = Tokenizer::from(String::from("fn test fn test_underscores"));
-
-        let token = tokenizer.next_token()?.unwrap();
-        assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn));
-        let token = tokenizer.next_token()?.unwrap();
-        assert_eq!(
-            token.token_type,
-            TokenType::Identifier(String::from("test"))
-        );
-        let token = tokenizer.next_token()?.unwrap();
-        assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn));
-        let token = tokenizer.next_token()?.unwrap();
-        assert_eq!(
-            token.token_type,
-            TokenType::Identifier(String::from("test_underscores"))
-        );
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_boolean_parse() -> Result<()> {
-        let mut tokenizer = Tokenizer::from(String::from("true false"));
-
-        let token = tokenizer.next_token()?.unwrap();
-        assert_eq!(token.token_type, TokenType::Boolean(true));
-        let token = tokenizer.next_token()?.unwrap();
-        assert_eq!(token.token_type, TokenType::Boolean(false));
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_full_source() -> Result<()> {
-        let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());
-
-        let expected_tokens = vec![
-            TokenType::Keyword(Keyword::Fn),
-            TokenType::Identifier(String::from("test")),
-            TokenType::Symbol(Symbol::LParen),
-            TokenType::Symbol(Symbol::RParen),
-            TokenType::Symbol(Symbol::LBrace),
-            TokenType::Keyword(Keyword::Let),
-            TokenType::Identifier(String::from("x")),
-            TokenType::Symbol(Symbol::Assign),
-            TokenType::Number(Number::Integer(10)),
-            TokenType::Symbol(Symbol::Semicolon),
-            TokenType::Keyword(Keyword::Return),
-            TokenType::Identifier(String::from("x")),
-            TokenType::Symbol(Symbol::Plus),
-            TokenType::Number(Number::Integer(2)),
-            TokenType::Symbol(Symbol::Semicolon),
-            TokenType::Symbol(Symbol::RBrace),
-        ];
-
-        for expected_token in expected_tokens {
-            let token = tokenizer.next_token()?.unwrap();
-
-            assert_eq!(token.token_type, expected_token);
-        }
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_peek_next() -> Result<()> {
-        let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());
-
-        let column = tokenizer.column;
-        let line = tokenizer.line;
-
-        let peeked_token = tokenizer.peek_next()?;
-
-        assert_eq!(
-            peeked_token.unwrap().token_type,
-            TokenType::Keyword(Keyword::Fn)
-        );
-        assert_eq!(tokenizer.column, column);
-        assert_eq!(tokenizer.line, line);
-
-        let next_token = tokenizer.next_token()?;
-
-        assert_eq!(
-            next_token.unwrap().token_type,
-            TokenType::Keyword(Keyword::Fn)
-        );
-        assert_ne!(tokenizer.column, column);
-        assert_ne!(tokenizer.line, line);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_compact_syntax() -> Result<()> {
-        let mut tokenizer = Tokenizer::from(String::from("if(true) while(false)"));
-
-        // if(true)
-        assert_eq!(
-            tokenizer.next_token()?.unwrap().token_type,
-            TokenType::Keyword(Keyword::If)
-        );
-        assert_eq!(
-            tokenizer.next_token()?.unwrap().token_type,
-            TokenType::Symbol(Symbol::LParen)
-        );
-        assert_eq!(
-            tokenizer.next_token()?.unwrap().token_type,
-            TokenType::Boolean(true)
-        );
-        assert_eq!(
-            tokenizer.next_token()?.unwrap().token_type,
-            TokenType::Symbol(Symbol::RParen)
-        );
-
-        // while(false)
-        assert_eq!(
-            tokenizer.next_token()?.unwrap().token_type,
-            TokenType::Keyword(Keyword::While)
-        );
-        assert_eq!(
-            tokenizer.next_token()?.unwrap().token_type,
-            TokenType::Symbol(Symbol::LParen)
-        );
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_identifier_has_correct_length() -> Result<()> {
-        let mut tokenizer = Tokenizer::from("hello");
-        assert_eq!(
-            tokenizer.next_token()?,
-            Some(Token {
-                token_type: TokenType::Identifier("hello".into()),
-                original_string: Some("hello".into()),
-                column: 1,
-                line: 1
-            })
-        );
-        Ok(())
-    }
-
-    #[test]
-    fn test_keyword_token_has_correct_length() -> Result<()> {
-        let mut tokenizer = Tokenizer::from("while");
-
-        assert_eq!(
-            tokenizer.next_token()?,
-            Some(Token {
-                token_type: TokenType::Keyword(Keyword::While),
-                original_string: Some("while".into()),
-                column: 1,
-                line: 1
-            })
-        );
-
-        Ok(())
-    }
-}
diff --git a/rust_compiler/libs/tokenizer/src/token.rs b/rust_compiler/libs/tokenizer/src/token.rs
index 2233da3..53181d4 100644
--- a/rust_compiler/libs/tokenizer/src/token.rs
+++ b/rust_compiler/libs/tokenizer/src/token.rs
@@ -1,6 +1,57 @@
 use helpers::prelude::*;
-use logos::{Lexer, Logos};
+use logos::{Lexer, Logos, Skip, Span};
+use lsp_types::{Diagnostic, DiagnosticSeverity, Position, Range};
 use rust_decimal::Decimal;
+use thiserror::Error;
+
+#[derive(Debug, Error, Default, Clone, PartialEq)]
+pub enum LexError {
+    #[error("Attempted to parse an invalid number: {2}")]
+    NumberParseError(usize, Span, String),
+    /// Like `NumberParseError`, the fields are (line, span within that line, offending text).
+    #[error("An invalid character was found in token stream: {2}")]
+    InvalidInput(usize, Span, String),
+    /// Fallback without any position information; this is the `Default` value.
+    #[default]
+    #[error("An unknown error occurred")]
+    Other,
+}
+
+impl From<LexError> for Diagnostic {
+    fn from(value: LexError) -> Self {
+        match value {
+            LexError::NumberParseError(line, col, str) | LexError::InvalidInput(line, col, str) => {
+                Diagnostic {
+                    range: Range {
+                        start: Position {
+                            character: col.start as u32,
+                            line: line as u32,
+                        },
+                        end: Position {
+                            line: line as u32,
+                            character: col.end as u32,
+                        },
+                    },
+                    severity: Some(DiagnosticSeverity::ERROR),
+                    message: str,
+                    ..Default::default()
+                }
+            }
+            _ => todo!(),
+        }
+    }
+}
+
+impl LexError {
+    /// Builds an `InvalidInput` error for the lexer's current slice, with the
+    /// span shifted by `extras.line_start_index` so it is relative to the line.
+    pub fn from_lexer<'a>(lex: &mut Lexer<'a, TokenType<'a>>) -> Self {
+        let offset = lex.extras.line_start_index;
+        let span = lex.span();
+        let column = (span.start - offset)..(span.end - offset);
+        Self::InvalidInput(lex.extras.line_count, column, lex.slice().to_string())
+    }
+}
 
 // Define a local macro to consume the list
 macro_rules! generate_check {
@@ -11,29 +62,34 @@ macro_rules! generate_check {
     }
 }
 
-#[derive(Debug, PartialEq, Eq, Clone)]
-pub struct Token {
-    /// The type of the token
-    pub token_type: TokenType,
-    /// The line where the token was found
-    pub line: usize,
-    /// The column where the token was found
-    pub column: usize,
-    pub original_string: Option<String>,
+#[derive(Default)]
+pub struct Extras {
+    pub line_count: usize, // bumped by `update_line_index` for every `\n` matched
+    pub line_start_index: usize, // input offset just past the most recently matched `\n`
+}
 
-impl Token {
-    pub fn new(
-        token_type: TokenType,
-        line: usize,
-        column: usize,
-        original: Option<String>,
-    ) -> Self {
+fn update_line_index<'a>(lex: &mut Lexer<'a, TokenType<'a>>) -> Skip {
+    lex.extras.line_count += 1; // one more newline has been consumed
+    lex.extras.line_start_index = lex.span().end; // the next line begins right after this match
+    Skip
+}
+
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub struct Token<'a> {
+    /// The kind of token, together with any payload it carries
+    pub token_type: TokenType<'a>,
+    /// The line where the token was found
+    pub line: usize,
+    /// The `start..end` range the token covers (NOTE(review): confirm whether line-relative)
+    pub span: Span,
+}
+
+impl<'a> Token<'a> {
+    pub fn new(token_type: TokenType<'a>, line: usize, span: Span) -> Self {
         Self {
             token_type,
             line,
-            column,
-            original_string: original,
+            span,
         }
     }
 }
@@ -93,13 +149,19 @@ macro_rules! keyword {
 }
 
 #[derive(Debug, PartialEq, Hash, Eq, Clone, Logos)]
-pub enum TokenType {
+#[logos(skip r"[ \t\f]+")]
+#[logos(extras = Extras)]
+#[logos(error(LexError, LexError::from_lexer))]
+pub enum TokenType<'a> {
+    #[regex(r"\n", update_line_index)]
+    Newline,
+
     // matches strings with double quotes
-    #[regex(r#""(?:[^"\\]|\\.)*""#, |v| v.slice().to_string())]
+    #[regex(r#""(?:[^"\\]|\\.)*""#)]
     // matches strings with single quotes
-    #[regex(r#"'(?:[^'\\]|\\.)*'"#, |v| v.slice().to_string())]
+    #[regex(r#"'(?:[^'\\]|\\.)*'"#)]
     /// Represents a string token
-    String(String),
+    String(&'a str),
 
     #[regex(r"[0-9][0-9_]*(\.[0-9][0-9_]*)?([cfk])?", parse_number)]
     /// Represents a number token
@@ -125,9 +187,9 @@ pub enum TokenType {
     /// Represents a keyword token
     Keyword(Keyword),
 
-    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |v| v.slice().to_string())]
+    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
     /// Represents an identifier token
-    Identifier(String),
+    Identifier(&'a str),
 
     #[token("(", symbol!(LParen))]
     #[token(")", symbol!(RParen))]
@@ -159,10 +221,10 @@ pub enum TokenType {
     /// Represents a symbol token
     Symbol(Symbol),
 
-    #[regex(r"///[\n]*", |val| Comment::Doc(val.slice()[3..].trim().to_string()))]
-    #[regex(r"//[\n]*", |val| Comment::Line(val.slice()[2..].trim().to_string()))]
+    #[regex(r"///[^\n]*", |val| Comment::Doc(val.slice()[3..].trim()))] // body = rest of the line
+    #[regex(r"//[^\n]*", |val| Comment::Line(val.slice()[2..].trim()))] // body = rest of the line
     /// Represents a comment, both a line comment and a doc comment
-    Comment(Comment),
+    Comment(Comment<'a>),
 
     #[end]
     /// Represents an end of file token
@@ -170,14 +232,14 @@ pub enum TokenType {
 }
 
 #[derive(Hash, Debug, Eq, PartialEq, Clone)]
-pub enum Comment {
-    Line(String),
-    Doc(String),
+pub enum Comment<'a> {
+    Line(&'a str),
+    Doc(&'a str),
 }
 
-fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType>) -> Option<Number> {
+fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType<'a>>) -> Result<Number, LexError> {
     let slice = lexer.slice();
-    let last_char = slice.chars().last()?;
+    let last_char = slice.chars().last().unwrap_or_default();
     let (num_str, suffix) = match last_char {
         'c' | 'k' | 'f' => (&slice[..slice.len() - 1], Some(last_char)),
         _ => (slice, None),
@@ -189,28 +251,39 @@ fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType>) -> Option<Number> {
         num_str.to_string()
     };
 
+    let line = lexer.extras.line_count;
+    let mut span = lexer.span();
+    span.end -= lexer.extras.line_start_index;
+    span.start -= lexer.extras.line_start_index;
+
     let num = if clean_str.contains('.') {
-        Number::Decimal(clean_str.parse::<Decimal>().ok()?)
+        Number::Decimal(
+            clean_str
+                .parse::<Decimal>()
+                .map_err(|_| LexError::NumberParseError(line, span, slice.to_string()))?,
+        )
     } else {
-        Number::Integer(clean_str.parse::<i128>().ok()?)
+        Number::Integer(
+            clean_str
+                .parse::<i128>()
+                .map_err(|_| LexError::NumberParseError(line, span, slice.to_string()))?,
+        )
     };
 
     if let Some(suffix) = suffix {
-        Some(
-            match suffix {
-                'c' => Temperature::Celsius(num),
-                'f' => Temperature::Fahrenheit(num),
-                'k' => Temperature::Kelvin(num),
-                _ => unreachable!(),
-            }
-            .to_kelvin(),
-        )
+        Ok(match suffix {
+            'c' => Temperature::Celsius(num),
+            'f' => Temperature::Fahrenheit(num),
+            'k' => Temperature::Kelvin(num),
+            _ => unreachable!(),
+        }
+        .to_kelvin())
     } else {
-        Some(num)
+        Ok(num)
     }
 }
 
-impl std::fmt::Display for Comment {
+impl<'a> std::fmt::Display for Comment<'a> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             Self::Line(c) => write!(f, "// {}", c),
@@ -227,7 +300,7 @@ impl std::fmt::Display for Comment {
     }
 }
 
-impl Documentation for TokenType {
+impl<'a> Documentation for TokenType<'a> {
     fn docs(&self) -> String {
         match self {
             Self::Keyword(k) => k.docs(),
@@ -242,7 +315,7 @@ impl Documentation for TokenType {
 
 helpers::with_syscalls!(generate_check);
 
-impl From<TokenType> for u32 {
+impl<'a> From<TokenType<'a>> for u32 {
     fn from(value: TokenType) -> Self {
         match value {
             TokenType::String(_) => 1,
@@ -277,12 +350,12 @@ impl From<TokenType> for u32 {
                     7
                 }
             }
-            TokenType::EOF => 0,
+            _ => 0,
         }
     }
 }
 
-impl std::fmt::Display for TokenType {
+impl<'a> std::fmt::Display for TokenType<'a> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             TokenType::String(s) => write!(f, "{}", s),
@@ -293,6 +366,7 @@ impl std::fmt::Display for TokenType {
             TokenType::Symbol(s) => write!(f, "{}", s),
             TokenType::Comment(c) => write!(f, "{}", c),
             TokenType::EOF => write!(f, "EOF"),
+            _ => write!(f, ""),
         }
     }
 }