From fac36c756b442526437eec06c135d881e929353e Mon Sep 17 00:00:00 2001
From: Devin Bidwell
Date: Mon, 8 Dec 2025 23:19:23 -0700
Subject: [PATCH] Lexer impl done

---
 rust_compiler/libs/parser/src/lib.rs      | 57 ++++++++++-------------
 rust_compiler/libs/tokenizer/src/lib.rs   | 30 ++++++------
 rust_compiler/libs/tokenizer/src/token.rs |  6 +++
 3 files changed, 44 insertions(+), 49 deletions(-)
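
Note for reviewers (this sits below the "---" marker, so git am ignores it):
the hand-rolled quick_error block is replaced with thiserror derives, and the
parser's Error type now carries the token lifetime. The #[from] attribute on
TokenizerError is what keeps `?` compiling at tokenizer call sites, because
thiserror generates the corresponding From impl. Below is a minimal,
self-contained sketch of that mechanism; LowError and HighError are toy
stand-ins invented for illustration, not types from this repo.

    use thiserror::Error;

    // Toy stand-in for tokenizer::Error.
    #[derive(Debug, Error)]
    #[error("low-level failure")]
    struct LowError;

    // Toy stand-in for the parser's Error enum.
    #[derive(Debug, Error)]
    enum HighError {
        // #[from] also generates `impl From<LowError> for HighError`,
        // mirroring TokenizerError(#[from] tokenizer::Error) in this patch.
        #[error("tokenizer error: {0}")]
        Low(#[from] LowError),
    }

    fn fallible() -> Result<u32, LowError> {
        Err(LowError)
    }

    fn run() -> Result<(), HighError> {
        let _n = fallible()?; // `?` converts LowError into HighError via From
        Ok(())
    }

    fn main() {
        // Prints "tokenizer error: low-level failure" via the derived Display.
        if let Err(e) = run() {
            println!("{e}");
        }
    }
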
diff --git a/rust_compiler/libs/parser/src/lib.rs b/rust_compiler/libs/parser/src/lib.rs
index f00150d..2e5df7b 100644
--- a/rust_compiler/libs/parser/src/lib.rs
+++ b/rust_compiler/libs/parser/src/lib.rs
@@ -8,6 +8,7 @@ use crate::sys_call::{Math, System};
 use quick_error::quick_error;
 use std::io::SeekFrom;
 use sys_call::SysCall;
+use thiserror::Error;
 use tokenizer::{
     self, Tokenizer, TokenizerBuffer,
     token::{Keyword, Symbol, Token, TokenType},
@@ -26,33 +27,28 @@ macro_rules! boxed {
     };
 }
 
-quick_error! {
-    #[derive(Debug)]
-    pub enum Error {
-        TokenizerError(err: tokenizer::Error) {
-            from()
-            display("Tokenizer Error: {}", err)
-            source(err)
-        }
-        UnexpectedToken(span: Span, token: Token) {
-            display("Unexpected token: {}", token.token_type)
-        }
-        DuplicateIdentifier(span: Span, token: Token) {
-            display("Duplicate identifier: {}", token.token_type)
-        }
-        InvalidSyntax(span: Span, reason: String) {
-            display("Invalid syntax: {}", reason)
-        }
-        UnsupportedKeyword(span: Span, token: Token) {
-            display("Unsupported keyword: {}", token.token_type)
-        }
-        UnexpectedEOF {
-            display("Unexpected EOF")
-        }
-    }
+#[derive(Error, Debug)]
+pub enum Error<'a> {
+    #[error("Tokenizer Error: {0}")]
+    TokenizerError(#[from] tokenizer::Error),
+
+    #[error("Unexpected token: {1}")]
+    UnexpectedToken(Span, Token<'a>),
+
+    #[error("Duplicate identifier: {1}")]
+    DuplicateIdentifier(Span, Token<'a>),
+
+    #[error("Invalid syntax: {1}")]
+    InvalidSyntax(Span, Token<'a>),
+
+    #[error("Unsupported keyword: {1}")]
+    UnsupportedKeyword(Span, Token<'a>),
+
+    #[error("Unexpected End of File")]
+    UnexpectedEOF,
 }
 
-impl From<Error> for lsp_types::Diagnostic {
+impl<'a> From<Error<'a>> for lsp_types::Diagnostic {
     fn from(value: Error) -> Self {
         use Error::*;
         use lsp_types::*;
@@ -112,7 +108,7 @@ macro_rules! self_matches_current {
 pub struct Parser<'a> {
     tokenizer: TokenizerBuffer<'a>,
     current_token: Option<Token<'a>>,
-    pub errors: Vec<Error>,
+    pub errors: Vec<Error<'a>>,
 }
 
 impl<'a> Parser<'a> {
@@ -159,18 +155,15 @@ impl<'a> Parser<'a> {
 
         let (start_line, start_col) = start_token
             .as_ref()
-            .map(|t| (t.line, t.column))
+            .map(|t| (t.line, t.span.start))
             .unwrap_or((1, 1));
 
         let node = parser(self)?;
 
-        let end_token = self.current_token.as_ref();
+        let end_token = self.current_token;
 
         let (end_line, end_col) = end_token
-            .map(|t| {
-                let len = t.original_string.as_ref().map(|s| s.len()).unwrap_or(0);
-                (t.line, t.column + len)
-            })
+            .map(|t| (t.line, t.span.end))
             .unwrap_or((start_line, start_col));
 
         Ok(Spanned {
diff --git a/rust_compiler/libs/tokenizer/src/lib.rs b/rust_compiler/libs/tokenizer/src/lib.rs
index 44b2223..adbe420 100644
--- a/rust_compiler/libs/tokenizer/src/lib.rs
+++ b/rust_compiler/libs/tokenizer/src/lib.rs
@@ -5,9 +5,7 @@
 use quick_error::quick_error;
 use std::{
     cmp::Ordering,
     collections::VecDeque,
-    io::{BufReader, Cursor, Read, Seek, SeekFrom},
-    iter::Peekable,
-    path::PathBuf,
+    io::{Read, Seek, SeekFrom},
 };
 use token::{Token, TokenType};
@@ -60,7 +58,7 @@ impl<'a> From<&'a str> for Tokenizer<'a> {
 }
 
 impl<'a> Tokenizer<'a> {
-    fn to_token(&mut self, t_type: TokenType<'a>) -> Token<'a> {
+    fn get_token(&mut self, t_type: TokenType<'a>) -> Token<'a> {
         let mut span = self.lexer.span();
         span.start -= self.lexer.extras.line_start_index;
         span.end -= self.lexer.extras.line_start_index;
@@ -72,14 +70,10 @@ impl<'a> Tokenizer<'a> {
             .lexer
             .next()
             .transpose()
-            .map(|t| t.map(|t| self.to_token(t)))?;
+            .map(|t| t.map(|t| self.get_token(t)))?;
 
         Ok(to_return)
     }
-
-    pub fn peek_next(&mut self) -> Result<Option<Token<'a>>, Error> {
-        todo!()
-    }
 }
 
 // ... Iterator and TokenizerBuffer implementations remain unchanged ...
@@ -101,10 +95,8 @@
                 }
             }
             Some(t) => match t {
-                Err(e) => {
-                    todo!()
-                }
-                Ok(t) => Some(Ok(self.to_token(t))),
+                Err(e) => Some(Err(e.into())),
+                Ok(t) => Some(Ok(self.get_token(t))),
             },
         }
     }
@@ -126,7 +118,7 @@ impl<'a> TokenizerBuffer<'a> {
             index: 0,
         }
     }
-    pub fn next_token(&mut self) -> Result<Option<Token>, Error> {
+    pub fn next_token(&mut self) -> Result<Option<Token<'a>>, Error> {
         if let Some(token) = self.buffer.pop_front() {
             self.history.push_back(token.clone());
             self.index += 1;
@@ -141,12 +133,16 @@
         self.index += 1;
         Ok(token)
     }
-    pub fn peek(&mut self) -> Result<Option<Token>, Error> {
+    pub fn peek(&mut self) -> Result<Option<Token<'a>>, Error> {
         if let Some(token) = self.buffer.front() {
             return Ok(Some(token.clone()));
         }
-        let token = self.tokenizer.peek_next()?;
-        Ok(token)
+
+        let Some(new_token) = self.tokenizer.next_token()? else {
+            return Ok(None);
+        };
+        self.buffer.push_front(new_token.clone());
+        Ok(Some(new_token))
     }
     pub fn loc(&self) -> i64 {
         self.index
diff --git a/rust_compiler/libs/tokenizer/src/token.rs b/rust_compiler/libs/tokenizer/src/token.rs
index 53181d4..7e5f4e8 100644
--- a/rust_compiler/libs/tokenizer/src/token.rs
+++ b/rust_compiler/libs/tokenizer/src/token.rs
@@ -84,6 +84,12 @@ pub struct Token<'a> {
     pub span: Span,
 }
 
+impl<'a> std::fmt::Display for Token<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.token_type)
+    }
+}
+
 impl<'a> Token<'a> {
     pub fn new(token_type: TokenType<'a>, line: usize, span: Span) -> Self {
         Self {
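
-- 
Reviewer note, not part of the patch: peek() now works by pulling one token
from the underlying Tokenizer and stashing it at the front of the lookahead
buffer, so repeated peeks are stable and the next next_token() call pops the
same token (peek never touches history or the index; only next_token does).
The test below sketches that contract. It assumes TokenizerBuffer::new takes
the Tokenizer by value and that Token derives PartialEq and Debug; neither is
visible in this diff, so adjust to the crate's actual API.

    #[cfg(test)]
    mod peek_contract {
        use super::*;

        #[test]
        fn peek_does_not_consume_and_matches_next_token() {
            // The diff context shows `impl<'a> From<&'a str> for Tokenizer<'a>`,
            // so constructing a tokenizer from a source string should work.
            let mut buffer = TokenizerBuffer::new(Tokenizer::from("let x = 5;"));

            // Two consecutive peeks must return the same token: the first peek
            // push_fronts the token, the second finds it via buffer.front().
            let first = buffer.peek().expect("tokenizer error");
            let second = buffer.peek().expect("tokenizer error");
            assert_eq!(first, second);

            // Consuming must yield exactly the token that was peeked, after
            // which the cursor has moved past it.
            let consumed = buffer.next_token().expect("tokenizer error");
            assert_eq!(first, consumed);
            assert_ne!(buffer.peek().expect("tokenizer error"), first);
        }
    }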