Lexer impl done

commit fac36c756b
parent 115a57128c
Date: 2025-12-08 23:19:23 -07:00
3 changed files with 44 additions and 49 deletions

File 1 of 3

@@ -8,6 +8,7 @@ use crate::sys_call::{Math, System};
 use quick_error::quick_error;
 use std::io::SeekFrom;
 use sys_call::SysCall;
+use thiserror::Error;
 use tokenizer::{
     self, Tokenizer, TokenizerBuffer,
     token::{Keyword, Symbol, Token, TokenType},
@@ -26,33 +27,28 @@ macro_rules! boxed {
     };
 }

-quick_error! {
-    #[derive(Debug)]
-    pub enum Error {
-        TokenizerError(err: tokenizer::Error) {
-            from()
-            display("Tokenizer Error: {}", err)
-            source(err)
-        }
-        UnexpectedToken(span: Span, token: Token) {
-            display("Unexpected token: {}", token.token_type)
-        }
-        DuplicateIdentifier(span: Span, token: Token) {
-            display("Duplicate identifier: {}", token.token_type)
-        }
-        InvalidSyntax(span: Span, reason: String) {
-            display("Invalid syntax: {}", reason)
-        }
-        UnsupportedKeyword(span: Span, token: Token) {
-            display("Unsupported keyword: {}", token.token_type)
-        }
-        UnexpectedEOF {
-            display("Unexpected EOF")
-        }
-    }
-}
+#[derive(Error, Debug)]
+pub enum Error<'a> {
+    #[error("Tokenizer Error: {0}")]
+    TokenizerError(#[from] tokenizer::Error),
+
+    #[error("Unexpected token: {1}")]
+    UnexpectedToken(Span, Token<'a>),
+
+    #[error("Duplicate identifier: {1}")]
+    DuplicateIdentifier(Span, Token<'a>),
+
+    #[error("Invalid Syntax: {1}")]
+    InvalidSyntax(Span, Token<'a>),
+
+    #[error("Unsupported Keyword: {1}")]
+    UnsupportedKeyword(Span, Token<'a>),
+
+    #[error("Unexpected End of File")]
+    UnexpectedEOF,
+}

-impl From<Error> for lsp_types::Diagnostic {
+impl<'a> From<Error<'a>> for lsp_types::Diagnostic {
     fn from(value: Error) -> Self {
         use Error::*;
         use lsp_types::*;
@@ -112,7 +108,7 @@ macro_rules! self_matches_current {
 pub struct Parser<'a> {
     tokenizer: TokenizerBuffer<'a>,
     current_token: Option<Token<'a>>,
-    pub errors: Vec<Error>,
+    pub errors: Vec<Error<'a>>,
 }

 impl<'a> Parser<'a> {
@@ -159,18 +155,15 @@ impl<'a> Parser<'a> {
         let (start_line, start_col) = start_token
             .as_ref()
-            .map(|t| (t.line, t.column))
+            .map(|t| (t.line, t.span.start))
             .unwrap_or((1, 1));

         let node = parser(self)?;

-        let end_token = self.current_token.as_ref();
+        let end_token = self.current_token;
         let (end_line, end_col) = end_token
-            .map(|t| {
-                let len = t.original_string.as_ref().map(|s| s.len()).unwrap_or(0);
-                (t.line, t.column + len)
-            })
+            .map(|t| (t.line, t.span.end))
             .unwrap_or((start_line, start_col));

         Ok(Spanned {
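Taken together, the parser-side change swaps quick_error's macro DSL for thiserror's derive: each `#[error(...)]` attribute supplies the `Display` text, and `#[from]` generates the `From<tokenizer::Error>` impl that the old `from()` rule provided, so `?` keeps converting tokenizer errors automatically. A minimal standalone sketch of that mechanism, assuming the thiserror crate and using hypothetical `LowLevel`/`ParseError` names rather than this crate's types:

```rust
use thiserror::Error;

#[derive(Error, Debug)]
enum LowLevel {
    #[error("bad byte {0:#x}")]
    BadByte(u8),
}

#[derive(Error, Debug)]
enum ParseError {
    // `#[from]` generates `impl From<LowLevel> for ParseError`,
    // which is what lets `?` convert the error automatically.
    #[error("Tokenizer Error: {0}")]
    Tokenizer(#[from] LowLevel),

    #[error("Unexpected End of File")]
    UnexpectedEof,
}

fn lex() -> Result<(), LowLevel> {
    Err(LowLevel::BadByte(0xFF))
}

fn parse() -> Result<(), ParseError> {
    lex()?; // LowLevel -> ParseError via the generated From impl
    Ok(())
}

fn main() {
    // Prints: Tokenizer Error: bad byte 0xff
    println!("{}", parse().unwrap_err());
}
```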

File 2 of 3

@@ -5,9 +5,7 @@ use quick_error::quick_error;
 use std::{
     cmp::Ordering,
     collections::VecDeque,
-    io::{BufReader, Cursor, Read, Seek, SeekFrom},
-    iter::Peekable,
-    path::PathBuf,
+    io::{Read, Seek, SeekFrom},
 };

 use token::{Token, TokenType};
@@ -60,7 +58,7 @@ impl<'a> From<&'a str> for Tokenizer<'a> {
 }

 impl<'a> Tokenizer<'a> {
-    fn to_token(&mut self, t_type: TokenType<'a>) -> Token<'a> {
+    fn get_token(&mut self, t_type: TokenType<'a>) -> Token<'a> {
         let mut span = self.lexer.span();
         span.start -= self.lexer.extras.line_start_index;
         span.end -= self.lexer.extras.line_start_index;
@@ -72,14 +70,10 @@ impl<'a> Tokenizer<'a> {
         let to_return = self
             .lexer
             .next()
             .transpose()
-            .map(|t| t.map(|t| self.to_token(t)))?;
+            .map(|t| t.map(|t| self.get_token(t)))?;

         Ok(to_return)
     }
-
-    pub fn peek_next(&mut self) -> Result<Option<Token<'a>>, Error> {
-        todo!()
-    }
 }
// ... Iterator and TokenizerBuffer implementations remain unchanged ...
@@ -101,10 +95,8 @@ impl<'a> Iterator for Tokenizer<'a> {
                 }
             }
             Some(t) => match t {
-                Err(e) => {
-                    todo!()
-                }
-                Ok(t) => Some(Ok(self.to_token(t))),
+                Err(e) => Some(Err(e.into())),
+                Ok(t) => Some(Ok(self.get_token(t))),
             },
         }
     }
@@ -126,7 +118,7 @@ impl<'a> TokenizerBuffer<'a> {
             index: 0,
         }
     }
-    pub fn next_token(&mut self) -> Result<Option<Token>, Error> {
+    pub fn next_token(&mut self) -> Result<Option<Token<'a>>, Error> {
         if let Some(token) = self.buffer.pop_front() {
             self.history.push_back(token.clone());
             self.index += 1;
@@ -141,12 +133,16 @@ impl<'a> TokenizerBuffer<'a> {
         self.index += 1;
         Ok(token)
     }
-    pub fn peek(&mut self) -> Result<Option<Token>, Error> {
+    pub fn peek(&mut self) -> Result<Option<Token<'a>>, Error> {
         if let Some(token) = self.buffer.front() {
             return Ok(Some(token.clone()));
         }
-        let token = self.tokenizer.peek_next()?;
-        Ok(token)
+
+        let Some(new_token) = self.tokenizer.next_token()? else {
+            return Ok(None);
+        };
+        self.buffer.push_front(new_token.clone());
+        Ok(Some(new_token))
     }
     pub fn loc(&self) -> i64 {
         self.index
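The removed `Tokenizer::peek_next` was still a `todo!()`, so `TokenizerBuffer::peek` now does its own one-token lookahead: take the next token from the tokenizer, stash it at the front of the `VecDeque`, and let a later `next_token` pop it back off. A simplified sketch of that buffering pattern, with stand-in `u32` tokens and no history tracking or error type:

```rust
use std::collections::VecDeque;

struct Buffer<I: Iterator<Item = u32>> {
    source: I,
    buffer: VecDeque<u32>,
}

impl<I: Iterator<Item = u32>> Buffer<I> {
    fn next_token(&mut self) -> Option<u32> {
        // Drain any token that an earlier `peek` stashed before
        // pulling fresh ones from the source.
        self.buffer.pop_front().or_else(|| self.source.next())
    }

    fn peek(&mut self) -> Option<u32> {
        if let Some(&t) = self.buffer.front() {
            return Some(t);
        }
        // Pull one token ahead and stash it so `next_token` sees it later.
        let t = self.source.next()?;
        self.buffer.push_front(t);
        Some(t)
    }
}

fn main() {
    let mut b = Buffer { source: [1, 2].into_iter(), buffer: VecDeque::new() };
    assert_eq!(b.peek(), Some(1));       // lookahead does not consume
    assert_eq!(b.next_token(), Some(1)); // the same token comes back out
    assert_eq!(b.next_token(), Some(2));
}
```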

File 3 of 3

@@ -84,6 +84,12 @@ pub struct Token<'a> {
     pub span: Span,
 }

+impl<'a> std::fmt::Display for Token<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.token_type)
+    }
+}
+
 impl<'a> Token<'a> {
     pub fn new(token_type: TokenType<'a>, line: usize, span: Span) -> Self {
         Self {
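The new `Display` impl simply delegates to `token_type`, which is what lets the thiserror messages above interpolate a whole `Token` via `{1}`. A self-contained sketch of the delegation pattern, with stand-in `Token`/`TokenType` definitions rather than this crate's:

```rust
use std::fmt;

struct TokenType(&'static str);

impl fmt::Display for TokenType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

struct Token {
    token_type: TokenType,
}

// Display for the wrapper forwards to the inner value.
impl fmt::Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.token_type)
    }
}

fn main() {
    let t = Token { token_type: TokenType("ident") };
    println!("Unexpected token: {}", t); // -> Unexpected token: ident
}
```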