stationeers_lang/rust_compiler/libs/tokenizer/src/lib.rs

pub mod token;

use logos::{Lexer, Logos};
use std::{
    cmp::Ordering,
    collections::VecDeque,
    io::{Read, Seek, SeekFrom},
};
use thiserror::Error;
use token::{Token, TokenType};

#[derive(Error, Debug)]
pub enum Error {
    #[error(transparent)]
    IOError(#[from] std::io::Error),
    #[error(transparent)]
    LexError(#[from] token::LexError),
}

impl From<Error> for lsp_types::Diagnostic {
    fn from(value: Error) -> Self {
        use Error::*;
        use lsp_types::*;
        match value {
            IOError(e) => Diagnostic {
                message: e.to_string(),
                severity: Some(DiagnosticSeverity::ERROR),
                ..Default::default()
            },
            LexError(e) => e.into(),
        }
    }
}
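
// A minimal sketch of how the conversion above might be consumed: any
// tokenizer error collapses into a single `lsp_types::Diagnostic`.
// `diagnostic_for` is a hypothetical helper, not part of this crate's API.
#[allow(dead_code)]
fn diagnostic_for(err: Error) -> lsp_types::Diagnostic {
    // Works for both the IOError and LexError variants via the From impl.
    err.into()
}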

/// Blanket alias for any seekable byte source a tokenizer could read from.
pub trait Tokenize: Read + Seek {}
impl<T> Tokenize for T where T: Read + Seek {}

pub struct Tokenizer<'a> {
    lexer: Lexer<'a, TokenType<'a>>,
    returned_eof: bool,
}

impl<'a> From<&'a str> for Tokenizer<'a> {
    fn from(value: &'a str) -> Self {
        Self {
            lexer: TokenType::lexer(value),
            returned_eof: false,
        }
    }
}

impl<'a> Tokenizer<'a> {
    /// Builds a `Token`, converting the lexer's absolute byte span into a
    /// span relative to the start of the current line.
    fn get_token(&mut self, t_type: TokenType<'a>) -> Token<'a> {
        let mut span = self.lexer.span();
        span.start -= self.lexer.extras.line_start_index;
        span.end -= self.lexer.extras.line_start_index;
        Token::new(t_type, self.lexer.extras.line_count, span)
    }

    /// Returns the next non-comment token, or `None` once input is exhausted.
    pub fn next_token(&mut self) -> Result<Option<Token<'a>>, Error> {
        let mut current = self.lexer.next().transpose();
        while matches!(current, Ok(Some(TokenType::Comment(_)))) {
            current = self.lexer.next().transpose();
        }
        Ok(current.map(|t| t.map(|t| self.get_token(t)))?)
    }
}
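
// A minimal usage sketch for `next_token`: comments are skipped, and `None`
// marks end of input. The source text is purely illustrative, and printing
// assumes `Token` derives `Debug`.
#[allow(dead_code)]
fn demo_next_token() -> Result<(), Error> {
    let mut tokenizer = Tokenizer::from("let x = 1");
    while let Some(token) = tokenizer.next_token()? {
        // Each token carries its line number and a line-relative span.
        println!("{token:?}");
    }
    Ok(())
}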

impl<'a> Iterator for Tokenizer<'a> {
    type Item = Result<Token<'a>, Error>;

    // Unlike `next_token`, the iterator also yields comment tokens.
    fn next(&mut self) -> Option<Self::Item> {
        match self.lexer.next() {
            None => {
                if self.returned_eof {
                    None
                } else {
                    // Emit a single explicit EOF token before the iterator ends.
                    self.returned_eof = true;
                    Some(Ok(Token::new(
                        TokenType::EOF,
                        self.lexer.extras.line_count,
                        self.lexer.span(),
                    )))
                }
            }
            Some(t) => match t {
                Err(e) => Some(Err(e.into())),
                Ok(t) => Some(Ok(self.get_token(t))),
            },
        }
    }
}
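
// A minimal sketch of driving the iterator: every token (comments included)
// plus the trailing EOF token can be collected in one pass, with the first
// lex error short-circuiting the collection.
#[allow(dead_code)]
fn demo_collect() -> Result<Vec<Token<'static>>, Error> {
    Tokenizer::from("let x = 1").collect()
}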

pub struct TokenizerBuffer<'a> {
    tokenizer: Tokenizer<'a>,
    /// Tokens peeked at or restored by a backward seek, ahead of the cursor.
    buffer: VecDeque<Token<'a>>,
    /// Tokens already consumed, kept so the cursor can seek backwards.
    history: VecDeque<Token<'a>>,
    /// Number of tokens consumed so far (the cursor position).
    index: i64,
}

impl<'a> TokenizerBuffer<'a> {
    pub fn new(tokenizer: Tokenizer<'a>) -> Self {
        Self {
            tokenizer,
            buffer: VecDeque::new(),
            history: VecDeque::with_capacity(128),
            index: 0,
        }
    }

    /// Consumes and returns the next token, preferring anything already
    /// buffered by `peek` or a backward seek.
    pub fn next_token(&mut self) -> Result<Option<Token<'a>>, Error> {
        if let Some(token) = self.buffer.pop_front() {
            self.history.push_back(token.clone());
            self.index += 1;
            return Ok(Some(token));
        }
        let token = self.tokenizer.next_token()?;
        if let Some(ref token) = token {
            self.history.push_back(token.clone());
            // Only advance the cursor when a token was actually produced.
            self.index += 1;
        }
        Ok(token)
    }

    /// Returns the next token without consuming it.
    pub fn peek(&mut self) -> Result<Option<Token<'a>>, Error> {
        if let Some(token) = self.buffer.front() {
            return Ok(Some(token.clone()));
        }
        let Some(new_token) = self.tokenizer.next_token()? else {
            return Ok(None);
        };
        self.buffer.push_front(new_token.clone());
        Ok(Some(new_token))
    }

    /// Current cursor position, measured in tokens consumed.
    pub fn loc(&self) -> i64 {
        self.index
    }

    fn seek_from_current(&mut self, seek_to_int: i64) -> Result<(), Error> {
        use Ordering::*;
        match seek_to_int.cmp(&0) {
            Greater => {
                for _ in 0..seek_to_int {
                    // Drain tokens restored by a previous backward seek (or a
                    // peek) before pulling fresh ones from the tokenizer.
                    let token = match self.buffer.pop_front() {
                        Some(token) => token,
                        None => self.tokenizer.next_token()?.ok_or_else(|| {
                            Error::IOError(std::io::Error::new(
                                std::io::ErrorKind::UnexpectedEof,
                                "Unexpected EOF",
                            ))
                        })?,
                    };
                    self.history.push_back(token);
                }
                self.index += seek_to_int;
            }
            Less => {
                let seek_to = seek_to_int.unsigned_abs() as usize;
                for _ in 0..seek_to {
                    let Some(token) = self.history.pop_back() else {
                        return Err(Error::IOError(std::io::Error::new(
                            std::io::ErrorKind::UnexpectedEof,
                            "Seek before start of token history",
                        )));
                    };
                    // The most recent history entry goes to the front, keeping
                    // restored tokens ahead of anything already buffered.
                    self.buffer.push_front(token);
                }
                // `seek_to_int` is negative here, so this moves the cursor back.
                self.index += seek_to_int;
            }
            Equal => {}
        }
        Ok(())
    }

    pub fn seek(&mut self, from: SeekFrom) -> Result<(), Error> {
        match from {
            SeekFrom::Current(seek_to) => self.seek_from_current(seek_to)?,
            _ => unimplemented!("SeekFrom::End/Start not implemented"),
        }
        Ok(())
    }
}
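
// A minimal usage sketch for the buffer: peek without advancing, consume,
// then rewind one token with a relative seek. The source text is purely
// illustrative, and the assertions assume `Token: PartialEq + Debug`.
#[allow(dead_code)]
fn demo_buffer() -> Result<(), Error> {
    let mut buffer = TokenizerBuffer::new(Tokenizer::from("let x = 1"));

    // Peeking does not move the cursor, so the next consume sees the same token.
    let peeked = buffer.peek()?;
    let first = buffer.next_token()?;
    assert_eq!(peeked, first);

    // Consume a second token, then step the cursor back over it.
    let _second = buffer.next_token()?;
    buffer.seek(SeekFrom::Current(-1))?;
    assert_eq!(buffer.loc(), 1);

    Ok(())
}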