TEST -- use Cow instead of String for tokens

2025-12-09 13:17:35 -07:00
parent 080b5320f7
commit d40b759442
4 changed files with 140 additions and 138 deletions

View File

@@ -38,7 +38,7 @@ pub trait Tokenize: Read + Seek {}
impl<T> Tokenize for T where T: Read + Seek {}
pub struct Tokenizer<'a> {
- lexer: Lexer<'a, TokenType>,
+ lexer: Lexer<'a, TokenType<'a>>,
returned_eof: bool,
}
@@ -52,14 +52,14 @@ impl<'a> From<&'a str> for Tokenizer<'a> {
}
impl<'a> Tokenizer<'a> {
- fn get_token(&mut self, t_type: TokenType) -> Token {
+ fn get_token(&mut self, t_type: TokenType<'a>) -> Token<'a> {
let mut span = self.lexer.span();
span.start -= self.lexer.extras.line_start_index;
span.end -= self.lexer.extras.line_start_index;
Token::new(t_type, self.lexer.extras.line_count, span)
}
- pub fn next_token(&mut self) -> Result<Option<Token>, Error> {
+ pub fn next_token(&mut self) -> Result<Option<Token<'a>>, Error> {
let mut current = self.lexer.next().transpose();
while matches!(current, Ok(Some(TokenType::Comment(_)))) {
@@ -73,7 +73,7 @@ impl<'a> Tokenizer<'a> {
// ... Iterator and TokenizerBuffer bodies below are otherwise unchanged ...
// Only their signatures pick up the 'a lifetime now carried by Token.
impl<'a> Iterator for Tokenizer<'a> {
- type Item = Result<Token, Error>;
+ type Item = Result<Token<'a>, Error>;
fn next(&mut self) -> Option<Self::Item> {
match self.lexer.next() {
None => {
@@ -98,8 +98,8 @@ impl<'a> Iterator for Tokenizer<'a> {
pub struct TokenizerBuffer<'a> {
tokenizer: Tokenizer<'a>,
- buffer: VecDeque<Token>,
- history: VecDeque<Token>,
+ buffer: VecDeque<Token<'a>>,
+ history: VecDeque<Token<'a>>,
index: i64,
}
@@ -112,7 +112,7 @@ impl<'a> TokenizerBuffer<'a> {
index: 0,
}
}
- pub fn next_token(&mut self) -> Result<Option<Token>, Error> {
+ pub fn next_token(&mut self) -> Result<Option<Token<'a>>, Error> {
if let Some(token) = self.buffer.pop_front() {
self.history.push_back(token.clone());
self.index += 1;
@@ -127,7 +127,7 @@ impl<'a> TokenizerBuffer<'a> {
self.index += 1;
Ok(token)
}
- pub fn peek(&mut self) -> Result<Option<Token>, Error> {
+ pub fn peek(&mut self) -> Result<Option<Token<'a>>, Error> {
if let Some(token) = self.buffer.front() {
return Ok(Some(token.clone()));
}
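
The net effect in this first file: the source lifetime 'a now flows from the Tokenizer into every Token it produces, including through TokenizerBuffer's buffer and history queues. A minimal usage sketch of that property, assuming only the items shown above (From<&'a str> for Tokenizer<'a> and its Iterator impl); the helper name collect_tokens is hypothetical:

// Tokens now borrow from `source` instead of owning copies, so the
// collected Vec stays valid for as long as `source` does, even after
// the Tokenizer that produced it has been dropped.
fn collect_tokens(source: &str) -> Result<Vec<Token<'_>>, Error> {
    Tokenizer::from(source).collect()
}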

View File

@@ -1,3 +1,5 @@
+ use std::borrow::Cow;
use helpers::prelude::*;
use logos::{Lexer, Logos, Skip, Span};
use lsp_types::{Diagnostic, DiagnosticSeverity, Position, Range};
@@ -43,7 +45,7 @@ impl From<LexError> for Diagnostic {
}
impl LexError {
- pub fn from_lexer<'a>(lex: &mut Lexer<'a, TokenType>) -> Self {
+ pub fn from_lexer<'a>(lex: &mut Lexer<'a, TokenType<'a>>) -> Self {
let mut span = lex.span();
let line = lex.extras.line_count;
span.start -= lex.extras.line_start_index;
@@ -68,30 +70,30 @@ pub struct Extras {
pub line_start_index: usize,
}
- fn update_line_index<'a>(lex: &mut Lexer<'a, TokenType>) -> Skip {
+ fn update_line_index<'a>(lex: &mut Lexer<'a, TokenType<'a>>) -> Skip {
lex.extras.line_count += 1;
lex.extras.line_start_index = lex.span().end;
Skip
}
#[derive(Debug, PartialEq, Eq, Clone)]
- pub struct Token {
+ pub struct Token<'a> {
/// The type of the token
- pub token_type: TokenType,
+ pub token_type: TokenType<'a>,
/// The line where the token was found
pub line: usize,
/// The span where the token starts and ends
pub span: Span,
}
- impl std::fmt::Display for Token {
+ impl<'a> std::fmt::Display for Token<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.token_type)
}
}
- impl Token {
- pub fn new(token_type: TokenType, line: usize, span: Span) -> Self {
+ impl<'a> Token<'a> {
+ pub fn new(token_type: TokenType<'a>, line: usize, span: Span) -> Self {
Self {
token_type,
line,
@@ -158,22 +160,22 @@ macro_rules! keyword {
#[logos(skip r"[ \t\f]+")]
#[logos(extras = Extras)]
#[logos(error(LexError, LexError::from_lexer))]
- pub enum TokenType {
+ pub enum TokenType<'a> {
#[regex(r"\n", update_line_index)]
Newline,
// matches strings with double quotes
#[regex(r#""(?:[^"\\]|\\.)*""#, |v| {
let str = v.slice();
- str[1..str.len() - 1].to_string()
+ Cow::from(&str[1..str.len() - 1])
})]
// matches strings with single quotes
#[regex(r#"'(?:[^'\\]|\\.)*'"#, |v| {
let str = v.slice();
- str[1..str.len() - 1].to_string()
+ Cow::from(&str[1..str.len() - 1])
})]
/// Represents a string token
- String(String),
+ String(Cow<'a, str>),
#[regex(r"[0-9][0-9_]*(\.[0-9][0-9_]*)?([cfk])?", parse_number)]
/// Represents a number token
@@ -199,9 +201,9 @@ pub enum TokenType {
/// Represents a keyword token
Keyword(Keyword),
#[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |v| v.slice().to_string())]
#[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |v| Cow::from(v.slice()))]
/// Represents an identifier token
- Identifier(String),
+ Identifier(Cow<'a, str>),
#[token("(", symbol!(LParen))]
#[token(")", symbol!(RParen))]
@@ -236,29 +238,29 @@ pub enum TokenType {
#[token("//", |lex| Comment::Line(read_line(lex)))]
#[token("///", |lex| Comment::Doc(read_line(lex)))]
/// Represents a comment, both a line comment and a doc comment
- Comment(Comment),
+ Comment(Comment<'a>),
#[end]
/// Represents an end of file token
EOF,
}
- fn read_line<'a>(lexer: &mut Lexer<'a, TokenType>) -> String {
+ fn read_line<'a>(lexer: &mut Lexer<'a, TokenType<'a>>) -> Cow<'a, str> {
let rem = lexer.remainder();
let len = rem.find('\n').unwrap_or(rem.len());
let content = rem[..len].trim().to_string();
lexer.bump(len);
- content
+ Cow::from(content)
}
#[derive(Hash, Debug, Eq, PartialEq, Clone)]
- pub enum Comment {
- Line(String),
- Doc(String),
+ pub enum Comment<'a> {
+ Line(Cow<'a, str>),
+ Doc(Cow<'a, str>),
}
- fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType>) -> Result<Number, LexError> {
+ fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType<'a>>) -> Result<Number, LexError> {
let slice = lexer.slice();
let last_char = slice.chars().last().unwrap_or_default();
let (num_str, suffix) = match last_char {
@@ -304,7 +306,7 @@ fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType>) -> Result<Number, LexError
}
}
- impl std::fmt::Display for Comment {
+ impl<'a> std::fmt::Display for Comment<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Line(c) => write!(f, "// {}", c),
@@ -321,7 +323,7 @@ impl std::fmt::Display for Comment {
}
}
- impl Documentation for TokenType {
+ impl<'a> Documentation for TokenType<'a> {
fn docs(&self) -> String {
match self {
Self::Keyword(k) => k.docs(),
@@ -336,7 +338,7 @@ impl Documentation for TokenType {
helpers::with_syscalls!(generate_check);
- impl From<TokenType> for u32 {
+ impl<'a> From<TokenType<'a>> for u32 {
fn from(value: TokenType) -> Self {
match value {
TokenType::String(_) => 1,
@@ -376,7 +378,7 @@ impl From<TokenType> for u32 {
}
}
- impl std::fmt::Display for TokenType {
+ impl<'a> std::fmt::Display for TokenType<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TokenType::String(s) => write!(f, "{}", s),
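
The string and identifier callbacks above always borrow (Cow::from on a &str yields Cow::Borrowed), so the common path is now zero-copy, while read_line still allocates via to_string and lands in the Owned arm. The Owned arm is also what leaves room for rewriting text on demand. A self-contained sketch of that pattern, not code from this commit (the helper unquote is hypothetical and escape handling is simplified to escaped quotes only):

use std::borrow::Cow;

// Strip surrounding quotes, borrowing when the contents need no rewriting
// and allocating only when an escape sequence must be resolved.
fn unquote(slice: &str) -> Cow<'_, str> {
    let inner = &slice[1..slice.len() - 1];
    if inner.contains('\\') {
        // Escapes force an owned, rewritten copy.
        Cow::Owned(inner.replace("\\\"", "\""))
    } else {
        // The common case: a zero-copy borrow of the source buffer.
        Cow::Borrowed(inner)
    }
}

fn main() {
    assert!(matches!(unquote(r#""hello""#), Cow::Borrowed("hello")));
    assert_eq!(unquote(r#""say \"hi\"""#).as_ref(), r#"say "hi""#);
}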