2025-12-09 01:43:12 -07:00
parent fac36c756b
commit 72cf9ea042
6 changed files with 85 additions and 69 deletions

View File

@@ -22,7 +22,7 @@ macro_rules! compile {
     (result $source:expr) => {{
         let mut writer = std::io::BufWriter::new(Vec::new());
         let compiler = crate::Compiler::new(
-            parser::Parser::new(tokenizer::Tokenizer::from(String::from($source))),
+            parser::Parser::new(tokenizer::Tokenizer::from($source)),
             &mut writer,
             Some(crate::CompilerConfig { debug: true }),
         );
@@ -32,7 +32,7 @@ macro_rules! compile {
     (debug $source:expr) => {{
         let mut writer = std::io::BufWriter::new(Vec::new());
         let compiler = crate::Compiler::new(
-            parser::Parser::new(tokenizer::Tokenizer::from(String::from($source))),
+            parser::Parser::new(tokenizer::Tokenizer::from($source)),
            &mut writer,
             Some(crate::CompilerConfig { debug: true }),
         );
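The test macro now hands the source expression straight to `tokenizer::Tokenizer::from`, which (per the `impl<'a> From<&'a str> for Tokenizer<'a>` shown later in this commit) borrows the string slice instead of requiring an owned `String`. A minimal sketch of the resulting call site, with a hypothetical test program as input:

// Sketch only: `source` is a hypothetical snippet, not taken from the commit.
let source = "let x = 1";
let mut writer = std::io::BufWriter::new(Vec::new());
let compiler = crate::Compiler::new(
    parser::Parser::new(tokenizer::Tokenizer::from(source)), // &str borrowed directly
    &mut writer,
    Some(crate::CompilerConfig { debug: true }),
);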

View File

@@ -5,7 +5,6 @@ pub mod sys_call;
 pub mod tree_node;
 use crate::sys_call::{Math, System};
-use quick_error::quick_error;
 use std::io::SeekFrom;
 use sys_call::SysCall;
 use thiserror::Error;
@@ -28,27 +27,27 @@ macro_rules! boxed {
 }
 #[derive(Error, Debug)]
-pub enum Error<'a> {
+pub enum Error {
     #[error("Tokenizer Error: {0}")]
     TokenizerError(#[from] tokenizer::Error),
     #[error("Unexpected token: {1}")]
-    UnexpectedToken(Span, Token<'a>),
+    UnexpectedToken(Span, Token),
     #[error("Duplicate identifier: {1}")]
-    DuplicateIdentifier(Span, Token<'a>),
+    DuplicateIdentifier(Span, Token),
     #[error("Invalid Syntax: {1}")]
-    InvalidSyntax(Span, Token<'a>),
+    InvalidSyntax(Span, String),
     #[error("Unsupported Keyword: {1}")]
-    UnsupportedKeyword(Span, Token<'a>),
+    UnsupportedKeyword(Span, Token),
     #[error("Unexpected End of File")]
     UnexpectedEOF,
 }
-impl<'a> From<Error<'a>> for lsp_types::Diagnostic {
+impl From<Error> for lsp_types::Diagnostic {
     fn from(value: Error) -> Self {
         use Error::*;
         use lsp_types::*;
@@ -107,8 +106,8 @@ macro_rules! self_matches_current {
 pub struct Parser<'a> {
     tokenizer: TokenizerBuffer<'a>,
-    current_token: Option<Token<'a>>,
-    pub errors: Vec<Error<'a>>,
+    current_token: Option<Token>,
+    pub errors: Vec<Error>,
 }
 impl<'a> Parser<'a> {
@@ -160,9 +159,10 @@ impl<'a> Parser<'a> {
         let node = parser(self)?;
-        let end_token = self.current_token;
+        let end_token = &self.current_token;
         let (end_line, end_col) = end_token
+            .clone()
             .map(|t| (t.line, t.span.end))
             .unwrap_or((start_line, start_col));
@@ -207,7 +207,7 @@ impl<'a> Parser<'a> {
         let first_token = self.tokenizer.peek().unwrap_or(None);
         let (start_line, start_col) = first_token
             .as_ref()
-            .map(|tok| (tok.line, tok.column))
+            .map(|tok| (tok.line, tok.span.start))
             .unwrap_or((1, 1));
         let mut expressions = Vec::<Spanned<Expression>>::new();
@@ -238,10 +238,7 @@ impl<'a> Parser<'a> {
         let end_token_opt = self.tokenizer.peek().unwrap_or(None);
         let (end_line, end_col) = end_token_opt
-            .map(|tok| {
-                let len = tok.original_string.as_ref().map(|s| s.len()).unwrap_or(0);
-                (tok.line, tok.column + len)
-            })
+            .map(|tok| (tok.line, tok.span.end))
             .unwrap_or((start_line, start_col));
         let span = Span {
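Dropping the `'a` lifetime from `Error` (and from `Token`, below) means parser errors own their data, so they can be collected and mapped into LSP diagnostics through the `From<Error> for lsp_types::Diagnostic` impl without tying the diagnostics to the source slice. A minimal sketch, assuming a `parser::Parser` whose public `errors` vector was populated during an earlier parse:

// Sketch only: `parser` is assumed to be a parser::Parser that has already run.
let diagnostics: Vec<lsp_types::Diagnostic> = parser
    .errors
    .drain(..)                        // errors own their tokens, no borrow of the source
    .map(lsp_types::Diagnostic::from) // via the From<Error> impl shown above
    .collect();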

View File

@@ -44,7 +44,7 @@ pub trait Tokenize: Read + Seek {}
 impl<T> Tokenize for T where T: Read + Seek {}
 pub struct Tokenizer<'a> {
-    lexer: Lexer<'a, TokenType<'a>>,
+    lexer: Lexer<'a, TokenType>,
     returned_eof: bool,
 }
@@ -58,14 +58,14 @@ impl<'a> From<&'a str> for Tokenizer<'a> {
 }
 impl<'a> Tokenizer<'a> {
-    fn get_token(&mut self, t_type: TokenType<'a>) -> Token<'a> {
+    fn get_token(&mut self, t_type: TokenType) -> Token {
         let mut span = self.lexer.span();
         span.start -= self.lexer.extras.line_start_index;
         span.end -= self.lexer.extras.line_start_index;
         Token::new(t_type, self.lexer.extras.line_count, span)
     }
-    pub fn next_token(&mut self) -> Result<Option<Token<'a>>, Error> {
+    pub fn next_token(&mut self) -> Result<Option<Token>, Error> {
         let to_return = self
             .lexer
             .next()
@@ -79,7 +79,7 @@ impl<'a> Tokenizer<'a> {
 // ... Iterator and TokenizerBuffer implementations remain unchanged ...
 // They just call the methods above which now use the passed-in start coordinates.
 impl<'a> Iterator for Tokenizer<'a> {
-    type Item = Result<Token<'a>, Error>;
+    type Item = Result<Token, Error>;
     fn next(&mut self) -> Option<Self::Item> {
         match self.lexer.next() {
             None => {
@@ -104,8 +104,8 @@ impl<'a> Iterator for Tokenizer<'a> {
 pub struct TokenizerBuffer<'a> {
     tokenizer: Tokenizer<'a>,
-    buffer: VecDeque<Token<'a>>,
-    history: VecDeque<Token<'a>>,
+    buffer: VecDeque<Token>,
+    history: VecDeque<Token>,
     index: i64,
 }
@@ -118,7 +118,7 @@ impl<'a> TokenizerBuffer<'a> {
             index: 0,
         }
     }
-    pub fn next_token(&mut self) -> Result<Option<Token<'a>>, Error> {
+    pub fn next_token(&mut self) -> Result<Option<Token>, Error> {
         if let Some(token) = self.buffer.pop_front() {
             self.history.push_back(token.clone());
             self.index += 1;
@@ -133,7 +133,7 @@ impl<'a> TokenizerBuffer<'a> {
         self.index += 1;
         Ok(token)
     }
-    pub fn peek(&mut self) -> Result<Option<Token<'a>>, Error> {
+    pub fn peek(&mut self) -> Result<Option<Token>, Error> {
        if let Some(token) = self.buffer.front() {
             return Ok(Some(token.clone()));
         }
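With `Token` no longer parameterized over `'a`, the `Tokenizer` still borrows the source text while it runs, but every token it yields is fully owned. A minimal sketch of iterating the changed API, assuming a hypothetical one-line input:

// Sketch only: `source` is a hypothetical input string.
let source = "let x = 1";
let tokenizer = tokenizer::Tokenizer::from(source); // borrows `source`
for item in tokenizer {
    match item {
        // Owned Token: line number plus a per-line span, no &'a str payloads.
        Ok(token) => println!("{} @ line {}, cols {}..{}", token, token.line, token.span.start, token.span.end),
        Err(e) => eprintln!("lex error: {}", e),
    }
}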

View File

@@ -43,7 +43,7 @@ impl From<LexError> for Diagnostic {
 }
 impl LexError {
-    pub fn from_lexer<'a>(lex: &mut Lexer<'a, TokenType<'a>>) -> Self {
+    pub fn from_lexer<'a>(lex: &mut Lexer<'a, TokenType>) -> Self {
         let mut span = lex.span();
         let line = lex.extras.line_count;
         span.start -= lex.extras.line_start_index;
@@ -68,30 +68,30 @@ pub struct Extras {
     pub line_start_index: usize,
 }
-fn update_line_index<'a>(lex: &mut Lexer<'a, TokenType<'a>>) -> Skip {
+fn update_line_index<'a>(lex: &mut Lexer<'a, TokenType>) -> Skip {
     lex.extras.line_count += 1;
     lex.extras.line_start_index = lex.span().end;
     Skip
 }
 #[derive(Debug, PartialEq, Eq, Clone)]
-pub struct Token<'a> {
+pub struct Token {
     /// The type of the token
-    pub token_type: TokenType<'a>,
+    pub token_type: TokenType,
     /// The line where the token was found
     pub line: usize,
     /// The span where the token starts and ends
     pub span: Span,
 }
-impl<'a> std::fmt::Display for Token<'a> {
+impl std::fmt::Display for Token {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         write!(f, "{}", self.token_type)
     }
 }
-impl<'a> Token<'a> {
-    pub fn new(token_type: TokenType<'a>, line: usize, span: Span) -> Self {
+impl Token {
+    pub fn new(token_type: TokenType, line: usize, span: Span) -> Self {
         Self {
             token_type,
             line,
@@ -158,16 +158,22 @@ macro_rules! keyword {
 #[logos(skip r"[ \t\f]+")]
 #[logos(extras = Extras)]
 #[logos(error(LexError, LexError::from_lexer))]
-pub enum TokenType<'a> {
+pub enum TokenType {
     #[regex(r"\n", update_line_index)]
     Newline,
     // matches strings with double quotes
-    #[regex(r#""(?:[^"\\]|\\.)*""#)]
+    #[regex(r#""(?:[^"\\]|\\.)*""#, |v| {
+        let str = v.slice();
+        str[1..str.len() - 1].to_string()
+    })]
     // matches strings with single quotes
-    #[regex(r#"'(?:[^'\\]|\\.)*'"#)]
+    #[regex(r#"'(?:[^'\\]|\\.)*'"#, |v| {
+        let str = v.slice();
+        str[1..str.len() - 1].to_string()
+    })]
     /// Represents a string token
-    String(&'a str),
+    String(String),
     #[regex(r"[0-9][0-9_]*(\.[0-9][0-9_]*)?([cfk])?", parse_number)]
     /// Represents a number token
@@ -193,9 +199,9 @@ pub enum TokenType<'a> {
     /// Represents a keyword token
     Keyword(Keyword),
-    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
+    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |v| v.slice().to_string())]
     /// Represents an identifier token
-    Identifier(&'a str),
+    Identifier(String),
     #[token("(", symbol!(LParen))]
     #[token(")", symbol!(RParen))]
@@ -227,10 +233,10 @@ pub enum TokenType<'a> {
     /// Represents a symbol token
     Symbol(Symbol),
-    #[regex(r"///[\n]*", |val| Comment::Doc(val.slice()[3..].trim()))]
-    #[regex(r"//[\n]*", |val| Comment::Line(val.slice()[2..].trim()))]
+    #[regex(r"///[\n]*", |val| Comment::Doc(val.slice()[3..].trim().to_string()))]
+    #[regex(r"//[\n]*", |val| Comment::Line(val.slice()[2..].trim().to_string()))]
     /// Represents a comment, both a line comment and a doc comment
-    Comment(Comment<'a>),
+    Comment(Comment),
     #[end]
     /// Represents an end of file token
@@ -238,12 +244,12 @@ pub enum TokenType<'a> {
 }
 #[derive(Hash, Debug, Eq, PartialEq, Clone)]
-pub enum Comment<'a> {
-    Line(&'a str),
-    Doc(&'a str),
+pub enum Comment {
+    Line(String),
+    Doc(String),
 }
-fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType<'a>>) -> Result<Number, LexError> {
+fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType>) -> Result<Number, LexError> {
     let slice = lexer.slice();
     let last_char = slice.chars().last().unwrap_or_default();
     let (num_str, suffix) = match last_char {
@@ -289,7 +295,7 @@ fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType<'a>>) -> Result<Number, LexE
     }
 }
-impl<'a> std::fmt::Display for Comment<'a> {
+impl std::fmt::Display for Comment {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             Self::Line(c) => write!(f, "// {}", c),
@@ -306,7 +312,7 @@ impl<'a> std::fmt::Display for Comment<'a> {
     }
 }
-impl<'a> Documentation for TokenType<'a> {
+impl Documentation for TokenType {
     fn docs(&self) -> String {
         match self {
             Self::Keyword(k) => k.docs(),
@@ -321,7 +327,7 @@ impl<'a> Documentation for TokenType<'a> {
 helpers::with_syscalls!(generate_check);
-impl<'a> From<TokenType<'a>> for u32 {
+impl From<TokenType> for u32 {
     fn from(value: TokenType) -> Self {
         match value {
             TokenType::String(_) => 1,
@@ -361,7 +367,7 @@ impl<'a> From<TokenType<'a>> for u32 {
     }
 }
-impl<'a> std::fmt::Display for TokenType<'a> {
+impl std::fmt::Display for TokenType {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             TokenType::String(s) => write!(f, "{}", s),
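With the `'a` lifetime removed, every payload-carrying variant owns its text: string literals are unquoted into a `String` by the new regex callbacks, and identifiers and comments are copied out of the lexer slice. A small sketch of the owned variants (the values are hypothetical, not from the commit):

// Sketch only: hypothetical values illustrating the owned payloads.
let ident = TokenType::Identifier(String::from("my_var")); // was Identifier(&'a str)
let text = TokenType::String(String::from("hello"));       // callbacks strip the surrounding quotes
assert_eq!(text.to_string(), "hello");                      // Display prints the contents as before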

View File

@@ -4,7 +4,7 @@ use parser::{sys_call::SysCall, Parser};
 use safer_ffi::prelude::*;
 use std::io::BufWriter;
 use tokenizer::{
-    token::{Token, TokenType},
+    token::{LexError, Token, TokenType},
     Tokenizer,
 };
@@ -96,9 +96,10 @@ pub fn free_docs_vec(v: safer_ffi::Vec<FfiDocumentedItem>) {
 #[ffi_export]
 pub fn compile_from_string(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::String {
     let res = std::panic::catch_unwind(|| {
+        let input = String::from_utf16_lossy(input.as_slice());
         let mut writer = BufWriter::new(Vec::new());
-        let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice()));
+        let tokenizer = Tokenizer::from(input.as_str());
         let parser = Parser::new(tokenizer);
         let compiler = Compiler::new(parser, &mut writer, None);
@@ -120,7 +121,8 @@ pub fn compile_from_string(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::
 #[ffi_export]
 pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec<FfiToken> {
     let res = std::panic::catch_unwind(|| {
-        let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice()));
+        let input = String::from_utf16_lossy(input.as_slice());
+        let tokenizer = Tokenizer::from(input.as_str());
         let mut tokens = Vec::new();
@@ -136,34 +138,36 @@ pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec<Ff
             }
             match token {
                 Err(ref e) => {
-                    use tokenizer::token::LexError;
                     use tokenizer::Error::*;
-                    let (err_str, col, og) = match e {
-                        NumberParseError(_, _, col, og)
-                        | DecimalParseError(_, _, col, og)
-                        | UnknownSymbolError(_, _, col, og)
-                        | UnknownKeywordOrIdentifierError(_, _, col, og) => {
-                            (e.to_string(), col, og)
-                        }
-                        _ => continue,
-                    };
+                    let (err_str, line, span) = match e {
+                        LexError(e) => match e {
+                            LexError::NumberParseError(line, span, err)
+                            | LexError::InvalidInput(line, span, err) => {
+                                (err.to_string(), line, span)
+                            }
+                            _ => continue,
+                        },
+                        _ => continue,
+                    };
                     tokens.push(FfiToken {
-                        column: *col as i32,
+                        column: span.start as i32,
                         error: err_str.into(),
                         tooltip: "".into(),
-                        length: og.len() as i32,
+                        length: (span.end - span.start) as i32,
                         token_kind: 0,
                     })
                 }
                 Ok(Token {
-                    column,
-                    original_string,
+                    line,
+                    span,
                     token_type,
                     ..
                 }) => tokens.push(FfiToken {
-                    column: column as i32,
+                    column: span.start as i32,
                     error: "".into(),
-                    length: (original_string.unwrap_or_default().len()) as i32,
+                    length: (span.end - span.start) as i32,
                     tooltip: token_type.docs().into(),
                     token_kind: token_type.into(),
                 }),
@@ -179,8 +183,10 @@ pub fn tokenize_line(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec<Ff
 #[ffi_export]
 pub fn diagnose_source(input: safer_ffi::slice::Ref<'_, u16>) -> safer_ffi::Vec<FfiDiagnostic> {
     let res = std::panic::catch_unwind(|| {
+        let input = String::from_utf16_lossy(input.as_slice());
         let mut writer = BufWriter::new(Vec::new());
-        let tokenizer = Tokenizer::from(String::from_utf16_lossy(input.as_slice()));
+        let tokenizer = Tokenizer::from(input.as_str());
         let compiler = Compiler::new(Parser::new(tokenizer), &mut writer, None);
         let diagnosis = compiler.compile();
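Because `Tokenizer::from(&str)` now borrows, each FFI entry point first binds the decoded UTF-16 input to a local `String` so the borrow has something to outlive, and token width is derived from the span rather than from the removed `original_string` field. A minimal sketch of the same pattern outside the FFI layer, with a hypothetical input buffer:

// Sketch only: mirrors the borrow-and-measure pattern used in the FFI functions above.
let utf16: Vec<u16> = "let x = 1".encode_utf16().collect(); // hypothetical UTF-16 input
let input = String::from_utf16_lossy(&utf16);   // keep the decoded String alive...
let tokenizer = Tokenizer::from(input.as_str()); // ...so the Tokenizer can borrow it
for token in tokenizer.flatten() {
    let column = token.span.start;
    let length = token.span.end - token.span.start; // replaces original_string.len()
    let kind = u32::from(token.token_type);          // via the From<TokenType> impl
    let _ = (column, length, kind);
}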

View File

@@ -50,8 +50,13 @@ fn run_logic() -> Result<(), StationlangError> {
     let args = Args::parse();
     let input_file = args.input_file;
-    let tokenizer: Tokenizer = match input_file {
-        Some(input_file) => Tokenizer::from_path(&input_file)?,
+    let input_string = match input_file {
+        Some(input_path) => {
+            let mut buf = String::new();
+            let mut file = std::fs::File::open(input_path).unwrap();
+            file.read_to_string(&mut buf).unwrap();
+            buf
+        }
         None => {
             let mut buf = String::new();
             let stdin = std::io::stdin();
@@ -62,10 +67,12 @@ fn run_logic() -> Result<(), StationlangError> {
                 return Ok(());
             }
-            Tokenizer::from(buf)
+            buf
         }
     };
+    let tokenizer = Tokenizer::from(input_string.as_str());
+
     let parser = ASTParser::new(tokenizer);
     let mut writer: BufWriter<Box<dyn Write>> = match args.output_file {
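The CLI now funnels both the file and stdin paths into a single owned `input_string` and constructs the borrowing `Tokenizer` once. A condensed sketch of the resulting flow (the commit itself uses `File::open` plus `read_to_string` and `unwrap()`; this sketch substitutes `std::fs::read_to_string` and omits the empty-stdin early return):

// Sketch only: condensed from the control flow above.
use std::io::Read;

let input_string = match args.input_file {
    Some(path) => std::fs::read_to_string(path).unwrap(), // commit: File::open + read_to_string
    None => {
        let mut buf = String::new();
        std::io::stdin().read_to_string(&mut buf).unwrap();
        buf
    }
};
let tokenizer = Tokenizer::from(input_string.as_str()); // borrows input_string
let parser = ASTParser::new(tokenizer);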