This commit is contained in:
2025-06-12 21:22:58 -07:00
parent 245567c354
commit a28c70add7
11 changed files with 131 additions and 1263 deletions

1
Cargo.lock generated
View File

@@ -602,6 +602,7 @@ dependencies = [
"clap", "clap",
"quick-error", "quick-error",
"rust_decimal", "rust_decimal",
"tokenizer",
] ]
[[package]] [[package]]

View File

@@ -18,6 +18,7 @@ path = "src/main.rs"
clap = { version = "^4.5", features = ["derive"] } clap = { version = "^4.5", features = ["derive"] }
quick-error = { workspace = true } quick-error = { workspace = true }
rust_decimal = { workspace = true } rust_decimal = { workspace = true }
tokenizer = { path = "libs/tokenizer" }
[dev-dependencies] [dev-dependencies]
anyhow = { version = "^1.0", features = ["backtrace"] } anyhow = { version = "^1.0", features = ["backtrace"] }

View File

@@ -39,7 +39,7 @@ pub trait Tokenize: Read + Seek {}
impl<T> Tokenize for T where T: Read + Seek {} impl<T> Tokenize for T where T: Read + Seek {}
pub(crate) struct Tokenizer { pub struct Tokenizer {
reader: BufReader<Box<dyn Tokenize>>, reader: BufReader<Box<dyn Tokenize>>,
char_buffer: [u8; 1], char_buffer: [u8; 1],
line: usize, line: usize,
@@ -185,8 +185,8 @@ impl Tokenizer {
/// If there are no more tokens in the stream, this function returns None /// If there are no more tokens in the stream, this function returns None
pub fn peek_next(&mut self) -> Result<Option<Token>, TokenizerError> { pub fn peek_next(&mut self) -> Result<Option<Token>, TokenizerError> {
let current_pos = self.reader.stream_position()?; let current_pos = self.reader.stream_position()?;
let column = self.column.clone(); let column = self.column;
let line = self.line.clone(); let line = self.line;
let token = self.next_token()?; let token = self.next_token()?;
self.reader.seek(SeekFrom::Start(current_pos))?; self.reader.seek(SeekFrom::Start(current_pos))?;
@@ -280,8 +280,8 @@ impl Tokenizer {
let mut decimal: Option<String> = None; let mut decimal: Option<String> = None;
let mut reading_decimal = false; let mut reading_decimal = false;
let column = self.column.clone(); let column = self.column;
let line = self.line.clone(); let line = self.line;
primary.push(first_char); primary.push(first_char);
@@ -353,8 +353,8 @@ impl Tokenizer {
fn tokenize_string(&mut self, beginning_quote: char) -> Result<Token, TokenizerError> { fn tokenize_string(&mut self, beginning_quote: char) -> Result<Token, TokenizerError> {
let mut buffer = String::with_capacity(16); let mut buffer = String::with_capacity(16);
let column = self.column.clone(); let column = self.column;
let line = self.line.clone(); let line = self.line;
while let Some(next_char) = self.next_char()? { while let Some(next_char) = self.next_char()? {
if next_char == beginning_quote { if next_char == beginning_quote {
@@ -385,13 +385,13 @@ impl Tokenizer {
/// Helper macro to check if the next character is whitespace or not alphanumeric /// Helper macro to check if the next character is whitespace or not alphanumeric
macro_rules! next_ws { macro_rules! next_ws {
() => { () => {
matches!(self.peek_next_char()?, Some(x) if x.is_whitespace() || !x.is_alphanumeric()) || matches!(self.peek_next_char()?, None) matches!(self.peek_next_char()?, Some(x) if x.is_whitespace() || !x.is_alphanumeric()) || self.peek_next_char()?.is_none()
}; };
} }
let mut buffer = String::with_capacity(16); let mut buffer = String::with_capacity(16);
let line = self.line.clone(); let line = self.line;
let column = self.column.clone(); let column = self.column;
let mut looped_char = Some(first_char); let mut looped_char = Some(first_char);
@@ -464,7 +464,7 @@ impl TokenizerBuffer {
/// Reads the next token from the tokenizer, pushing the value to the back of the history /// Reads the next token from the tokenizer, pushing the value to the back of the history
/// and returning the token /// and returning the token
pub fn next(&mut self) -> Result<Option<Token>, TokenizerError> { pub fn next_token(&mut self) -> Result<Option<Token>, TokenizerError> {
if let Some(token) = self.buffer.pop_front() { if let Some(token) = self.buffer.pop_front() {
self.history.push_back(token.clone()); self.history.push_back(token.clone());
return Ok(Some(token)); return Ok(Some(token));
@@ -561,12 +561,12 @@ mod tests {
let tokenizer = Tokenizer::from(TEST_STRING.to_owned()); let tokenizer = Tokenizer::from(TEST_STRING.to_owned());
let mut buffer = TokenizerBuffer::new(tokenizer); let mut buffer = TokenizerBuffer::new(tokenizer);
let token = buffer.next()?.unwrap(); let token = buffer.next_token()?.unwrap();
assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn)); assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn));
buffer.seek(SeekFrom::Current(1))?; buffer.seek(SeekFrom::Current(1))?;
let token = buffer.next()?.unwrap(); let token = buffer.next_token()?.unwrap();
assert_eq!(token.token_type, TokenType::Symbol(Symbol::LParen)); assert_eq!(token.token_type, TokenType::Symbol(Symbol::LParen));
@@ -870,8 +870,8 @@ mod tests {
fn test_peek_next() -> Result<()> { fn test_peek_next() -> Result<()> {
let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned()); let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());
let column = tokenizer.column.clone(); let column = tokenizer.column;
let line = tokenizer.line.clone(); let line = tokenizer.line;
let peeked_token = tokenizer.peek_next()?; let peeked_token = tokenizer.peek_next()?;
@@ -894,4 +894,3 @@ mod tests {
Ok(()) Ok(())
} }
} }

View File

@@ -106,7 +106,7 @@ impl std::fmt::Display for Number {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {
Number::Integer(i) => write!(f, "{}", i), Number::Integer(i) => write!(f, "{}", i),
Number::Decimal(d) => write!(f, "{}", d.to_string()), Number::Decimal(d) => write!(f, "{}", d),
} }
} }
} }
@@ -172,29 +172,26 @@ pub enum Symbol {
impl Symbol { impl Symbol {
pub fn is_operator(&self) -> bool { pub fn is_operator(&self) -> bool {
match self { matches!(
Symbol::Plus | Symbol::Minus | Symbol::Asterisk | Symbol::Slash | Symbol::Exp => true, self,
_ => false, Symbol::Plus | Symbol::Minus | Symbol::Asterisk | Symbol::Slash | Symbol::Exp
} )
} }
pub fn is_comparison(&self) -> bool { pub fn is_comparison(&self) -> bool {
match self { matches!(
self,
Symbol::LessThan Symbol::LessThan
| Symbol::GreaterThan | Symbol::GreaterThan
| Symbol::Equal | Symbol::Equal
| Symbol::NotEqual | Symbol::NotEqual
| Symbol::LessThanOrEqual | Symbol::LessThanOrEqual
| Symbol::GreaterThanOrEqual => true, | Symbol::GreaterThanOrEqual,
_ => false, )
}
} }
pub fn is_logical(&self) -> bool { pub fn is_logical(&self) -> bool {
match self { matches!(self, Symbol::LogicalAnd | Symbol::LogicalOr)
Symbol::LogicalAnd | Symbol::LogicalOr => true,
_ => false,
}
} }
} }

View File

@@ -127,17 +127,14 @@ impl<'a> Compiler<'a> {
fn expression(&mut self, expression: Expression) -> Result<(), CompileError> { fn expression(&mut self, expression: Expression) -> Result<(), CompileError> {
match expression { match expression {
Expression::FunctionExpression(expr) => self.function_expression(expr)?, Expression::Function(expr) => self.function_expression(expr)?,
Expression::BlockExpression(expr) => self.block_expression(expr)?, Expression::Block(expr) => self.block_expression(expr)?,
Expression::InvocationExpression(expr) => self.invocation_expression(expr)?, Expression::Invocation(expr) => self.invocation_expression(expr)?,
Expression::BinaryExpression(expr) => self.binary_expression(expr)?, Expression::Binary(expr) => self.binary_expression(expr)?,
Expression::DeclarationExpression(var_name, expr) => { Expression::Declaration(var_name, expr) => {
self.declaration_expression(&var_name, *expr)? self.declaration_expression(&var_name, *expr)?
} }
Expression::DeviceDeclarationExpression(DeviceDeclarationExpression { Expression::DeviceDeclaration(DeviceDeclarationExpression { name, device }) => {
name,
device,
}) => {
self.devices.insert(name, device); self.devices.insert(name, device);
} }
_ => todo!("{:?}", expression), _ => todo!("{:?}", expression),
@@ -156,11 +153,11 @@ impl<'a> Compiler<'a> {
self.push_stack(var_name)?; self.push_stack(var_name)?;
self.write_output(format!("push {num}"))?; self.write_output(format!("push {num}"))?;
} }
Expression::BinaryExpression(expr) => { Expression::Binary(expr) => {
self.binary_expression(expr)?; self.binary_expression(expr)?;
self.push_stack(var_name)?; self.push_stack(var_name)?;
} }
Expression::SyscallExpression(expr) => { Expression::Syscall(expr) => {
self.syscall_declaration_expression(expr)?; self.syscall_declaration_expression(expr)?;
self.push_stack(var_name)?; self.push_stack(var_name)?;
} }
@@ -172,6 +169,7 @@ impl<'a> Compiler<'a> {
fn syscall_declaration_expression(&mut self, expr: SysCall) -> Result<(), CompileError> { fn syscall_declaration_expression(&mut self, expr: SysCall) -> Result<(), CompileError> {
use crate::parser::sys_call::System; use crate::parser::sys_call::System;
#[allow(clippy::collapsible_match)]
match expr { match expr {
SysCall::System(ref sys) => match sys { SysCall::System(ref sys) => match sys {
System::LoadFromDevice(LiteralOrVariable::Variable(device), value) => { System::LoadFromDevice(LiteralOrVariable::Variable(device), value) => {
@@ -212,12 +210,12 @@ impl<'a> Compiler<'a> {
compiler.write_output("push r15")?; compiler.write_output("push r15")?;
compiler.push_stack(&format!("{op}ExpressionLeft"))?; compiler.push_stack(&format!("{op}ExpressionLeft"))?;
} }
Expression::BinaryExpression(expr) => { Expression::Binary(expr) => {
compiler.binary_expression(expr)?; compiler.binary_expression(expr)?;
compiler.push_stack(&format!("{op}ExpressionLeft"))?; compiler.push_stack(&format!("{op}ExpressionLeft"))?;
} }
Expression::PriorityExpression(expr) => match *expr { Expression::Priority(expr) => match *expr {
Expression::BinaryExpression(expr) => { Expression::Binary(expr) => {
compiler.binary_expression(expr)?; compiler.binary_expression(expr)?;
compiler.push_stack(&format!("{op}ExpressionLeft"))?; compiler.push_stack(&format!("{op}ExpressionLeft"))?;
} }
@@ -238,12 +236,12 @@ impl<'a> Compiler<'a> {
compiler.write_output("push r15")?; compiler.write_output("push r15")?;
compiler.push_stack(&format!("{op}ExpressionRight"))?; compiler.push_stack(&format!("{op}ExpressionRight"))?;
} }
Expression::BinaryExpression(expr) => { Expression::Binary(expr) => {
compiler.binary_expression(expr)?; compiler.binary_expression(expr)?;
compiler.push_stack(&format!("{op}ExpressionRight"))?; compiler.push_stack(&format!("{op}ExpressionRight"))?;
} }
Expression::PriorityExpression(expr) => match *expr { Expression::Priority(expr) => match *expr {
Expression::BinaryExpression(expr) => { Expression::Binary(expr) => {
compiler.binary_expression(expr)?; compiler.binary_expression(expr)?;
compiler.push_stack(&format!("{op}ExpressionRight"))?; compiler.push_stack(&format!("{op}ExpressionRight"))?;
} }
@@ -304,7 +302,7 @@ impl<'a> Compiler<'a> {
to_write.push_str("get r15 db r15\n"); to_write.push_str("get r15 db r15\n");
to_write.push_str("push r15\n"); to_write.push_str("push r15\n");
} }
Expression::BinaryExpression(expr) => { Expression::Binary(expr) => {
self.binary_expression(expr)?; self.binary_expression(expr)?;
to_write.push_str("push r0\n"); to_write.push_str("push r0\n");
} }
@@ -353,11 +351,9 @@ impl<'a> Compiler<'a> {
// hoist functions to the top of the block // hoist functions to the top of the block
expression.0.sort_by(|a, b| { expression.0.sort_by(|a, b| {
if matches!(a, Expression::FunctionExpression(_)) if matches!(a, Expression::Function(_)) && matches!(b, Expression::Function(_)) {
&& matches!(b, Expression::FunctionExpression(_))
{
Ordering::Equal Ordering::Equal
} else if matches!(a, Expression::FunctionExpression(_)) { } else if matches!(a, Expression::Function(_)) {
Ordering::Less Ordering::Less
} else { } else {
Ordering::Greater Ordering::Greater
@@ -366,7 +362,7 @@ impl<'a> Compiler<'a> {
for expr in expression.0 { for expr in expression.0 {
// if we haven't declared main yet and we have already declared all the function expressions, declare main // if we haven't declared main yet and we have already declared all the function expressions, declare main
if !self.declared_main && !matches!(expr, Expression::FunctionExpression(_)) { if !self.declared_main && !matches!(expr, Expression::Function(_)) {
self.write_output("main:")?; self.write_output("main:")?;
self.declared_main = true; self.declared_main = true;
} }

View File

@@ -3,7 +3,6 @@ extern crate quick_error;
mod compiler; mod compiler;
mod parser; mod parser;
mod tokenizer;
use clap::Parser; use clap::Parser;
use compiler::Compiler; use compiler::Compiler;
@@ -11,6 +10,7 @@ use parser::Parser as ASTParser;
use std::{ use std::{
fs::File, fs::File,
io::{BufWriter, Read, Write}, io::{BufWriter, Read, Write},
path::PathBuf,
}; };
use tokenizer::{Tokenizer, TokenizerError}; use tokenizer::{Tokenizer, TokenizerError};
@@ -49,10 +49,10 @@ quick_error! {
struct Args { struct Args {
/// What file should be compiled. If not set, input will be read from stdin. /// What file should be compiled. If not set, input will be read from stdin.
#[arg(short, long)] #[arg(short, long)]
input_file: Option<String>, input_file: Option<PathBuf>,
/// The output file for the compiled program. If not set, output will go to stdout. /// The output file for the compiled program. If not set, output will go to stdout.
#[arg(short, long)] #[arg(short, long)]
output_file: Option<String>, output_file: Option<PathBuf>,
} }
fn run_logic() -> Result<(), StationlangError> { fn run_logic() -> Result<(), StationlangError> {

View File

@@ -1,15 +1,13 @@
pub mod sys_call; pub mod sys_call;
pub mod tree_node; pub mod tree_node;
use crate::{ use crate::boxed;
boxed,
tokenizer::{
token::{Keyword, Symbol, Token, TokenType},
Tokenizer, TokenizerBuffer, TokenizerError,
},
};
use std::io::SeekFrom; use std::io::SeekFrom;
use sys_call::SysCall; use sys_call::SysCall;
use tokenizer::{
token::{Keyword, Symbol, Token, TokenType},
Tokenizer, TokenizerBuffer, TokenizerError,
};
use tree_node::*; use tree_node::*;
quick_error! { quick_error! {
@@ -119,9 +117,7 @@ impl Parser {
expressions.push(expression); expressions.push(expression);
} }
Ok(Some(Expression::BlockExpression(BlockExpression( Ok(Some(Expression::Block(BlockExpression(expressions))))
expressions,
))))
} }
/// Parses the input from the tokenizer buffer and returns the resulting expression /// Parses the input from the tokenizer buffer and returns the resulting expression
@@ -138,7 +134,7 @@ impl Parser {
/// Assigns the next token in the tokenizer buffer to the current token /// Assigns the next token in the tokenizer buffer to the current token
fn assign_next(&mut self) -> Result<(), ParseError> { fn assign_next(&mut self) -> Result<(), ParseError> {
self.current_token = self.tokenizer.next()?; self.current_token = self.tokenizer.next_token()?;
Ok(()) Ok(())
} }
@@ -174,43 +170,41 @@ impl Parser {
// match declarations with a `let` keyword // match declarations with a `let` keyword
TokenType::Keyword(Keyword::Let) => self.declaration()?, TokenType::Keyword(Keyword::Let) => self.declaration()?,
TokenType::Keyword(Keyword::Device) => { TokenType::Keyword(Keyword::Device) => Expression::DeviceDeclaration(self.device()?),
Expression::DeviceDeclarationExpression(self.device()?)
}
// match functions with a `fn` keyword // match functions with a `fn` keyword
TokenType::Keyword(Keyword::Fn) => Expression::FunctionExpression(self.function()?), TokenType::Keyword(Keyword::Fn) => Expression::Function(self.function()?),
// match syscalls with a `syscall` keyword // match syscalls with a `syscall` keyword
TokenType::Identifier(ref id) if SysCall::is_syscall(id) => { TokenType::Identifier(ref id) if SysCall::is_syscall(id) => {
Expression::SyscallExpression(self.syscall()?) Expression::Syscall(self.syscall()?)
} }
// match a variable expression with opening parenthesis // match a variable expression with opening parenthesis
TokenType::Identifier(_) TokenType::Identifier(_)
if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) => if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) =>
{ {
Expression::InvocationExpression(self.invocation()?) Expression::Invocation(self.invocation()?)
} }
// match a variable expression with an assignment // match a variable expression with an assignment
TokenType::Identifier(_) TokenType::Identifier(_)
if self_matches_peek!(self, TokenType::Symbol(Symbol::Assign)) => if self_matches_peek!(self, TokenType::Symbol(Symbol::Assign)) =>
{ {
Expression::AssignmentExpression(self.assignment()?) Expression::Assignment(self.assignment()?)
} }
// match variable expressions with an identifier // match variable expressions with an identifier
TokenType::Identifier(ref id) => Expression::Variable(id.clone()), TokenType::Identifier(ref id) => Expression::Variable(id.clone()),
// match block expressions with a `{` symbol // match block expressions with a `{` symbol
TokenType::Symbol(Symbol::LBrace) => Expression::BlockExpression(self.block()?), TokenType::Symbol(Symbol::LBrace) => Expression::Block(self.block()?),
// match literal expressions with a semi-colon afterwards // match literal expressions with a semi-colon afterwards
TokenType::Number(_) | TokenType::String(_) => Expression::Literal(self.literal()?), TokenType::Number(_) | TokenType::String(_) => Expression::Literal(self.literal()?),
// match priority expressions with a left parenthesis // match priority expressions with a left parenthesis
TokenType::Symbol(Symbol::LParen) => Expression::PriorityExpression(self.priority()?), TokenType::Symbol(Symbol::LParen) => Expression::Priority(self.priority()?),
_ => { _ => {
return Err(ParseError::UnexpectedToken(current_token.clone())); return Err(ParseError::UnexpectedToken(current_token.clone()));
@@ -223,13 +217,13 @@ impl Parser {
// check if the next or current token is an operator // check if the next or current token is an operator
if self_matches_peek!(self, TokenType::Symbol(s) if s.is_operator()) { if self_matches_peek!(self, TokenType::Symbol(s) if s.is_operator()) {
return Ok(Some(Expression::BinaryExpression(self.binary(expr)?))); return Ok(Some(Expression::Binary(self.binary(expr)?)));
} }
// This is an edge case. We need to move back one token if the current token is an operator // This is an edge case. We need to move back one token if the current token is an operator
// so the binary expression can pick up the operator // so the binary expression can pick up the operator
else if self_matches_current!(self, TokenType::Symbol(s) if s.is_operator()) { else if self_matches_current!(self, TokenType::Symbol(s) if s.is_operator()) {
self.tokenizer.seek(SeekFrom::Current(-1))?; self.tokenizer.seek(SeekFrom::Current(-1))?;
return Ok(Some(Expression::BinaryExpression(self.binary(expr)?))); return Ok(Some(Expression::Binary(self.binary(expr)?)));
} }
Ok(Some(expr)) Ok(Some(expr))
@@ -248,14 +242,12 @@ impl Parser {
Ok(Expression::Variable(ident)) Ok(Expression::Variable(ident))
} }
// A priority expression ( -> (1 + 2) <- + 3 ) // A priority expression ( -> (1 + 2) <- + 3 )
TokenType::Symbol(Symbol::LParen) => { TokenType::Symbol(Symbol::LParen) => self.priority().map(Expression::Priority),
self.priority().map(Expression::PriorityExpression)
}
// A function invocation // A function invocation
TokenType::Identifier(_) TokenType::Identifier(_)
if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) => if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) =>
{ {
self.invocation().map(Expression::InvocationExpression) self.invocation().map(Expression::Invocation)
} }
_ => Err(ParseError::UnexpectedToken(current_token.clone())), _ => Err(ParseError::UnexpectedToken(current_token.clone())),
} }
@@ -322,9 +314,9 @@ impl Parser {
// first, make sure the previous expression supports binary expressions // first, make sure the previous expression supports binary expressions
match previous { match previous {
Expression::BinaryExpression(_) // 1 + 2 + 3 Expression::Binary(_) // 1 + 2 + 3
| Expression::InvocationExpression(_) // add() + 3 | Expression::Invocation(_) // add() + 3
| Expression::PriorityExpression(_) // (1 + 2) + 3 | Expression::Priority(_) // (1 + 2) + 3
| Expression::Literal(Literal::Number(_)) // 1 + 2 (no addition of strings) | Expression::Literal(Literal::Number(_)) // 1 + 2 (no addition of strings)
| Expression::Variable(_) // x + 2 | Expression::Variable(_) // x + 2
| Expression::Negation(_) // -1 + 2 | Expression::Negation(_) // -1 + 2
@@ -371,10 +363,7 @@ impl Parser {
let right = expressions.remove(index); let right = expressions.remove(index);
expressions.insert( expressions.insert(
index, index,
Expression::BinaryExpression(BinaryExpression::Exponent( Expression::Binary(BinaryExpression::Exponent(boxed!(left), boxed!(right))),
boxed!(left),
boxed!(right),
)),
); );
current_iteration += 1; current_iteration += 1;
} }
@@ -394,17 +383,11 @@ impl Parser {
match operator { match operator {
Symbol::Asterisk => expressions.insert( Symbol::Asterisk => expressions.insert(
index, index,
Expression::BinaryExpression(BinaryExpression::Multiply( Expression::Binary(BinaryExpression::Multiply(boxed!(left), boxed!(right))),
boxed!(left),
boxed!(right),
)),
), ),
Symbol::Slash => expressions.insert( Symbol::Slash => expressions.insert(
index, index,
Expression::BinaryExpression(BinaryExpression::Divide( Expression::Binary(BinaryExpression::Divide(boxed!(left), boxed!(right))),
boxed!(left),
boxed!(right),
)),
), ),
// safety: we have already checked for the operator // safety: we have already checked for the operator
_ => unreachable!(), _ => unreachable!(),
@@ -427,17 +410,11 @@ impl Parser {
match operator { match operator {
Symbol::Plus => expressions.insert( Symbol::Plus => expressions.insert(
index, index,
Expression::BinaryExpression(BinaryExpression::Add( Expression::Binary(BinaryExpression::Add(boxed!(left), boxed!(right))),
boxed!(left),
boxed!(right),
)),
), ),
Symbol::Minus => expressions.insert( Symbol::Minus => expressions.insert(
index, index,
Expression::BinaryExpression(BinaryExpression::Subtract( Expression::Binary(BinaryExpression::Subtract(boxed!(left), boxed!(right))),
boxed!(left),
boxed!(right),
)),
), ),
// safety: we have already checked for the operator // safety: we have already checked for the operator
_ => unreachable!(), _ => unreachable!(),
@@ -467,7 +444,7 @@ impl Parser {
// Ensure the last expression is a binary expression // Ensure the last expression is a binary expression
match expressions.pop().unwrap() { match expressions.pop().unwrap() {
Expression::BinaryExpression(binary) => Ok(binary), Expression::Binary(binary) => Ok(binary),
_ => unreachable!(), _ => unreachable!(),
} }
} }
@@ -512,7 +489,7 @@ impl Parser {
let current_token = token_from_option!(self.current_token); let current_token = token_from_option!(self.current_token);
let expression = self.expression()?.ok_or(ParseError::UnexpectedEOF)?; let expression = self.expression()?.ok_or(ParseError::UnexpectedEOF)?;
if let Expression::BlockExpression(_) = expression { if let Expression::Block(_) = expression {
return Err(ParseError::InvalidSyntax( return Err(ParseError::InvalidSyntax(
current_token, current_token,
String::from("Block expressions are not allowed in function invocations"), String::from("Block expressions are not allowed in function invocations"),
@@ -568,7 +545,7 @@ impl Parser {
if token_matches!(current_token, TokenType::Keyword(Keyword::Return)) { if token_matches!(current_token, TokenType::Keyword(Keyword::Return)) {
self.assign_next()?; self.assign_next()?;
let expression = self.expression()?.ok_or(ParseError::UnexpectedEOF)?; let expression = self.expression()?.ok_or(ParseError::UnexpectedEOF)?;
let return_expr = Expression::ReturnExpression(boxed!(expression)); let return_expr = Expression::Return(boxed!(expression));
expressions.push(return_expr); expressions.push(return_expr);
self.assign_next()?; self.assign_next()?;
} }
@@ -604,7 +581,7 @@ impl Parser {
return Err(ParseError::UnexpectedToken(current_token.clone())); return Err(ParseError::UnexpectedToken(current_token.clone()));
} }
Ok(Expression::DeclarationExpression( Ok(Expression::Declaration(
identifier, identifier,
boxed!(assignment_expression), boxed!(assignment_expression),
)) ))

View File

@@ -151,11 +151,29 @@ impl std::fmt::Display for SysCall {
impl SysCall { impl SysCall {
pub fn is_syscall(identifier: &str) -> bool { pub fn is_syscall(identifier: &str) -> bool {
match identifier { matches!(
"yield" | "sleep" | "HASH" | "loadFromDevice" | "setOnDevice" => true, identifier,
"acos" | "asin" | "atan" | "atan2" | "abs" | "ceil" | "cos" | "floor" | "log" "yield"
| "max" | "min" | "rand" | "sin" | "sqrt" | "tan" | "trunc" => true, | "sleep"
_ => false, | "HASH"
} | "loadFromDevice"
| "setOnDevice"
| "acos"
| "asin"
| "atan"
| "atan2"
| "abs"
| "ceil"
| "cos"
| "floor"
| "log"
| "max"
| "min"
| "rand"
| "sin"
| "sqrt"
| "tan"
| "trunc"
)
} }
} }

View File

@@ -1,6 +1,5 @@
use crate::tokenizer::token::Number;
use super::sys_call::SysCall; use super::sys_call::SysCall;
use tokenizer::token::Number;
#[derive(Debug, Eq, PartialEq, Clone)] #[derive(Debug, Eq, PartialEq, Clone)]
pub enum Literal { pub enum Literal {
@@ -92,11 +91,7 @@ impl std::fmt::Display for FunctionExpression {
f, f,
"(fn {}({}) {{ {} }})", "(fn {}({}) {{ {} }})",
self.name, self.name,
self.arguments self.arguments.to_vec().join(", "),
.iter()
.cloned()
.collect::<Vec<String>>()
.join(", "),
self.body self.body
) )
} }
@@ -171,20 +166,20 @@ impl std::fmt::Display for DeviceDeclarationExpression {
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub enum Expression { pub enum Expression {
AssignmentExpression(AssignmentExpression), Assignment(AssignmentExpression),
BinaryExpression(BinaryExpression), Binary(BinaryExpression),
BlockExpression(BlockExpression), Block(BlockExpression),
DeclarationExpression(String, Box<Expression>), Declaration(String, Box<Expression>),
FunctionExpression(FunctionExpression), Function(FunctionExpression),
InvocationExpression(InvocationExpression), Invocation(InvocationExpression),
Literal(Literal), Literal(Literal),
LogicalExpression(LogicalExpression), Logical(LogicalExpression),
Negation(Box<Expression>), Negation(Box<Expression>),
PriorityExpression(Box<Expression>), Priority(Box<Expression>),
ReturnExpression(Box<Expression>), Return(Box<Expression>),
Variable(String), Variable(String),
DeviceDeclarationExpression(DeviceDeclarationExpression), DeviceDeclaration(DeviceDeclarationExpression),
SyscallExpression(SysCall), Syscall(SysCall),
} }
impl std::fmt::Display for Expression { impl std::fmt::Display for Expression {
@@ -192,18 +187,18 @@ impl std::fmt::Display for Expression {
match self { match self {
Expression::Literal(l) => write!(f, "{}", l), Expression::Literal(l) => write!(f, "{}", l),
Expression::Negation(e) => write!(f, "(-{})", e), Expression::Negation(e) => write!(f, "(-{})", e),
Expression::BinaryExpression(e) => write!(f, "{}", e), Expression::Binary(e) => write!(f, "{}", e),
Expression::LogicalExpression(e) => write!(f, "{}", e), Expression::Logical(e) => write!(f, "{}", e),
Expression::AssignmentExpression(e) => write!(f, "{}", e), Expression::Assignment(e) => write!(f, "{}", e),
Expression::DeclarationExpression(id, e) => write!(f, "(let {} = {})", id, e), Expression::Declaration(id, e) => write!(f, "(let {} = {})", id, e),
Expression::FunctionExpression(e) => write!(f, "{}", e), Expression::Function(e) => write!(f, "{}", e),
Expression::BlockExpression(e) => write!(f, "{}", e), Expression::Block(e) => write!(f, "{}", e),
Expression::InvocationExpression(e) => write!(f, "{}", e), Expression::Invocation(e) => write!(f, "{}", e),
Expression::Variable(id) => write!(f, "{}", id), Expression::Variable(id) => write!(f, "{}", id),
Expression::PriorityExpression(e) => write!(f, "({})", e), Expression::Priority(e) => write!(f, "({})", e),
Expression::ReturnExpression(e) => write!(f, "(return {})", e), Expression::Return(e) => write!(f, "(return {})", e),
Expression::DeviceDeclarationExpression(e) => write!(f, "{}", e), Expression::DeviceDeclaration(e) => write!(f, "{}", e),
Expression::SyscallExpression(e) => write!(f, "{}", e), Expression::Syscall(e) => write!(f, "{}", e),
} }
} }
} }

View File

@@ -1,895 +0,0 @@
pub mod token;
use rust_decimal::Decimal;
use std::{
cmp::Ordering,
collections::VecDeque,
io::{BufReader, Cursor, Read, Seek, SeekFrom},
path::PathBuf,
};
use token::{Keyword, Number, Symbol, Temperature, Token, TokenType};
quick_error! {
/// Errors produced while reading from the input stream or turning it into tokens.
///
/// Every variant except `IOError` carries the line and column that were passed in
/// when the error was constructed.
#[derive(Debug)]
pub enum TokenizerError {
/// Failure of the underlying reader. `from()` lets `?` convert a
/// `std::io::Error` into this variant automatically.
IOError(err: std::io::Error) {
from()
display("IO Error: {}", err)
source(err)
}
/// Wraps a `std::num::ParseIntError` together with a line/column position.
NumberParseError(err: std::num::ParseIntError, line: usize, column: usize) {
display("Number Parse Error: {}\nLine: {}, Column: {}", err, line, column)
source(err)
}
/// Wraps a `rust_decimal::Error` together with a line/column position.
DecimalParseError(err: rust_decimal::Error, line: usize, column: usize) {
display("Decimal Parse Error: {}\nLine: {}, Column: {}", err, line, column)
source(err)
}
/// A character that could not be classified as any known token start.
UnknownSymbolError(char: char, line: usize, column: usize) {
display("Unknown Symbol: {}\nLine: {}, Column: {}", char, line, column)
}
/// A word that is neither a known keyword nor a valid identifier.
UnknownKeywordOrIdentifierError(val: String, line: usize, column: usize) {
display("Unknown Keyword or Identifier: {}\nLine: {}, Column: {}", val, line, column)
}
}
}
/// Marker trait for input sources the tokenizer can consume: anything that can be
/// both read from and seeked within.
///
/// It exists so that `Read + Seek` can be named as a single trait object
/// (`Box<dyn Tokenize>`).
pub trait Tokenize: Read + Seek {}

/// Blanket implementation: every `Read + Seek` type is automatically `Tokenize`.
impl<T: Read + Seek> Tokenize for T {}
/// Tokenizer that reads one byte at a time from a buffered, seekable source and
/// tracks the current line and column while doing so.
pub(crate) struct Tokenizer {
/// Buffered reader over the boxed input source.
reader: BufReader<Box<dyn Tokenize>>,
/// One-byte scratch buffer used when reading or peeking a single character.
char_buffer: [u8; 1],
/// Current line. Constructors start it at 1; `next_char` increments it on `'\n'`.
line: usize,
/// Current column. Constructors start it at 1; `next_char` increments it per
/// character and resets it to 1 on `'\n'`.
column: usize,
/// Set once `next_token` has emitted the `EOF` token, so later calls return `None`.
returned_eof: bool,
}
impl Tokenizer {
    /// Creates a tokenizer that reads from the file at `input_file`.
    ///
    /// The file is opened immediately and wrapped in a `BufReader`. Line and column
    /// counters start at 1.
    ///
    /// # Errors
    /// Returns `TokenizerError::IOError` if the file cannot be opened.
    pub fn from_path(input_file: impl Into<PathBuf>) -> Result<Self, TokenizerError> {
        let path: PathBuf = input_file.into();
        let source: Box<dyn Tokenize> = Box::new(std::fs::File::open(path)?);

        Ok(Self {
            reader: BufReader::new(source),
            char_buffer: [0],
            line: 1,
            column: 1,
            returned_eof: false,
        })
    }
}
impl From<String> for Tokenizer {
    /// Creates a tokenizer over an in-memory string.
    ///
    /// The string is moved into a `Cursor` so it can be read and seeked like a file.
    /// Line and column counters start at 1.
    fn from(input: String) -> Self {
        let source: Box<dyn Tokenize> = Box::new(Cursor::new(input));

        Self {
            reader: BufReader::new(source),
            char_buffer: [0],
            line: 1,
            column: 1,
            returned_eof: false,
        }
    }
}
impl Tokenizer {
/// Reads and consumes the next character from the stream.
/// Returns `None` once the reader has no more bytes to give.
/// If reading from the stream fails, the error is returned.
///
/// Exactly one byte is read and converted directly to a `char`, so multi-byte
/// UTF-8 sequences are not decoded as a single character.
///
/// # Important
/// This function will increment the line and column counters
fn next_char(&mut self) -> Result<Option<char>, TokenizerError> {
    if self.reader.read(&mut self.char_buffer)? == 0 {
        return Ok(None);
    }

    // The scratch buffer is a fixed one-element array, so destructuring it cannot fail
    let [byte] = self.char_buffer;
    let current = char::from(byte);

    match current {
        // a newline moves to the start of the following line
        '\n' => {
            self.line += 1;
            self.column = 1;
        }
        _ => self.column += 1,
    }

    Ok(Some(current))
}
/// Peeks the next character in the stream without consuming it
///
/// # Important
/// This does not increment the line or column counters
fn peek_next_char(&mut self) -> Result<Option<char>, TokenizerError> {
let current_pos = self.reader.stream_position()?;
let to_return = if self.reader.read(&mut self.char_buffer)? == 0 {
None
} else {
self.reader.seek(SeekFrom::Start(current_pos))?;
// Safety: The buffer is guaranteed to have 1 value as it is initialized with a size of 1
Some(self.char_buffer[0] as char)
};
Ok(to_return)
}
/// Discards the remainder of the current line, including its terminating `'\n'`.
/// Stops early if the stream ends first.
/// Useful for skipping comments or empty lines
///
/// # Important
/// This function will increment the line and column counters
fn skip_line(&mut self) -> Result<(), TokenizerError> {
    loop {
        match self.next_char()? {
            // end of line or end of stream: nothing left to skip
            Some('\n') | None => return Ok(()),
            Some(_) => {}
        }
    }
}
/// Reads and consumes the next token in the stream.
///
/// Whitespace and `//` line comments are skipped. When the stream is exhausted a
/// single `EOF` token is returned; every call after that returns `None`.
///
/// # Errors
/// Propagates errors from reading the stream and from the individual tokenizing
/// helpers, and returns `TokenizerError::UnknownSymbolError` for a character that
/// matches none of the token categories.
pub fn next_token(&mut self) -> Result<Option<Token>, TokenizerError> {
    loop {
        let current = match self.next_char()? {
            Some(c) => c,
            None => break,
        };

        // whitespace never produces a token
        if current.is_whitespace() {
            continue;
        }

        // `//` starts a comment that runs to the end of the line
        if current == '/' && self.peek_next_char()? == Some('/') {
            self.skip_line()?;
            continue;
        }

        let token = match current {
            // numbers
            '0'..='9' => self.tokenize_number(current),
            // strings
            '"' | '\'' => self.tokenize_string(current),
            // symbols (quotes were already claimed by the arm above)
            c if !c.is_alphanumeric() => self.tokenize_symbol(current),
            // keywords and identifiers
            c if c.is_alphabetic() => self.tokenize_keyword_or_identifier(current),
            _ => Err(TokenizerError::UnknownSymbolError(
                current,
                self.line,
                self.column,
            )),
        };

        return token.map(Some);
    }

    // The stream is exhausted: hand out the EOF token exactly once
    if self.returned_eof {
        return Ok(None);
    }

    self.returned_eof = true;
    Ok(Some(Token::new(TokenType::EOF, self.line, self.column)))
}
/// Peeks the next token in the stream without consuming it.
///
/// The token is produced by running `next_token` and then rewinding the reader and
/// restoring the line/column counters and the EOF flag. Restoring the EOF flag matters:
/// without it, peeking at the end of input would mark the EOF token as already returned
/// and the following `next_token` call would yield `None` instead of the EOF token.
///
/// The state is restored even when tokenizing fails, so a failed peek does not move the
/// stream.
pub fn peek_next(&mut self) -> Result<Option<Token>, TokenizerError> {
    let current_pos = self.reader.stream_position()?;
    let column = self.column;
    let line = self.line;
    let returned_eof = self.returned_eof;

    let token = self.next_token();

    self.reader.seek(SeekFrom::Start(current_pos))?;
    self.column = column;
    self.line = line;
    self.returned_eof = returned_eof;
    token
}
/// Tokenizes a symbol
fn tokenize_symbol(&mut self, first_symbol: char) -> Result<Token, TokenizerError> {
/// Helper macro to create a symbol token
macro_rules! symbol {
($symbol:ident) => {
Ok(Token::new(
TokenType::Symbol(Symbol::$symbol),
self.line,
self.column,
))
};
}
match first_symbol {
// single character symbols
'(' => symbol!(LParen),
')' => symbol!(RParen),
'{' => symbol!(LBrace),
'}' => symbol!(RBrace),
'[' => symbol!(LBracket),
']' => symbol!(RBracket),
';' => symbol!(Semicolon),
':' => symbol!(Colon),
',' => symbol!(Comma),
'+' => symbol!(Plus),
'-' => symbol!(Minus),
'/' => symbol!(Slash),
'.' => symbol!(Dot),
'^' => symbol!(Caret),
// multi-character symbols
'<' if self.peek_next_char()? == Some('=') => {
self.next_char()?;
symbol!(LessThanOrEqual)
}
'<' => symbol!(LessThan),
'>' if self.peek_next_char()? == Some('=') => {
self.next_char()?;
symbol!(GreaterThanOrEqual)
}
'>' => symbol!(GreaterThan),
'=' if self.peek_next_char()? == Some('=') => {
self.next_char()?;
symbol!(Equal)
}
'=' => symbol!(Assign),
'!' if self.peek_next_char()? == Some('=') => {
self.next_char()?;
symbol!(NotEqual)
}
'!' => symbol!(LogicalNot),
'*' if self.peek_next_char()? == Some('*') => {
self.next_char()?;
symbol!(Exp)
}
'*' => symbol!(Asterisk),
'&' if self.peek_next_char()? == Some('&') => {
self.next_char()?;
symbol!(LogicalAnd)
}
'|' if self.peek_next_char()? == Some('|') => {
self.next_char()?;
symbol!(LogicalOr)
}
_ => Err(TokenizerError::UnknownSymbolError(
first_symbol,
self.line,
self.column,
)),
}
}
/// Tokenizes a number literal. Also handles temperatures with a suffix of `c`, `f`, or `k`.
///
/// `first_char` is the already-consumed first digit. Reading continues over ASCII digits,
/// `_` separators (ignored) and at most one `.`; a second `.` ends the literal and is
/// left in the stream. A `.` that is not followed by any digit is still consumed and the
/// literal is treated as an integer.
///
/// If the character right after the literal is `c`, `f` or `k`, it is consumed and the
/// value is converted with `Temperature::to_kelvin`.
///
/// The token and every error carry the position captured on entry, i.e. after
/// `first_char` was consumed.
fn tokenize_number(&mut self, first_char: char) -> Result<Token, TokenizerError> {
    let mut primary = String::with_capacity(16);
    let mut decimal: Option<String> = None;
    let mut reading_decimal = false;
    let column = self.column;
    let line = self.line;
    primary.push(first_char);

    while let Some(next_char) = self.peek_next_char()? {
        match next_char {
            // the first dot switches to the fractional part
            '.' if !reading_decimal => reading_decimal = true,
            // support underscores in numbers for readability
            '_' => {}
            digit if digit.is_ascii_digit() => {
                if reading_decimal {
                    decimal.get_or_insert_with(String::new).push(digit);
                } else {
                    primary.push(digit);
                }
            }
            // whitespace, a second dot, or a symbol such as `;` or `=` ends the literal
            _ => break,
        }
        self.next_char()?;
    }

    let number: Number = if let Some(decimal) = decimal {
        let decimal_scale = decimal.len() as u32;
        // Parse all digits as one integer, then shift by the number of fractional digits
        let digits = format!("{}{}", primary, decimal)
            .parse::<i128>()
            .map_err(|e| TokenizerError::NumberParseError(e, line, column))?;
        Number::Decimal(
            Decimal::try_from_i128_with_scale(digits, decimal_scale)
                .map_err(|e| TokenizerError::DecimalParseError(e, line, column))?,
        )
    } else {
        Number::Integer(
            primary
                .parse()
                .map_err(|e| TokenizerError::NumberParseError(e, line, column))?,
        )
    };

    // check if the next char is a temperature suffix
    if let Some(next_char) = self.peek_next_char()? {
        let temperature = match next_char {
            'c' => Temperature::Celsius(number),
            'f' => Temperature::Fahrenheit(number),
            'k' => Temperature::Kelvin(number),
            _ => return Ok(Token::new(TokenType::Number(number), line, column)),
        }
        .to_kelvin();
        self.next_char()?;
        Ok(Token::new(TokenType::Number(temperature), line, column))
    } else {
        Ok(Token::new(TokenType::Number(number), line, column))
    }
}
/// Tokenizes a string literal
fn tokenize_string(&mut self, beginning_quote: char) -> Result<Token, TokenizerError> {
let mut buffer = String::with_capacity(16);
let column = self.column.clone();
let line = self.line.clone();
while let Some(next_char) = self.next_char()? {
if next_char == beginning_quote {
break;
}
buffer.push(next_char);
}
Ok(Token::new(TokenType::String(buffer), line, column))
}
/// Tokenizes a keyword or an identifier. Also handles boolean literals
fn tokenize_keyword_or_identifier(
&mut self,
first_char: char,
) -> Result<Token, TokenizerError> {
macro_rules! keyword {
($keyword:ident) => {{
return Ok(Token::new(
TokenType::Keyword(Keyword::$keyword),
self.line,
self.column,
));
}};
}
/// Helper macro to check if the next character is whitespace or not alphanumeric
macro_rules! next_ws {
() => {
matches!(self.peek_next_char()?, Some(x) if x.is_whitespace() || !x.is_alphanumeric()) || matches!(self.peek_next_char()?, None)
};
}
let mut buffer = String::with_capacity(16);
let line = self.line.clone();
let column = self.column.clone();
let mut looped_char = Some(first_char);
while let Some(next_char) = looped_char {
if next_char.is_whitespace() {
break;
}
if !next_char.is_alphanumeric() {
break;
}
buffer.push(next_char);
match buffer.as_str() {
"let" if next_ws!() => keyword!(Let),
"fn" if next_ws!() => keyword!(Fn),
"if" if next_ws!() => keyword!(If),
"else" if next_ws!() => keyword!(Else),
"return" if next_ws!() => keyword!(Return),
"enum" if next_ws!() => keyword!(Enum),
"device" if next_ws!() => keyword!(Device),
"loop" if next_ws!() => keyword!(Loop),
"break" if next_ws!() => keyword!(Break),
// boolean literals
"true" if next_ws!() => {
return Ok(Token::new(TokenType::Boolean(true), self.line, self.column))
}
"false" if next_ws!() => {
return Ok(Token::new(
TokenType::Boolean(false),
self.line,
self.column,
))
}
// if the next character is whitespace or not alphanumeric, then we have an identifier
// this is because keywords are checked first
val if next_ws!() => {
return Ok(Token::new(
TokenType::Identifier(val.to_string()),
line,
column,
));
}
_ => {}
}
looped_char = self.next_char()?;
}
Err(TokenizerError::UnknownKeywordOrIdentifierError(
buffer, line, column,
))
}
}
/// Wraps a [`Tokenizer`] and records the tokens it hands out, so that callers can move
/// backwards and forwards in the token stream via `seek`.
pub struct TokenizerBuffer {
    /// The underlying token source
    tokenizer: Tokenizer,
    /// Tokens queued to be returned by `next` before the tokenizer is read again
    buffer: VecDeque<Token>,
    /// Tokens that `next` has already returned, oldest first
    history: VecDeque<Token>,
}
impl TokenizerBuffer {
    /// Creates a buffer around `tokenizer` with an empty look-ahead queue and history.
    pub fn new(tokenizer: Tokenizer) -> Self {
        Self {
            tokenizer,
            buffer: VecDeque::new(),
            history: VecDeque::with_capacity(128),
        }
    }

    /// Reads the next token, pushing it to the back of the history and returning it.
    ///
    /// Tokens queued by a backwards seek are returned first; only when the queue is empty
    /// is the underlying tokenizer read.
    pub fn next(&mut self) -> Result<Option<Token>, TokenizerError> {
        let token = match self.buffer.pop_front() {
            Some(token) => Some(token),
            None => self.tokenizer.next_token()?,
        };
        if let Some(ref token) = token {
            self.history.push_back(token.clone());
        }
        Ok(token)
    }

    /// Peeks the next token in the stream without adding to the history stack
    pub fn peek(&mut self) -> Result<Option<Token>, TokenizerError> {
        match self.buffer.front() {
            Some(token) => Ok(Some(token.clone())),
            None => self.tokenizer.peek_next(),
        }
    }

    /// Moves `seek_to` tokens relative to the current position.
    ///
    /// * `seek_to > 0` consumes that many tokens exactly as `next` would: queued tokens
    ///   first, then the tokenizer. Each consumed token is recorded in the history.
    /// * `seek_to < 0` moves that many tokens from the history back to the front of the
    ///   queue, so they are returned again in their original order.
    /// * `seek_to == 0` does nothing.
    ///
    /// # Errors
    /// Returns an `UnexpectedEof` I/O error when the stream ends before `seek_to` tokens
    /// were consumed, or when the history holds fewer tokens than requested. In the
    /// latter case the history is left untouched.
    fn seek_from_current(&mut self, seek_to: i64) -> Result<(), TokenizerError> {
        match seek_to.cmp(&0) {
            Ordering::Greater => {
                for _ in 0..seek_to {
                    if self.next()?.is_none() {
                        return Err(TokenizerError::IOError(std::io::Error::new(
                            std::io::ErrorKind::UnexpectedEof,
                            "Unexpected EOF",
                        )));
                    }
                }
            }
            Ordering::Less => {
                let steps = seek_to.unsigned_abs();
                // Validate up front so a failing seek does not drain the history
                if steps > self.history.len() as u64 {
                    return Err(TokenizerError::IOError(std::io::Error::new(
                        std::io::ErrorKind::UnexpectedEof,
                        "Unexpected EOF",
                    )));
                }
                for _ in 0..steps {
                    // Newest history entry goes to the front; older ones end up before it
                    if let Some(token) = self.history.pop_back() {
                        self.buffer.push_front(token);
                    }
                }
            }
            Ordering::Equal => {}
        }
        Ok(())
    }

    /// Adds to or removes from the History stack, allowing the user to move back and forth in the stream
    ///
    /// Only `SeekFrom::Current` is supported.
    ///
    /// # Panics
    /// Panics when called with `SeekFrom::End` or `SeekFrom::Start`.
    pub fn seek(&mut self, from: SeekFrom) -> Result<(), TokenizerError> {
        match from {
            SeekFrom::Current(seek_to) => self.seek_from_current(seek_to)?,
            SeekFrom::End(_) => unimplemented!("SeekFrom::End will not be implemented"),
            SeekFrom::Start(_) => unimplemented!("SeekFrom::Start will not be implemented"),
        }
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::Result;
    use rust_decimal::Decimal;

    const TEST_FILE: &str = "tests/file.stlg";
    // The lines inside the raw string are indented on purpose: `test_peek_next_char`
    // expects a space right after the first newline.
    const TEST_STRING: &str = r#"
    fn test() {
        let x = 10;
        return x + 2;
    }
    "#;

    /// Tokenizes `source` and asserts that the leading tokens have exactly the given types.
    fn assert_token_types(source: &str, expected: &[TokenType]) -> Result<()> {
        let mut tokenizer = Tokenizer::from(source.to_owned());
        for expected_type in expected {
            let token = tokenizer.next_token()?.unwrap();
            assert_eq!(&token.token_type, expected_type);
        }
        Ok(())
    }

    #[test]
    fn test_seek_from_current() -> Result<()> {
        let tokenizer = Tokenizer::from(TEST_STRING.to_owned());
        let mut buffer = TokenizerBuffer::new(tokenizer);

        let token = buffer.next()?.unwrap();
        assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn));

        // skips the `test` identifier
        buffer.seek(SeekFrom::Current(1))?;

        let token = buffer.next()?.unwrap();
        assert_eq!(token.token_type, TokenType::Symbol(Symbol::LParen));
        Ok(())
    }

    #[test]
    fn test_tokenizer_from_path_ok() {
        let tokenizer = Tokenizer::from_path(TEST_FILE);
        assert!(tokenizer.is_ok());
    }

    #[test]
    fn test_tokenizer_from_path_err() {
        let tokenizer = Tokenizer::from_path("non_existent_file.stlg");
        assert!(tokenizer.is_err());
    }

    #[test]
    fn test_next_char() -> Result<()> {
        let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());
        let char = tokenizer.next_char()?;
        assert_eq!(char, Some('\n'));
        assert_eq!(tokenizer.line, 2);
        assert_eq!(tokenizer.column, 1);

        let mut tokenizer = Tokenizer::from(String::from("fn"));
        let char = tokenizer.next_char()?;
        assert_eq!(char, Some('f'));
        assert_eq!(tokenizer.line, 1);
        assert_eq!(tokenizer.column, 2);
        Ok(())
    }

    #[test]
    fn test_peek_next_char() -> Result<()> {
        let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());

        let char = tokenizer.peek_next_char()?;
        assert_eq!(char, Some('\n'));
        assert_eq!(tokenizer.line, 1);
        assert_eq!(tokenizer.column, 1);

        let char = tokenizer.next_char()?;
        assert_eq!(char, Some('\n'));
        assert_eq!(tokenizer.line, 2);
        assert_eq!(tokenizer.column, 1);

        let char = tokenizer.peek_next_char()?;
        assert_eq!(char, Some(' '));
        assert_eq!(tokenizer.line, 2);
        assert_eq!(tokenizer.column, 1);
        Ok(())
    }

    #[test]
    fn test_temperature_unit() -> Result<()> {
        assert_token_types(
            "10c 14f 10k",
            &[
                TokenType::Number(Number::Decimal(Decimal::new(28315, 2))),
                TokenType::Number(Number::Decimal(Decimal::new(26315, 2))),
                TokenType::Number(Number::Integer(10)),
            ],
        )
    }

    #[test]
    fn test_parse_integer() -> Result<()> {
        assert_token_types("10", &[TokenType::Number(Number::Integer(10))])
    }

    #[test]
    fn test_parse_integer_with_underscore() -> Result<()> {
        assert_token_types("1_000", &[TokenType::Number(Number::Integer(1000))])
    }

    #[test]
    fn test_parse_decimal() -> Result<()> {
        assert_token_types(
            "10.5",
            // 10.5
            &[TokenType::Number(Number::Decimal(Decimal::new(105, 1)))],
        )
    }

    #[test]
    fn test_parse_decimal_with_underscore() -> Result<()> {
        assert_token_types(
            "1_000.000_6",
            // 1000.0006
            &[TokenType::Number(Number::Decimal(Decimal::new(10000006, 4)))],
        )
    }

    #[test]
    fn test_parse_number_with_symbol() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from("10;"));
        let token = tokenizer.next_token()?.unwrap();
        assert_eq!(token.token_type, TokenType::Number(Number::Integer(10)));

        // the symbol following the number must still be in the stream
        let next_char = tokenizer.next_char()?;
        assert_eq!(next_char, Some(';'));
        Ok(())
    }

    #[test]
    fn test_string_parse() -> Result<()> {
        let expected = [TokenType::String(String::from("Hello, World!"))];
        assert_token_types(r#""Hello, World!""#, &expected)?;
        assert_token_types(r#"'Hello, World!'"#, &expected)
    }

    #[test]
    fn test_symbol_parse() -> Result<()> {
        assert_token_types(
            "^ ! () [] {} , . ; : + - * / < > = != && || >= <=**",
            &[
                TokenType::Symbol(Symbol::Caret),
                TokenType::Symbol(Symbol::LogicalNot),
                TokenType::Symbol(Symbol::LParen),
                TokenType::Symbol(Symbol::RParen),
                TokenType::Symbol(Symbol::LBracket),
                TokenType::Symbol(Symbol::RBracket),
                TokenType::Symbol(Symbol::LBrace),
                TokenType::Symbol(Symbol::RBrace),
                TokenType::Symbol(Symbol::Comma),
                TokenType::Symbol(Symbol::Dot),
                TokenType::Symbol(Symbol::Semicolon),
                TokenType::Symbol(Symbol::Colon),
                TokenType::Symbol(Symbol::Plus),
                TokenType::Symbol(Symbol::Minus),
                TokenType::Symbol(Symbol::Asterisk),
                TokenType::Symbol(Symbol::Slash),
                TokenType::Symbol(Symbol::LessThan),
                TokenType::Symbol(Symbol::GreaterThan),
                TokenType::Symbol(Symbol::Assign),
                TokenType::Symbol(Symbol::NotEqual),
                TokenType::Symbol(Symbol::LogicalAnd),
                TokenType::Symbol(Symbol::LogicalOr),
                TokenType::Symbol(Symbol::GreaterThanOrEqual),
                TokenType::Symbol(Symbol::LessThanOrEqual),
                TokenType::Symbol(Symbol::Exp),
            ],
        )
    }

    #[test]
    fn test_keyword_parse() -> Result<()> {
        assert_token_types(
            "let fn if else return enum",
            &[
                TokenType::Keyword(Keyword::Let),
                TokenType::Keyword(Keyword::Fn),
                TokenType::Keyword(Keyword::If),
                TokenType::Keyword(Keyword::Else),
                TokenType::Keyword(Keyword::Return),
                TokenType::Keyword(Keyword::Enum),
            ],
        )
    }

    #[test]
    fn test_identifier_parse() -> Result<()> {
        assert_token_types(
            "fn test",
            &[
                TokenType::Keyword(Keyword::Fn),
                TokenType::Identifier(String::from("test")),
            ],
        )
    }

    #[test]
    fn test_boolean_parse() -> Result<()> {
        assert_token_types(
            "true false",
            &[TokenType::Boolean(true), TokenType::Boolean(false)],
        )
    }

    #[test]
    fn test_full_source() -> Result<()> {
        assert_token_types(
            TEST_STRING,
            &[
                TokenType::Keyword(Keyword::Fn),
                TokenType::Identifier(String::from("test")),
                TokenType::Symbol(Symbol::LParen),
                TokenType::Symbol(Symbol::RParen),
                TokenType::Symbol(Symbol::LBrace),
                TokenType::Keyword(Keyword::Let),
                TokenType::Identifier(String::from("x")),
                TokenType::Symbol(Symbol::Assign),
                TokenType::Number(Number::Integer(10)),
                TokenType::Symbol(Symbol::Semicolon),
                TokenType::Keyword(Keyword::Return),
                TokenType::Identifier(String::from("x")),
                TokenType::Symbol(Symbol::Plus),
                TokenType::Number(Number::Integer(2)),
                TokenType::Symbol(Symbol::Semicolon),
                TokenType::Symbol(Symbol::RBrace),
            ],
        )
    }

    #[test]
    fn test_peek_next() -> Result<()> {
        let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());
        let column = tokenizer.column;
        let line = tokenizer.line;

        // peeking must not move the position counters
        let peeked_token = tokenizer.peek_next()?;
        assert_eq!(
            peeked_token.unwrap().token_type,
            TokenType::Keyword(Keyword::Fn)
        );
        assert_eq!(tokenizer.column, column);
        assert_eq!(tokenizer.line, line);

        // consuming the same token must move them
        let next_token = tokenizer.next_token()?;
        assert_eq!(
            next_token.unwrap().token_type,
            TokenType::Keyword(Keyword::Fn)
        );
        assert_ne!(tokenizer.column, column);
        assert_ne!(tokenizer.line, line);
        Ok(())
    }
}

View File

@@ -1,221 +0,0 @@
use rust_decimal::Decimal;
/// A single token together with the source position it was reported at.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    /// The type of the token
    pub token_type: TokenType,
    /// The line where the token was found
    pub line: usize,
    /// The column where the token was found
    pub column: usize,
}
impl Token {
pub fn new(token_type: TokenType, line: usize, column: usize) -> Self {
Self {
token_type,
line,
column,
}
}
}
/// A numeric value tagged with the temperature unit it was written in.
#[derive(Debug, PartialEq, Hash, Eq, Clone)]
pub enum Temperature {
    /// A value in degrees Celsius
    Celsius(Number),
    /// A value in degrees Fahrenheit
    Fahrenheit(Number),
    /// A value in Kelvin
    Kelvin(Number),
}
impl std::fmt::Display for Temperature {
    /// Writes the numeric value followed by its unit suffix (`°C`, `°F` or `K`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (value, unit) = match self {
            Temperature::Celsius(value) => (value, "°C"),
            Temperature::Fahrenheit(value) => (value, "°F"),
            Temperature::Kelvin(value) => (value, "K"),
        };
        write!(f, "{}{}", value, unit)
    }
}
impl Temperature {
pub fn to_kelvin(self) -> Number {
match self {
Temperature::Celsius(n) => {
let n = match n {
Number::Integer(i) => Decimal::new(i as i64, 0),
Number::Decimal(d) => d,
};
Number::Decimal(n + Decimal::new(27315, 2))
}
Temperature::Fahrenheit(n) => {
let n = match n {
Number::Integer(i) => Decimal::new(i as i64, 0),
Number::Decimal(d) => d,
};
let a = n - Decimal::new(32, 0);
let b = Decimal::new(5, 0) / Decimal::new(9, 0);
Number::Decimal(a * b + Decimal::new(27315, 2))
}
Temperature::Kelvin(n) => n,
}
}
}
/// The kind of a token, carrying the token's value where it has one.
#[derive(Debug, PartialEq, Hash, Eq, Clone)]
pub enum TokenType {
    /// Represents a string token
    String(String),
    /// Represents a number token
    Number(Number),
    /// Represents a boolean token
    Boolean(bool),
    /// Represents a keyword token
    Keyword(Keyword),
    /// Represents an identifier token
    Identifier(String),
    /// Represents a symbol token
    Symbol(Symbol),
    /// Represents an end of file token
    EOF,
}
impl std::fmt::Display for TokenType {
    /// Writes the token's value: strings and identifiers verbatim, numbers and booleans
    /// via their `Display` form, keywords and symbols via their `Debug` name, and `EOF`
    /// for the end-of-file token.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            TokenType::String(text) | TokenType::Identifier(text) => f.write_str(text),
            TokenType::Number(number) => write!(f, "{}", number),
            TokenType::Boolean(flag) => write!(f, "{}", flag),
            TokenType::Keyword(keyword) => write!(f, "{:?}", keyword),
            TokenType::Symbol(symbol) => write!(f, "{:?}", symbol),
            TokenType::EOF => f.write_str("EOF"),
        }
    }
}
/// A numeric literal value.
#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy)]
pub enum Number {
    /// Represents an integer number
    Integer(u128),
    /// Represents a decimal number stored as a `rust_decimal::Decimal`
    // NOTE(review): the previous comment claimed "a precision of 64 bits"; the actual
    // precision is whatever `rust_decimal::Decimal` provides — confirm against its docs.
    Decimal(Decimal),
}
impl std::fmt::Display for Number {
    /// Writes the number using the `Display` form of the wrapped integer or decimal.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Number::Integer(i) => write!(f, "{}", i),
            // Format the decimal directly; going through `to_string()` first would
            // allocate a temporary `String` for no benefit
            Number::Decimal(d) => write!(f, "{}", d),
        }
    }
}
/// The punctuation and operator symbols recognised by the tokenizer.
#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy)]
pub enum Symbol {
    // Single Character Symbols
    /// Represents the `(` symbol
    LParen,
    /// Represents the `)` symbol
    RParen,
    /// Represents the `{` symbol
    LBrace,
    /// Represents the `}` symbol
    RBrace,
    /// Represents the `[` symbol
    LBracket,
    /// Represents the `]` symbol
    RBracket,
    /// Represents the `;` symbol
    Semicolon,
    /// Represents the `:` symbol
    Colon,
    /// Represents the `,` symbol
    Comma,
    /// Represents the `+` symbol
    Plus,
    /// Represents the `-` symbol
    Minus,
    /// Represents the `*` symbol
    Asterisk,
    /// Represents the `/` symbol
    Slash,
    /// Represents the `<` symbol
    LessThan,
    /// Represents the `>` symbol
    GreaterThan,
    /// Represents the `=` symbol
    Assign,
    /// Represents the `!` symbol
    LogicalNot,
    /// Represents the `.` symbol
    Dot,
    /// Represents the `^` symbol
    Caret,
    // Double Character Symbols
    /// Represents the `==` symbol
    Equal,
    /// Represents the `!=` symbol
    NotEqual,
    /// Represents the `&&` symbol
    LogicalAnd,
    /// Represents the `||` symbol
    LogicalOr,
    /// Represents the `<=` symbol
    LessThanOrEqual,
    /// Represents the `>=` symbol
    GreaterThanOrEqual,
    /// Represents the `**` symbol
    Exp,
}
impl Symbol {
    /// Returns `true` for the arithmetic operators `+`, `-`, `*`, `/` and `**`.
    pub fn is_operator(&self) -> bool {
        matches!(
            self,
            Symbol::Plus | Symbol::Minus | Symbol::Asterisk | Symbol::Slash | Symbol::Exp
        )
    }

    /// Returns `true` for the comparison symbols `<`, `>`, `==`, `!=`, `<=` and `>=`.
    pub fn is_comparison(&self) -> bool {
        matches!(
            self,
            Symbol::LessThan
                | Symbol::GreaterThan
                | Symbol::Equal
                | Symbol::NotEqual
                | Symbol::LessThanOrEqual
                | Symbol::GreaterThanOrEqual
        )
    }

    /// Returns `true` for the logical symbols `&&` and `||`.
    pub fn is_logical(&self) -> bool {
        matches!(self, Symbol::LogicalAnd | Symbol::LogicalOr)
    }
}
/// The reserved words of the language.
#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy)]
pub enum Keyword {
    /// Represents the `let` keyword
    Let,
    /// Represents the `fn` keyword
    Fn,
    /// Represents the `if` keyword
    If,
    /// Represents the `device` keyword. Useful for defining a device at a specific address (ex. d0, d1, d2, etc.)
    Device,
    /// Represents the `else` keyword
    Else,
    /// Represents the `return` keyword
    Return,
    /// Represents the `enum` keyword
    Enum,
    /// Represents the `loop` keyword
    Loop,
    /// Represents the `break` keyword
    Break,
}