wip

Cargo.lock (generated)
@@ -602,6 +602,7 @@ dependencies = [
  "clap",
  "quick-error",
  "rust_decimal",
+ "tokenizer",
 ]
 
 [[package]]

Cargo.toml
@@ -18,6 +18,7 @@ path = "src/main.rs"
 clap = { version = "^4.5", features = ["derive"] }
 quick-error = { workspace = true }
 rust_decimal = { workspace = true }
+tokenizer = { path = "libs/tokenizer" }
 
 [dev-dependencies]
 anyhow = { version = "^1.0", features = ["backtrace"] }

@@ -39,7 +39,7 @@ pub trait Tokenize: Read + Seek {}
 
 impl<T> Tokenize for T where T: Read + Seek {}
 
-pub(crate) struct Tokenizer {
+pub struct Tokenizer {
     reader: BufReader<Box<dyn Tokenize>>,
     char_buffer: [u8; 1],
     line: usize,
@@ -185,8 +185,8 @@ impl Tokenizer {
     /// If there are no more tokens in the stream, this function returns None
     pub fn peek_next(&mut self) -> Result<Option<Token>, TokenizerError> {
         let current_pos = self.reader.stream_position()?;
-        let column = self.column.clone();
-        let line = self.line.clone();
+        let column = self.column;
+        let line = self.line;
 
         let token = self.next_token()?;
         self.reader.seek(SeekFrom::Start(current_pos))?;
@@ -280,8 +280,8 @@ impl Tokenizer {
         let mut decimal: Option<String> = None;
         let mut reading_decimal = false;
 
-        let column = self.column.clone();
-        let line = self.line.clone();
+        let column = self.column;
+        let line = self.line;
 
         primary.push(first_char);
 
@@ -353,8 +353,8 @@ impl Tokenizer {
     fn tokenize_string(&mut self, beginning_quote: char) -> Result<Token, TokenizerError> {
         let mut buffer = String::with_capacity(16);
 
-        let column = self.column.clone();
-        let line = self.line.clone();
+        let column = self.column;
+        let line = self.line;
 
         while let Some(next_char) = self.next_char()? {
             if next_char == beginning_quote {
@@ -385,13 +385,13 @@ impl Tokenizer {
         /// Helper macro to check if the next character is whitespace or not alphanumeric
         macro_rules! next_ws {
             () => {
-                matches!(self.peek_next_char()?, Some(x) if x.is_whitespace() || !x.is_alphanumeric()) || matches!(self.peek_next_char()?, None)
+                matches!(self.peek_next_char()?, Some(x) if x.is_whitespace() || !x.is_alphanumeric()) || self.peek_next_char()?.is_none()
             };
         }
 
         let mut buffer = String::with_capacity(16);
-        let line = self.line.clone();
-        let column = self.column.clone();
+        let line = self.line;
+        let column = self.column;
 
         let mut looped_char = Some(first_char);
 
@@ -464,7 +464,7 @@ impl TokenizerBuffer {
 
     /// Reads the next token from the tokenizer, pushing the value to the back of the history
     /// and returning the token
-    pub fn next(&mut self) -> Result<Option<Token>, TokenizerError> {
+    pub fn next_token(&mut self) -> Result<Option<Token>, TokenizerError> {
         if let Some(token) = self.buffer.pop_front() {
             self.history.push_back(token.clone());
             return Ok(Some(token));
@@ -561,12 +561,12 @@ mod tests {
         let tokenizer = Tokenizer::from(TEST_STRING.to_owned());
         let mut buffer = TokenizerBuffer::new(tokenizer);
 
-        let token = buffer.next()?.unwrap();
+        let token = buffer.next_token()?.unwrap();
         assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn));
 
         buffer.seek(SeekFrom::Current(1))?;
 
-        let token = buffer.next()?.unwrap();
+        let token = buffer.next_token()?.unwrap();
 
         assert_eq!(token.token_type, TokenType::Symbol(Symbol::LParen));
 
@@ -870,8 +870,8 @@ mod tests {
     fn test_peek_next() -> Result<()> {
         let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());
 
-        let column = tokenizer.column.clone();
-        let line = tokenizer.line.clone();
+        let column = tokenizer.column;
+        let line = tokenizer.line;
 
         let peeked_token = tokenizer.peek_next()?;
 
@@ -894,4 +894,3 @@ mod tests {
         Ok(())
     }
 }
-

@@ -106,7 +106,7 @@ impl std::fmt::Display for Number {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             Number::Integer(i) => write!(f, "{}", i),
-            Number::Decimal(d) => write!(f, "{}", d.to_string()),
+            Number::Decimal(d) => write!(f, "{}", d),
         }
     }
 }
@@ -172,29 +172,26 @@ pub enum Symbol {
 
 impl Symbol {
     pub fn is_operator(&self) -> bool {
-        match self {
-            Symbol::Plus | Symbol::Minus | Symbol::Asterisk | Symbol::Slash | Symbol::Exp => true,
-            _ => false,
-        }
+        matches!(
+            self,
+            Symbol::Plus | Symbol::Minus | Symbol::Asterisk | Symbol::Slash | Symbol::Exp
+        )
     }
 
     pub fn is_comparison(&self) -> bool {
-        match self {
+        matches!(
+            self,
             Symbol::LessThan
                 | Symbol::GreaterThan
                 | Symbol::Equal
                 | Symbol::NotEqual
                 | Symbol::LessThanOrEqual
-                | Symbol::GreaterThanOrEqual => true,
-            _ => false,
-        }
+                | Symbol::GreaterThanOrEqual,
+        )
     }
 
     pub fn is_logical(&self) -> bool {
-        match self {
-            Symbol::LogicalAnd | Symbol::LogicalOr => true,
-            _ => false,
-        }
+        matches!(self, Symbol::LogicalAnd | Symbol::LogicalOr)
     }
 }
 

@@ -127,17 +127,14 @@ impl<'a> Compiler<'a> {
 
     fn expression(&mut self, expression: Expression) -> Result<(), CompileError> {
         match expression {
-            Expression::FunctionExpression(expr) => self.function_expression(expr)?,
-            Expression::BlockExpression(expr) => self.block_expression(expr)?,
-            Expression::InvocationExpression(expr) => self.invocation_expression(expr)?,
-            Expression::BinaryExpression(expr) => self.binary_expression(expr)?,
-            Expression::DeclarationExpression(var_name, expr) => {
+            Expression::Function(expr) => self.function_expression(expr)?,
+            Expression::Block(expr) => self.block_expression(expr)?,
+            Expression::Invocation(expr) => self.invocation_expression(expr)?,
+            Expression::Binary(expr) => self.binary_expression(expr)?,
+            Expression::Declaration(var_name, expr) => {
                 self.declaration_expression(&var_name, *expr)?
             }
-            Expression::DeviceDeclarationExpression(DeviceDeclarationExpression {
-                name,
-                device,
-            }) => {
+            Expression::DeviceDeclaration(DeviceDeclarationExpression { name, device }) => {
                 self.devices.insert(name, device);
             }
             _ => todo!("{:?}", expression),
@@ -156,11 +153,11 @@ impl<'a> Compiler<'a> {
                 self.push_stack(var_name)?;
                 self.write_output(format!("push {num}"))?;
             }
-            Expression::BinaryExpression(expr) => {
+            Expression::Binary(expr) => {
                 self.binary_expression(expr)?;
                 self.push_stack(var_name)?;
             }
-            Expression::SyscallExpression(expr) => {
+            Expression::Syscall(expr) => {
                 self.syscall_declaration_expression(expr)?;
                 self.push_stack(var_name)?;
             }
@@ -172,6 +169,7 @@ impl<'a> Compiler<'a> {
 
     fn syscall_declaration_expression(&mut self, expr: SysCall) -> Result<(), CompileError> {
         use crate::parser::sys_call::System;
+        #[allow(clippy::collapsible_match)]
         match expr {
             SysCall::System(ref sys) => match sys {
                 System::LoadFromDevice(LiteralOrVariable::Variable(device), value) => {
@@ -212,12 +210,12 @@ impl<'a> Compiler<'a> {
                 compiler.write_output("push r15")?;
                 compiler.push_stack(&format!("{op}ExpressionLeft"))?;
             }
-            Expression::BinaryExpression(expr) => {
+            Expression::Binary(expr) => {
                 compiler.binary_expression(expr)?;
                 compiler.push_stack(&format!("{op}ExpressionLeft"))?;
             }
-            Expression::PriorityExpression(expr) => match *expr {
-                Expression::BinaryExpression(expr) => {
+            Expression::Priority(expr) => match *expr {
+                Expression::Binary(expr) => {
                     compiler.binary_expression(expr)?;
                     compiler.push_stack(&format!("{op}ExpressionLeft"))?;
                 }
@@ -238,12 +236,12 @@ impl<'a> Compiler<'a> {
                 compiler.write_output("push r15")?;
                 compiler.push_stack(&format!("{op}ExpressionRight"))?;
             }
-            Expression::BinaryExpression(expr) => {
+            Expression::Binary(expr) => {
                 compiler.binary_expression(expr)?;
                 compiler.push_stack(&format!("{op}ExpressionRight"))?;
             }
-            Expression::PriorityExpression(expr) => match *expr {
-                Expression::BinaryExpression(expr) => {
+            Expression::Priority(expr) => match *expr {
+                Expression::Binary(expr) => {
                     compiler.binary_expression(expr)?;
                     compiler.push_stack(&format!("{op}ExpressionRight"))?;
                 }
@@ -304,7 +302,7 @@ impl<'a> Compiler<'a> {
                 to_write.push_str("get r15 db r15\n");
                 to_write.push_str("push r15\n");
             }
-            Expression::BinaryExpression(expr) => {
+            Expression::Binary(expr) => {
                 self.binary_expression(expr)?;
                 to_write.push_str("push r0\n");
             }
@@ -353,11 +351,9 @@ impl<'a> Compiler<'a> {
 
         // hoist functions to the top of the block
         expression.0.sort_by(|a, b| {
-            if matches!(a, Expression::FunctionExpression(_))
-                && matches!(b, Expression::FunctionExpression(_))
-            {
+            if matches!(a, Expression::Function(_)) && matches!(b, Expression::Function(_)) {
                 Ordering::Equal
-            } else if matches!(a, Expression::FunctionExpression(_)) {
+            } else if matches!(a, Expression::Function(_)) {
                 Ordering::Less
             } else {
                 Ordering::Greater
@@ -366,7 +362,7 @@ impl<'a> Compiler<'a> {
 
         for expr in expression.0 {
             // if we haven't declared main yet and we have already declared all the function expressions, declare main
-            if !self.declared_main && !matches!(expr, Expression::FunctionExpression(_)) {
+            if !self.declared_main && !matches!(expr, Expression::Function(_)) {
                 self.write_output("main:")?;
                 self.declared_main = true;
             }

@@ -3,7 +3,6 @@ extern crate quick_error;
 
 mod compiler;
 mod parser;
-mod tokenizer;
 
 use clap::Parser;
 use compiler::Compiler;
@@ -11,6 +10,7 @@ use parser::Parser as ASTParser;
 use std::{
     fs::File,
     io::{BufWriter, Read, Write},
+    path::PathBuf,
 };
 use tokenizer::{Tokenizer, TokenizerError};
 
@@ -49,10 +49,10 @@ quick_error! {
 struct Args {
     /// What file should be compiled. If not set, input will be read from stdin.
     #[arg(short, long)]
-    input_file: Option<String>,
+    input_file: Option<PathBuf>,
     /// The output file for the compiled program. If not set, output will go to stdout.
     #[arg(short, long)]
-    output_file: Option<String>,
+    output_file: Option<PathBuf>,
 }
 
 fn run_logic() -> Result<(), StationlangError> {

@@ -1,15 +1,13 @@
 pub mod sys_call;
 pub mod tree_node;
 
-use crate::{
-    boxed,
-    tokenizer::{
-        token::{Keyword, Symbol, Token, TokenType},
-        Tokenizer, TokenizerBuffer, TokenizerError,
-    },
-};
+use crate::boxed;
 use std::io::SeekFrom;
 use sys_call::SysCall;
+use tokenizer::{
+    token::{Keyword, Symbol, Token, TokenType},
+    Tokenizer, TokenizerBuffer, TokenizerError,
+};
 use tree_node::*;
 
 quick_error! {
@@ -119,9 +117,7 @@ impl Parser {
             expressions.push(expression);
         }
 
-        Ok(Some(Expression::BlockExpression(BlockExpression(
-            expressions,
-        ))))
+        Ok(Some(Expression::Block(BlockExpression(expressions))))
     }
 
     /// Parses the input from the tokenizer buffer and returns the resulting expression
@@ -138,7 +134,7 @@ impl Parser {
 
     /// Assigns the next token in the tokenizer buffer to the current token
     fn assign_next(&mut self) -> Result<(), ParseError> {
-        self.current_token = self.tokenizer.next()?;
+        self.current_token = self.tokenizer.next_token()?;
         Ok(())
     }
 
@@ -174,43 +170,41 @@ impl Parser {
             // match declarations with a `let` keyword
             TokenType::Keyword(Keyword::Let) => self.declaration()?,
 
-            TokenType::Keyword(Keyword::Device) => {
-                Expression::DeviceDeclarationExpression(self.device()?)
-            }
+            TokenType::Keyword(Keyword::Device) => Expression::DeviceDeclaration(self.device()?),
 
             // match functions with a `fn` keyword
-            TokenType::Keyword(Keyword::Fn) => Expression::FunctionExpression(self.function()?),
+            TokenType::Keyword(Keyword::Fn) => Expression::Function(self.function()?),
 
             // match syscalls with a `syscall` keyword
             TokenType::Identifier(ref id) if SysCall::is_syscall(id) => {
-                Expression::SyscallExpression(self.syscall()?)
+                Expression::Syscall(self.syscall()?)
             }
 
             // match a variable expression with opening parenthesis
             TokenType::Identifier(_)
                 if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) =>
             {
-                Expression::InvocationExpression(self.invocation()?)
+                Expression::Invocation(self.invocation()?)
             }
 
             // match a variable expression with an assignment
             TokenType::Identifier(_)
                 if self_matches_peek!(self, TokenType::Symbol(Symbol::Assign)) =>
             {
-                Expression::AssignmentExpression(self.assignment()?)
+                Expression::Assignment(self.assignment()?)
             }
 
             // match variable expressions with an identifier
             TokenType::Identifier(ref id) => Expression::Variable(id.clone()),
 
             // match block expressions with a `{` symbol
-            TokenType::Symbol(Symbol::LBrace) => Expression::BlockExpression(self.block()?),
+            TokenType::Symbol(Symbol::LBrace) => Expression::Block(self.block()?),
 
             // match literal expressions with a semi-colon afterwards
             TokenType::Number(_) | TokenType::String(_) => Expression::Literal(self.literal()?),
 
             // match priority expressions with a left parenthesis
-            TokenType::Symbol(Symbol::LParen) => Expression::PriorityExpression(self.priority()?),
+            TokenType::Symbol(Symbol::LParen) => Expression::Priority(self.priority()?),
 
             _ => {
                 return Err(ParseError::UnexpectedToken(current_token.clone()));
@@ -223,13 +217,13 @@ impl Parser {
 
         // check if the next or current token is an operator
         if self_matches_peek!(self, TokenType::Symbol(s) if s.is_operator()) {
-            return Ok(Some(Expression::BinaryExpression(self.binary(expr)?)));
+            return Ok(Some(Expression::Binary(self.binary(expr)?)));
         }
         // This is an edge case. We need to move back one token if the current token is an operator
         // so the binary expression can pick up the operator
         else if self_matches_current!(self, TokenType::Symbol(s) if s.is_operator()) {
             self.tokenizer.seek(SeekFrom::Current(-1))?;
-            return Ok(Some(Expression::BinaryExpression(self.binary(expr)?)));
+            return Ok(Some(Expression::Binary(self.binary(expr)?)));
         }
 
         Ok(Some(expr))
@@ -248,14 +242,12 @@ impl Parser {
                 Ok(Expression::Variable(ident))
             }
             // A priority expression ( -> (1 + 2) <- + 3 )
-            TokenType::Symbol(Symbol::LParen) => {
-                self.priority().map(Expression::PriorityExpression)
-            }
+            TokenType::Symbol(Symbol::LParen) => self.priority().map(Expression::Priority),
             // A function invocation
             TokenType::Identifier(_)
                 if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) =>
             {
-                self.invocation().map(Expression::InvocationExpression)
+                self.invocation().map(Expression::Invocation)
             }
             _ => Err(ParseError::UnexpectedToken(current_token.clone())),
         }
@@ -322,9 +314,9 @@ impl Parser {
 
         // first, make sure the previous expression supports binary expressions
         match previous {
-            Expression::BinaryExpression(_) // 1 + 2 + 3
-            | Expression::InvocationExpression(_) // add() + 3
-            | Expression::PriorityExpression(_) // (1 + 2) + 3
+            Expression::Binary(_) // 1 + 2 + 3
+            | Expression::Invocation(_) // add() + 3
+            | Expression::Priority(_) // (1 + 2) + 3
             | Expression::Literal(Literal::Number(_)) // 1 + 2 (no addition of strings)
             | Expression::Variable(_) // x + 2
             | Expression::Negation(_) // -1 + 2
@@ -371,10 +363,7 @@ impl Parser {
             let right = expressions.remove(index);
             expressions.insert(
                 index,
-                Expression::BinaryExpression(BinaryExpression::Exponent(
-                    boxed!(left),
-                    boxed!(right),
-                )),
+                Expression::Binary(BinaryExpression::Exponent(boxed!(left), boxed!(right))),
             );
             current_iteration += 1;
         }
@@ -394,17 +383,11 @@ impl Parser {
             match operator {
                 Symbol::Asterisk => expressions.insert(
                     index,
-                    Expression::BinaryExpression(BinaryExpression::Multiply(
-                        boxed!(left),
-                        boxed!(right),
-                    )),
+                    Expression::Binary(BinaryExpression::Multiply(boxed!(left), boxed!(right))),
                 ),
                 Symbol::Slash => expressions.insert(
                     index,
-                    Expression::BinaryExpression(BinaryExpression::Divide(
-                        boxed!(left),
-                        boxed!(right),
-                    )),
+                    Expression::Binary(BinaryExpression::Divide(boxed!(left), boxed!(right))),
                 ),
                 // safety: we have already checked for the operator
                 _ => unreachable!(),
@@ -427,17 +410,11 @@ impl Parser {
             match operator {
                 Symbol::Plus => expressions.insert(
                     index,
-                    Expression::BinaryExpression(BinaryExpression::Add(
-                        boxed!(left),
-                        boxed!(right),
-                    )),
+                    Expression::Binary(BinaryExpression::Add(boxed!(left), boxed!(right))),
                 ),
                 Symbol::Minus => expressions.insert(
                     index,
-                    Expression::BinaryExpression(BinaryExpression::Subtract(
-                        boxed!(left),
-                        boxed!(right),
-                    )),
+                    Expression::Binary(BinaryExpression::Subtract(boxed!(left), boxed!(right))),
                 ),
                 // safety: we have already checked for the operator
                 _ => unreachable!(),
@@ -467,7 +444,7 @@ impl Parser {
 
         // Ensure the last expression is a binary expression
         match expressions.pop().unwrap() {
-            Expression::BinaryExpression(binary) => Ok(binary),
+            Expression::Binary(binary) => Ok(binary),
             _ => unreachable!(),
         }
     }
@@ -512,7 +489,7 @@ impl Parser {
         let current_token = token_from_option!(self.current_token);
         let expression = self.expression()?.ok_or(ParseError::UnexpectedEOF)?;
 
-        if let Expression::BlockExpression(_) = expression {
+        if let Expression::Block(_) = expression {
             return Err(ParseError::InvalidSyntax(
                 current_token,
                 String::from("Block expressions are not allowed in function invocations"),
@@ -568,7 +545,7 @@ impl Parser {
         if token_matches!(current_token, TokenType::Keyword(Keyword::Return)) {
             self.assign_next()?;
             let expression = self.expression()?.ok_or(ParseError::UnexpectedEOF)?;
-            let return_expr = Expression::ReturnExpression(boxed!(expression));
+            let return_expr = Expression::Return(boxed!(expression));
             expressions.push(return_expr);
             self.assign_next()?;
         }
@@ -604,7 +581,7 @@ impl Parser {
             return Err(ParseError::UnexpectedToken(current_token.clone()));
         }
 
-        Ok(Expression::DeclarationExpression(
+        Ok(Expression::Declaration(
             identifier,
             boxed!(assignment_expression),
         ))

@@ -151,11 +151,29 @@ impl std::fmt::Display for SysCall {
 
 impl SysCall {
     pub fn is_syscall(identifier: &str) -> bool {
-        match identifier {
-            "yield" | "sleep" | "HASH" | "loadFromDevice" | "setOnDevice" => true,
-            "acos" | "asin" | "atan" | "atan2" | "abs" | "ceil" | "cos" | "floor" | "log"
-            | "max" | "min" | "rand" | "sin" | "sqrt" | "tan" | "trunc" => true,
-            _ => false,
-        }
+        matches!(
+            identifier,
+            "yield"
+                | "sleep"
+                | "HASH"
+                | "loadFromDevice"
+                | "setOnDevice"
+                | "acos"
+                | "asin"
+                | "atan"
+                | "atan2"
+                | "abs"
+                | "ceil"
+                | "cos"
+                | "floor"
+                | "log"
+                | "max"
+                | "min"
+                | "rand"
+                | "sin"
+                | "sqrt"
+                | "tan"
+                | "trunc"
+        )
     }
 }

@@ -1,6 +1,5 @@
-use crate::tokenizer::token::Number;
-
 use super::sys_call::SysCall;
+use tokenizer::token::Number;
 
 #[derive(Debug, Eq, PartialEq, Clone)]
 pub enum Literal {
@@ -92,11 +91,7 @@ impl std::fmt::Display for FunctionExpression {
             f,
             "(fn {}({}) {{ {} }})",
             self.name,
-            self.arguments
-                .iter()
-                .cloned()
-                .collect::<Vec<String>>()
-                .join(", "),
+            self.arguments.to_vec().join(", "),
             self.body
         )
     }
@@ -171,20 +166,20 @@ impl std::fmt::Display for DeviceDeclarationExpression {
 
 #[derive(Debug, PartialEq, Eq)]
 pub enum Expression {
-    AssignmentExpression(AssignmentExpression),
-    BinaryExpression(BinaryExpression),
-    BlockExpression(BlockExpression),
-    DeclarationExpression(String, Box<Expression>),
-    FunctionExpression(FunctionExpression),
-    InvocationExpression(InvocationExpression),
+    Assignment(AssignmentExpression),
+    Binary(BinaryExpression),
+    Block(BlockExpression),
+    Declaration(String, Box<Expression>),
+    Function(FunctionExpression),
+    Invocation(InvocationExpression),
     Literal(Literal),
-    LogicalExpression(LogicalExpression),
+    Logical(LogicalExpression),
     Negation(Box<Expression>),
-    PriorityExpression(Box<Expression>),
-    ReturnExpression(Box<Expression>),
+    Priority(Box<Expression>),
+    Return(Box<Expression>),
     Variable(String),
-    DeviceDeclarationExpression(DeviceDeclarationExpression),
-    SyscallExpression(SysCall),
+    DeviceDeclaration(DeviceDeclarationExpression),
+    Syscall(SysCall),
 }
 
 impl std::fmt::Display for Expression {
@@ -192,18 +187,18 @@ impl std::fmt::Display for Expression {
         match self {
             Expression::Literal(l) => write!(f, "{}", l),
             Expression::Negation(e) => write!(f, "(-{})", e),
-            Expression::BinaryExpression(e) => write!(f, "{}", e),
-            Expression::LogicalExpression(e) => write!(f, "{}", e),
-            Expression::AssignmentExpression(e) => write!(f, "{}", e),
-            Expression::DeclarationExpression(id, e) => write!(f, "(let {} = {})", id, e),
-            Expression::FunctionExpression(e) => write!(f, "{}", e),
-            Expression::BlockExpression(e) => write!(f, "{}", e),
-            Expression::InvocationExpression(e) => write!(f, "{}", e),
+            Expression::Binary(e) => write!(f, "{}", e),
+            Expression::Logical(e) => write!(f, "{}", e),
+            Expression::Assignment(e) => write!(f, "{}", e),
+            Expression::Declaration(id, e) => write!(f, "(let {} = {})", id, e),
+            Expression::Function(e) => write!(f, "{}", e),
+            Expression::Block(e) => write!(f, "{}", e),
+            Expression::Invocation(e) => write!(f, "{}", e),
             Expression::Variable(id) => write!(f, "{}", id),
-            Expression::PriorityExpression(e) => write!(f, "({})", e),
-            Expression::ReturnExpression(e) => write!(f, "(return {})", e),
-            Expression::DeviceDeclarationExpression(e) => write!(f, "{}", e),
-            Expression::SyscallExpression(e) => write!(f, "{}", e),
+            Expression::Priority(e) => write!(f, "({})", e),
+            Expression::Return(e) => write!(f, "(return {})", e),
+            Expression::DeviceDeclaration(e) => write!(f, "{}", e),
+            Expression::Syscall(e) => write!(f, "{}", e),
         }
     }
 }

@@ -1,895 +0,0 @@
-pub mod token;
-
-use rust_decimal::Decimal;
-use std::{
-    cmp::Ordering,
-    collections::VecDeque,
-    io::{BufReader, Cursor, Read, Seek, SeekFrom},
-    path::PathBuf,
-};
-use token::{Keyword, Number, Symbol, Temperature, Token, TokenType};
-
-quick_error! {
-    #[derive(Debug)]
-    pub enum TokenizerError {
-        IOError(err: std::io::Error) {
-            from()
-            display("IO Error: {}", err)
-            source(err)
-        }
-        NumberParseError(err: std::num::ParseIntError, line: usize, column: usize) {
-            display("Number Parse Error: {}\nLine: {}, Column: {}", err, line, column)
-            source(err)
-        }
-        DecimalParseError(err: rust_decimal::Error, line: usize, column: usize) {
-            display("Decimal Parse Error: {}\nLine: {}, Column: {}", err, line, column)
-            source(err)
-        }
-        UnknownSymbolError(char: char, line: usize, column: usize) {
-            display("Unknown Symbol: {}\nLine: {}, Column: {}", char, line, column)
-        }
-        UnknownKeywordOrIdentifierError(val: String, line: usize, column: usize) {
-            display("Unknown Keyword or Identifier: {}\nLine: {}, Column: {}", val, line, column)
-        }
-    }
-}
-
-pub trait Tokenize: Read + Seek {}
-
-impl<T> Tokenize for T where T: Read + Seek {}
-
-pub(crate) struct Tokenizer {
-    reader: BufReader<Box<dyn Tokenize>>,
-    char_buffer: [u8; 1],
-    line: usize,
-    column: usize,
-    returned_eof: bool,
-}
-
-impl Tokenizer {
-    pub fn from_path(input_file: impl Into<PathBuf>) -> Result<Self, TokenizerError> {
-        let file = std::fs::File::open(input_file.into())?;
-        let reader = BufReader::new(Box::new(file) as Box<dyn Tokenize>);
-
-        Ok(Self {
-            reader,
-            line: 1,
-            column: 1,
-            char_buffer: [0],
-            returned_eof: false,
-        })
-    }
-}
-
-impl From<String> for Tokenizer {
-    fn from(input: String) -> Self {
-        let reader = BufReader::new(Box::new(Cursor::new(input)) as Box<dyn Tokenize>);
-
-        Self {
-            reader,
-            line: 1,
-            column: 1,
-            char_buffer: [0],
-            returned_eof: false,
-        }
-    }
-}
-
-impl Tokenizer {
-    /// Consumes the tokenizer and returns the next token in the stream
-    /// If there are no more tokens in the stream, this function returns None
-    /// If there is an error reading the stream, this function returns an error
-    ///
-    /// # Important
-    /// This function will increment the line and column counters
-    fn next_char(&mut self) -> Result<Option<char>, TokenizerError> {
-        let bytes_read = self.reader.read(&mut self.char_buffer)?;
-
-        if bytes_read == 0 {
-            return Ok(None);
-        }
-
-        // Safety: The buffer is guaranteed to have 1 value as it is initialized with a size of 1
-        let c = self.char_buffer[0] as char;
-        if c == '\n' {
-            self.line += 1;
-            self.column = 1;
-        } else {
-            self.column += 1;
-        }
-
-        Ok(Some(c))
-    }
-
-    /// Peeks the next character in the stream without consuming it
-    ///
-    /// # Important
-    /// This does not increment the line or column counters
-    fn peek_next_char(&mut self) -> Result<Option<char>, TokenizerError> {
-        let current_pos = self.reader.stream_position()?;
-
-        let to_return = if self.reader.read(&mut self.char_buffer)? == 0 {
-            None
-        } else {
-            self.reader.seek(SeekFrom::Start(current_pos))?;
-
-            // Safety: The buffer is guaranteed to have 1 value as it is initialized with a size of 1
-            Some(self.char_buffer[0] as char)
-        };
-
-        Ok(to_return)
-    }
-
-    /// Skips the current line in the stream.
-    /// Useful for skipping comments or empty lines
-    ///
-    /// # Important
-    /// This function will increment the line and column counters
-    fn skip_line(&mut self) -> Result<(), TokenizerError> {
-        while let Some(next_char) = self.next_char()? {
-            if next_char == '\n' {
-                break;
-            }
-        }
-        Ok(())
-    }
-
-    /// Consumes the tokenizer and returns the next token in the stream
-    /// If there are no more tokens in the stream, this function returns None
-    pub fn next_token(&mut self) -> Result<Option<Token>, TokenizerError> {
-        while let Some(next_char) = self.next_char()? {
-            // skip whitespace
-            if next_char.is_whitespace() {
-                continue;
-            }
-            // skip comments
-            if next_char == '/' && self.peek_next_char()? == Some('/') {
-                self.skip_line()?;
-                continue;
-            }
-
-            match next_char {
-                // numbers
-                '0'..='9' => {
-                    return self.tokenize_number(next_char).map(Some);
-                }
-                // strings
-                '"' | '\'' => return self.tokenize_string(next_char).map(Some),
-                // symbols excluding `"` and `'`
-                char if !char.is_alphanumeric() && char != '"' && char != '\'' => {
-                    return self.tokenize_symbol(next_char).map(Some)
-                }
-                // keywords and identifiers
-                char if char.is_alphabetic() => {
-                    return self.tokenize_keyword_or_identifier(next_char).map(Some)
-                }
-                _ => {
-                    return Err(TokenizerError::UnknownSymbolError(
-                        next_char,
-                        self.line,
-                        self.column,
-                    ))
-                }
-            }
-        }
-        if self.returned_eof {
-            Ok(None)
-        } else {
-            self.returned_eof = true;
-            Ok(Some(Token::new(TokenType::EOF, self.line, self.column)))
-        }
-    }
-
-    /// Peeks the next token in the stream without consuming it
-    /// If there are no more tokens in the stream, this function returns None
-    pub fn peek_next(&mut self) -> Result<Option<Token>, TokenizerError> {
-        let current_pos = self.reader.stream_position()?;
-        let column = self.column.clone();
-        let line = self.line.clone();
-
-        let token = self.next_token()?;
-        self.reader.seek(SeekFrom::Start(current_pos))?;
-        self.column = column;
-        self.line = line;
-        Ok(token)
-    }
-
-    /// Tokenizes a symbol
-    fn tokenize_symbol(&mut self, first_symbol: char) -> Result<Token, TokenizerError> {
-        /// Helper macro to create a symbol token
-        macro_rules! symbol {
-            ($symbol:ident) => {
-                Ok(Token::new(
-                    TokenType::Symbol(Symbol::$symbol),
-                    self.line,
-                    self.column,
-                ))
-            };
-        }
-
-        match first_symbol {
-            // single character symbols
-            '(' => symbol!(LParen),
-            ')' => symbol!(RParen),
-            '{' => symbol!(LBrace),
-            '}' => symbol!(RBrace),
-            '[' => symbol!(LBracket),
-            ']' => symbol!(RBracket),
-            ';' => symbol!(Semicolon),
-            ':' => symbol!(Colon),
-            ',' => symbol!(Comma),
-            '+' => symbol!(Plus),
-            '-' => symbol!(Minus),
-            '/' => symbol!(Slash),
-
-            '.' => symbol!(Dot),
-            '^' => symbol!(Caret),
-
-            // multi-character symbols
-            '<' if self.peek_next_char()? == Some('=') => {
-                self.next_char()?;
-                symbol!(LessThanOrEqual)
-            }
-            '<' => symbol!(LessThan),
-
-            '>' if self.peek_next_char()? == Some('=') => {
-                self.next_char()?;
-                symbol!(GreaterThanOrEqual)
-            }
-            '>' => symbol!(GreaterThan),
-
-            '=' if self.peek_next_char()? == Some('=') => {
-                self.next_char()?;
-                symbol!(Equal)
-            }
-            '=' => symbol!(Assign),
-
-            '!' if self.peek_next_char()? == Some('=') => {
-                self.next_char()?;
-                symbol!(NotEqual)
-            }
-            '!' => symbol!(LogicalNot),
-
-            '*' if self.peek_next_char()? == Some('*') => {
-                self.next_char()?;
-                symbol!(Exp)
-            }
-            '*' => symbol!(Asterisk),
-
-            '&' if self.peek_next_char()? == Some('&') => {
-                self.next_char()?;
-                symbol!(LogicalAnd)
-            }
-            '|' if self.peek_next_char()? == Some('|') => {
-                self.next_char()?;
-                symbol!(LogicalOr)
-            }
-
-            _ => Err(TokenizerError::UnknownSymbolError(
-                first_symbol,
-                self.line,
-                self.column,
-            )),
-        }
-    }
-
-    /// Tokenizes a number literal. Also handles temperatures with a suffix of `c`, `f`, or `k`.
-    fn tokenize_number(&mut self, first_char: char) -> Result<Token, TokenizerError> {
-        let mut primary = String::with_capacity(16);
-        let mut decimal: Option<String> = None;
-        let mut reading_decimal = false;
-
-        let column = self.column.clone();
-        let line = self.line.clone();
-
-        primary.push(first_char);
-
-        while let Some(next_char) = self.peek_next_char()? {
-            if next_char.is_whitespace() {
-                break;
-            }
-
-            if next_char == '.' {
-                reading_decimal = true;
-                self.next_char()?;
-                continue;
-            }
-
-            // support underscores in numbers for readability
-            if next_char == '_' {
-                self.next_char()?;
-                continue;
-            }
-
-            // This is for the times when we have a number followed by a symbol (like a semicolon or =)
-            if !next_char.is_numeric() {
-                break;
-            }
-
-            if reading_decimal {
-                decimal.get_or_insert_with(String::new).push(next_char);
-            } else {
-                primary.push(next_char);
-            }
-            self.next_char()?;
-        }
-
-        let number: Number = if let Some(decimal) = decimal {
-            let decimal_scale = decimal.len() as u32;
-            let number = format!("{}{}", primary, decimal)
-                .parse::<i128>()
-                .map_err(|e| TokenizerError::NumberParseError(e, self.line, self.column))?;
-            Number::Decimal(
-                Decimal::try_from_i128_with_scale(number, decimal_scale)
-                    .map_err(|e| TokenizerError::DecimalParseError(e, line, column))?,
-            )
-        } else {
-            Number::Integer(
-                primary
-                    .parse()
-                    .map_err(|e| TokenizerError::NumberParseError(e, line, column))?,
-            )
-        };
-
-        // check if the next char is a temperature suffix
-        if let Some(next_char) = self.peek_next_char()? {
-            let temperature = match next_char {
-                'c' => Temperature::Celsius(number),
-                'f' => Temperature::Fahrenheit(number),
-                'k' => Temperature::Kelvin(number),
-                _ => return Ok(Token::new(TokenType::Number(number), line, column)),
-            }
-            .to_kelvin();
-
-            self.next_char()?;
-            Ok(Token::new(TokenType::Number(temperature), line, column))
-        } else {
-            Ok(Token::new(TokenType::Number(number), line, column))
-        }
-    }
-
-    /// Tokenizes a string literal
-    fn tokenize_string(&mut self, beginning_quote: char) -> Result<Token, TokenizerError> {
-        let mut buffer = String::with_capacity(16);
-
-        let column = self.column.clone();
-        let line = self.line.clone();
-
-        while let Some(next_char) = self.next_char()? {
-            if next_char == beginning_quote {
-                break;
-            }
-
-            buffer.push(next_char);
-        }
-
-        Ok(Token::new(TokenType::String(buffer), line, column))
-    }
-
-    /// Tokenizes a keyword or an identifier. Also handles boolean literals
-    fn tokenize_keyword_or_identifier(
-        &mut self,
-        first_char: char,
-    ) -> Result<Token, TokenizerError> {
-        macro_rules! keyword {
-            ($keyword:ident) => {{
-                return Ok(Token::new(
-                    TokenType::Keyword(Keyword::$keyword),
-                    self.line,
-                    self.column,
-                ));
-            }};
-        }
-
-        /// Helper macro to check if the next character is whitespace or not alphanumeric
-        macro_rules! next_ws {
-            () => {
-                matches!(self.peek_next_char()?, Some(x) if x.is_whitespace() || !x.is_alphanumeric()) || matches!(self.peek_next_char()?, None)
-            };
-        }
-
-        let mut buffer = String::with_capacity(16);
-        let line = self.line.clone();
-        let column = self.column.clone();
-
-        let mut looped_char = Some(first_char);
-
-        while let Some(next_char) = looped_char {
-            if next_char.is_whitespace() {
-                break;
-            }
-
-            if !next_char.is_alphanumeric() {
-                break;
-            }
-            buffer.push(next_char);
-
-            match buffer.as_str() {
-                "let" if next_ws!() => keyword!(Let),
-                "fn" if next_ws!() => keyword!(Fn),
-                "if" if next_ws!() => keyword!(If),
-                "else" if next_ws!() => keyword!(Else),
-                "return" if next_ws!() => keyword!(Return),
-                "enum" if next_ws!() => keyword!(Enum),
-                "device" if next_ws!() => keyword!(Device),
-                "loop" if next_ws!() => keyword!(Loop),
-                "break" if next_ws!() => keyword!(Break),
-
-                // boolean literals
-                "true" if next_ws!() => {
-                    return Ok(Token::new(TokenType::Boolean(true), self.line, self.column))
-                }
-                "false" if next_ws!() => {
-                    return Ok(Token::new(
-                        TokenType::Boolean(false),
-                        self.line,
-                        self.column,
-                    ))
-                }
-                // if the next character is whitespace or not alphanumeric, then we have an identifier
-                // this is because keywords are checked first
-                val if next_ws!() => {
-                    return Ok(Token::new(
-                        TokenType::Identifier(val.to_string()),
-                        line,
-                        column,
-                    ));
-                }
-                _ => {}
-            }
-
-            looped_char = self.next_char()?;
-        }
-        Err(TokenizerError::UnknownKeywordOrIdentifierError(
-            buffer, line, column,
-        ))
-    }
-}
-
-pub struct TokenizerBuffer {
-    tokenizer: Tokenizer,
-    buffer: VecDeque<Token>,
-    history: VecDeque<Token>,
-}
-
-impl TokenizerBuffer {
-    pub fn new(tokenizer: Tokenizer) -> Self {
-        Self {
-            tokenizer,
-            buffer: VecDeque::new(),
-            history: VecDeque::with_capacity(128),
-        }
-    }
-
-    /// Reads the next token from the tokenizer, pushing the value to the back of the history
-    /// and returning the token
-    pub fn next(&mut self) -> Result<Option<Token>, TokenizerError> {
-        if let Some(token) = self.buffer.pop_front() {
-            self.history.push_back(token.clone());
-            return Ok(Some(token));
-        }
-
-        let token = self.tokenizer.next_token()?;
-        if let Some(ref token) = token {
-            self.history.push_back(token.clone());
-        }
-        Ok(token)
-    }
-
-    /// Peeks the next token in the stream without adding to the history stack
-    pub fn peek(&mut self) -> Result<Option<Token>, TokenizerError> {
-        if let Some(token) = self.buffer.front() {
-            return Ok(Some(token.clone()));
-        }
-
-        let token = self.tokenizer.peek_next()?;
-        Ok(token)
-    }
-
-    fn seek_from_current(&mut self, seek_to: i64) -> Result<(), TokenizerError> {
-        use Ordering::*;
-        // if seek_to > 0 then we need to check if the buffer has enough tokens to pop, otherwise we need to read from the tokenizer
-        // if seek_to < 0 then we need to pop from the history and push to the front of the buffer. If not enough, then we throw (we reached the front of the history)
-        // if seek_to == 0 then we don't need to do anything
-
-        match seek_to.cmp(&0) {
-            Greater => {
-                let mut tokens = Vec::with_capacity(seek_to as usize);
-                for _ in 0..seek_to {
-                    if let Some(token) = self.tokenizer.next_token()? {
-                        tokens.push(token);
-                    } else {
-                        return Err(TokenizerError::IOError(std::io::Error::new(
-                            std::io::ErrorKind::UnexpectedEof,
-                            "Unexpected EOF",
-                        )));
-                    }
-                }
-                self.history.extend(tokens);
-            }
-            Less => {
-                let seek_to = seek_to.unsigned_abs() as usize;
-                let mut tokens = Vec::with_capacity(seek_to);
-                for _ in 0..seek_to {
-                    if let Some(token) = self.history.pop_back() {
-                        tokens.push(token);
-                    } else {
-                        return Err(TokenizerError::IOError(std::io::Error::new(
-                            std::io::ErrorKind::UnexpectedEof,
-                            "Unexpected EOF",
-                        )));
-                    }
-                }
-                self.buffer.extend(tokens.into_iter().rev());
-            }
-            _ => {}
-        }
-
-        Ok(())
-    }
-
-    /// Adds to or removes from the History stack, allowing the user to move back and forth in the stream
-    pub fn seek(&mut self, from: SeekFrom) -> Result<(), TokenizerError> {
-        match from {
-            SeekFrom::Current(seek_to) => self.seek_from_current(seek_to)?,
-            SeekFrom::End(_) => unimplemented!("SeekFrom::End will not be implemented"),
-            SeekFrom::Start(_) => unimplemented!("SeekFrom::Start will not be implemented"),
-        }
-
-        Ok(())
-    }
-}
-
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
use anyhow::Result;
|
|
||||||
use rust_decimal::Decimal;
|
|
||||||
|
|
||||||
const TEST_FILE: &str = "tests/file.stlg";
|
|
||||||
|
|
||||||
const TEST_STRING: &str = r#"
|
|
||||||
fn test() {
|
|
||||||
let x = 10;
|
|
||||||
return x + 2;
|
|
||||||
}
|
|
||||||
"#;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_seek_from_current() -> Result<()> {
|
|
||||||
let tokenizer = Tokenizer::from(TEST_STRING.to_owned());
|
|
||||||
let mut buffer = TokenizerBuffer::new(tokenizer);
|
|
||||||
|
|
||||||
let token = buffer.next()?.unwrap();
|
|
||||||
assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn));
|
|
||||||
|
|
||||||
buffer.seek(SeekFrom::Current(1))?;
|
|
||||||
|
|
||||||
let token = buffer.next()?.unwrap();
|
|
||||||
|
|
||||||
assert_eq!(token.token_type, TokenType::Symbol(Symbol::LParen));
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_tokenizer_from_path_ok() {
|
|
||||||
let tokenizer = Tokenizer::from_path(TEST_FILE);
|
|
||||||
assert!(tokenizer.is_ok());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_tokenizer_from_path_err() {
|
|
||||||
let tokenizer = Tokenizer::from_path("non_existent_file.stlg");
|
|
||||||
assert!(tokenizer.is_err());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_next_char() -> Result<()> {
|
|
||||||
let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());
|
|
||||||
|
|
||||||
let char = tokenizer.next_char()?;
|
|
||||||
|
|
||||||
assert_eq!(char, Some('\n'));
|
|
||||||
assert_eq!(tokenizer.line, 2);
|
|
||||||
assert_eq!(tokenizer.column, 1);
|
|
||||||
|
|
||||||
let mut tokenizer = Tokenizer::from(String::from("fn"));
|
|
||||||
|
|
||||||
let char = tokenizer.next_char()?;
|
|
||||||
|
|
||||||
assert_eq!(char, Some('f'));
|
|
||||||
assert_eq!(tokenizer.line, 1);
|
|
||||||
assert_eq!(tokenizer.column, 2);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_peek_next_char() -> Result<()> {
|
|
||||||
let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());
|
|
||||||
|
|
||||||
let char = tokenizer.peek_next_char()?;
|
|
||||||
|
|
||||||
assert_eq!(char, Some('\n'));
|
|
||||||
assert_eq!(tokenizer.line, 1);
|
|
||||||
assert_eq!(tokenizer.column, 1);
|
|
||||||
|
|
||||||
let char = tokenizer.next_char()?;
|
|
||||||
assert_eq!(char, Some('\n'));
|
|
||||||
assert_eq!(tokenizer.line, 2);
|
|
||||||
assert_eq!(tokenizer.column, 1);
|
|
||||||
|
|
||||||
let char = tokenizer.peek_next_char()?;
|
|
||||||
assert_eq!(char, Some(' '));
|
|
||||||
assert_eq!(tokenizer.line, 2);
|
|
||||||
assert_eq!(tokenizer.column, 1);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_temperature_unit() -> Result<()> {
|
|
||||||
let mut tokenizer = Tokenizer::from(String::from("10c 14f 10k"));
|
|
||||||
|
|
||||||
        let token = tokenizer.next_token()?.unwrap();

        assert_eq!(
            token.token_type,
            TokenType::Number(Number::Decimal(Decimal::new(28315, 2)))
        );

        let token = tokenizer.next_token()?.unwrap();

        assert_eq!(
            token.token_type,
            TokenType::Number(Number::Decimal(Decimal::new(26315, 2)))
        );

        let token = tokenizer.next_token()?.unwrap();

        assert_eq!(token.token_type, TokenType::Number(Number::Integer(10)));

        Ok(())
    }

    #[test]
    fn test_parse_integer() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from("10"));

        let token = tokenizer.next_token()?.unwrap();

        assert_eq!(token.token_type, TokenType::Number(Number::Integer(10)));

        Ok(())
    }

    #[test]
    fn test_parse_integer_with_underscore() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from("1_000"));

        let token = tokenizer.next_token()?.unwrap();

        assert_eq!(token.token_type, TokenType::Number(Number::Integer(1000)));

        Ok(())
    }

    #[test]
    fn test_parse_decimal() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from("10.5"));

        let token = tokenizer.next_token()?.unwrap();

        assert_eq!(
            token.token_type,
            TokenType::Number(Number::Decimal(Decimal::new(105, 1))) // 10.5
        );

        Ok(())
    }

    #[test]
    fn test_parse_decimal_with_underscore() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from("1_000.000_6"));

        let token = tokenizer.next_token()?.unwrap();

        assert_eq!(
            token.token_type,
            TokenType::Number(Number::Decimal(Decimal::new(10000006, 4))) // 1000.0006
        );

        Ok(())
    }

    #[test]
    fn test_parse_number_with_symbol() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from("10;"));

        let token = tokenizer.next_token()?.unwrap();

        assert_eq!(token.token_type, TokenType::Number(Number::Integer(10)));

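        // The trailing `;` is not consumed as part of the number token
        // and remains in the character stream.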
        let next_char = tokenizer.next_char()?;

        assert_eq!(next_char, Some(';'));

        Ok(())
    }

    #[test]
    fn test_string_parse() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from(r#""Hello, World!""#));

        let token = tokenizer.next_token()?.unwrap();

        assert_eq!(
            token.token_type,
            TokenType::String(String::from("Hello, World!"))
        );

        let mut tokenizer = Tokenizer::from(String::from(r#"'Hello, World!'"#));

        let token = tokenizer.next_token()?.unwrap();

        assert_eq!(
            token.token_type,
            TokenType::String(String::from("Hello, World!"))
        );

        Ok(())
    }

    #[test]
    fn test_symbol_parse() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from(
            "^ ! () [] {} , . ; : + - * / < > = != && || >= <=**",
        ));

        let expected_tokens = vec![
            TokenType::Symbol(Symbol::Caret),
            TokenType::Symbol(Symbol::LogicalNot),
            TokenType::Symbol(Symbol::LParen),
            TokenType::Symbol(Symbol::RParen),
            TokenType::Symbol(Symbol::LBracket),
            TokenType::Symbol(Symbol::RBracket),
            TokenType::Symbol(Symbol::LBrace),
            TokenType::Symbol(Symbol::RBrace),
            TokenType::Symbol(Symbol::Comma),
            TokenType::Symbol(Symbol::Dot),
            TokenType::Symbol(Symbol::Semicolon),
            TokenType::Symbol(Symbol::Colon),
            TokenType::Symbol(Symbol::Plus),
            TokenType::Symbol(Symbol::Minus),
            TokenType::Symbol(Symbol::Asterisk),
            TokenType::Symbol(Symbol::Slash),
            TokenType::Symbol(Symbol::LessThan),
            TokenType::Symbol(Symbol::GreaterThan),
            TokenType::Symbol(Symbol::Assign),
            TokenType::Symbol(Symbol::NotEqual),
            TokenType::Symbol(Symbol::LogicalAnd),
            TokenType::Symbol(Symbol::LogicalOr),
            TokenType::Symbol(Symbol::GreaterThanOrEqual),
            TokenType::Symbol(Symbol::LessThanOrEqual),
            TokenType::Symbol(Symbol::Exp),
        ];

        for expected_token in expected_tokens {
            let token = tokenizer.next_token()?.unwrap();

            assert_eq!(token.token_type, expected_token);
        }

        Ok(())
    }

    #[test]
    fn test_keyword_parse() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from("let fn if else return enum"));

        let expected_tokens = vec![
            TokenType::Keyword(Keyword::Let),
            TokenType::Keyword(Keyword::Fn),
            TokenType::Keyword(Keyword::If),
            TokenType::Keyword(Keyword::Else),
            TokenType::Keyword(Keyword::Return),
            TokenType::Keyword(Keyword::Enum),
        ];

        for expected_token in expected_tokens {
            let token = tokenizer.next_token()?.unwrap();

            assert_eq!(token.token_type, expected_token);
        }

        Ok(())
    }

    #[test]
    fn test_identifier_parse() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from("fn test"));

        let token = tokenizer.next_token()?.unwrap();
        assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn));
        let token = tokenizer.next_token()?.unwrap();
        assert_eq!(
            token.token_type,
            TokenType::Identifier(String::from("test"))
        );

        Ok(())
    }

    #[test]
    fn test_boolean_parse() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from("true false"));

        let token = tokenizer.next_token()?.unwrap();
        assert_eq!(token.token_type, TokenType::Boolean(true));
        let token = tokenizer.next_token()?.unwrap();
        assert_eq!(token.token_type, TokenType::Boolean(false));

        Ok(())
    }

    #[test]
    fn test_full_source() -> Result<()> {
        let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());

        let expected_tokens = vec![
            TokenType::Keyword(Keyword::Fn),
            TokenType::Identifier(String::from("test")),
            TokenType::Symbol(Symbol::LParen),
            TokenType::Symbol(Symbol::RParen),
            TokenType::Symbol(Symbol::LBrace),
            TokenType::Keyword(Keyword::Let),
            TokenType::Identifier(String::from("x")),
            TokenType::Symbol(Symbol::Assign),
            TokenType::Number(Number::Integer(10)),
            TokenType::Symbol(Symbol::Semicolon),
            TokenType::Keyword(Keyword::Return),
            TokenType::Identifier(String::from("x")),
            TokenType::Symbol(Symbol::Plus),
            TokenType::Number(Number::Integer(2)),
            TokenType::Symbol(Symbol::Semicolon),
            TokenType::Symbol(Symbol::RBrace),
        ];

        for expected_token in expected_tokens {
            let token = tokenizer.next_token()?.unwrap();

            assert_eq!(token.token_type, expected_token);
        }

        Ok(())
    }

    #[test]
    fn test_peek_next() -> Result<()> {
        let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());

        let column = tokenizer.column;
        let line = tokenizer.line;

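        // Peeking must return the token without consuming it, so the
        // position counters are unchanged afterwards.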
        let peeked_token = tokenizer.peek_next()?;

        assert_eq!(
            peeked_token.unwrap().token_type,
            TokenType::Keyword(Keyword::Fn)
        );
        assert_eq!(tokenizer.column, column);
        assert_eq!(tokenizer.line, line);

        let next_token = tokenizer.next_token()?;

        assert_eq!(
            next_token.unwrap().token_type,
            TokenType::Keyword(Keyword::Fn)
        );
        assert_ne!(tokenizer.column, column);
        assert_ne!(tokenizer.line, line);

        Ok(())
    }
}
@@ -1,221 +0,0 @@
use rust_decimal::Decimal;

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    /// The type of the token
    pub token_type: TokenType,
    /// The line where the token was found
    pub line: usize,
    /// The column where the token was found
    pub column: usize,
}

impl Token {
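    /// Creates a new token at the given line/column position.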
    pub fn new(token_type: TokenType, line: usize, column: usize) -> Self {
        Self {
            token_type,
            line,
            column,
        }
    }
}

#[derive(Debug, PartialEq, Hash, Eq, Clone)]
pub enum Temperature {
    Celsius(Number),
    Fahrenheit(Number),
    Kelvin(Number),
}

impl std::fmt::Display for Temperature {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Temperature::Celsius(n) => write!(f, "{}°C", n),
            Temperature::Fahrenheit(n) => write!(f, "{}°F", n),
            Temperature::Kelvin(n) => write!(f, "{}K", n),
        }
    }
}

impl Temperature {
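    /// Converts the temperature to Kelvin, e.g.
    /// `Celsius(Integer(10))` becomes `Decimal(283.15)`.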
    pub fn to_kelvin(self) -> Number {
        match self {
            Temperature::Celsius(n) => {
                let n = match n {
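                    // NOTE: Number::Integer holds a u128, so `as i64`
                    // silently wraps for values that don't fit.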
                    Number::Integer(i) => Decimal::new(i as i64, 0),
                    Number::Decimal(d) => d,
                };
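                // K = °C + 273.15 (Decimal::new(27315, 2) == 273.15)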
                Number::Decimal(n + Decimal::new(27315, 2))
            }
            Temperature::Fahrenheit(n) => {
                let n = match n {
                    Number::Integer(i) => Decimal::new(i as i64, 0),
                    Number::Decimal(d) => d,
                };

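                // K = (°F - 32) * 5/9 + 273.15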
                let a = n - Decimal::new(32, 0);
                let b = Decimal::new(5, 0) / Decimal::new(9, 0);
                Number::Decimal(a * b + Decimal::new(27315, 2))
            }
            Temperature::Kelvin(n) => n,
        }
    }
}

#[derive(Debug, PartialEq, Hash, Eq, Clone)]
pub enum TokenType {
    /// Represents a string token
    String(String),
    /// Represents a number token
    Number(Number),
    /// Represents a boolean token
    Boolean(bool),
    /// Represents a keyword token
    Keyword(Keyword),
    /// Represents an identifier token
    Identifier(String),
    /// Represents a symbol token
    Symbol(Symbol),
    /// Represents an end of file token
    EOF,
}

impl std::fmt::Display for TokenType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            TokenType::String(s) => write!(f, "{}", s),
            TokenType::Number(n) => write!(f, "{}", n),
            TokenType::Boolean(b) => write!(f, "{}", b),
            TokenType::Keyword(k) => write!(f, "{:?}", k),
            TokenType::Identifier(i) => write!(f, "{}", i),
            TokenType::Symbol(s) => write!(f, "{:?}", s),
            TokenType::EOF => write!(f, "EOF"),
        }
    }
}

#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy)]
pub enum Number {
    /// Represents an integer number
    Integer(u128),
    /// Represents a fixed-point decimal number (a 96-bit scaled integer
    /// from the `rust_decimal` crate)
    Decimal(Decimal),
}

impl std::fmt::Display for Number {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Number::Integer(i) => write!(f, "{}", i),
            Number::Decimal(d) => write!(f, "{}", d),
        }
    }
}

#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy)]
pub enum Symbol {
    // Single Character Symbols
    /// Represents the `(` symbol
    LParen,
    /// Represents the `)` symbol
    RParen,
    /// Represents the `{` symbol
    LBrace,
    /// Represents the `}` symbol
    RBrace,
    /// Represents the `[` symbol
    LBracket,
    /// Represents the `]` symbol
    RBracket,
    /// Represents the `;` symbol
    Semicolon,
    /// Represents the `:` symbol
    Colon,
    /// Represents the `,` symbol
    Comma,
    /// Represents the `+` symbol
    Plus,
    /// Represents the `-` symbol
    Minus,
    /// Represents the `*` symbol
    Asterisk,
    /// Represents the `/` symbol
    Slash,
    /// Represents the `<` symbol
    LessThan,
    /// Represents the `>` symbol
    GreaterThan,
    /// Represents the `=` symbol
    Assign,
    /// Represents the `!` symbol
    LogicalNot,
    /// Represents the `.` symbol
    Dot,
    /// Represents the `^` symbol
    Caret,

    // Double Character Symbols
    /// Represents the `==` symbol
    Equal,
    /// Represents the `!=` symbol
    NotEqual,
    /// Represents the `&&` symbol
    LogicalAnd,
    /// Represents the `||` symbol
    LogicalOr,
    /// Represents the `<=` symbol
    LessThanOrEqual,
    /// Represents the `>=` symbol
    GreaterThanOrEqual,
    /// Represents the `**` symbol
    Exp,
}

impl Symbol {
    pub fn is_operator(&self) -> bool {
        matches!(
            self,
            Symbol::Plus | Symbol::Minus | Symbol::Asterisk | Symbol::Slash | Symbol::Exp
        )
    }

    pub fn is_comparison(&self) -> bool {
        matches!(
            self,
            Symbol::LessThan
                | Symbol::GreaterThan
                | Symbol::Equal
                | Symbol::NotEqual
                | Symbol::LessThanOrEqual
                | Symbol::GreaterThanOrEqual
        )
    }

    pub fn is_logical(&self) -> bool {
        matches!(self, Symbol::LogicalAnd | Symbol::LogicalOr)
    }
}

#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy)]
pub enum Keyword {
    /// Represents the `let` keyword
    Let,
    /// Represents the `fn` keyword
    Fn,
    /// Represents the `if` keyword
    If,
    /// Represents the `device` keyword, used to define a device at a
    /// specific address (e.g. d0, d1, d2, ...)
    Device,
    /// Represents the `else` keyword
    Else,
    /// Represents the `return` keyword
    Return,
    /// Represents the `enum` keyword
    Enum,
    /// Represents the `loop` keyword
    Loop,
    /// Represents the `break` keyword
    Break,
}