Files
stationeers_lang/rust_compiler/libs/tokenizer/src/token.rs

846 lines
25 KiB
Rust

use std::borrow::Cow;
use helpers::prelude::*;
use logos::{Lexer, Logos, Skip, Span};
use lsp_types::{Diagnostic, DiagnosticSeverity, Position, Range};
use rust_decimal::Decimal;
use thiserror::Error;
#[derive(Debug, Error, Default, Clone, PartialEq)]
pub enum LexError {
#[error("Attempted to parse an invalid number: {2}")]
NumberParse(usize, Span, String),
#[error("An invalid character was found in token stream: {2}")]
InvalidInput(usize, Span, String),
#[default]
#[error("An unknown error occurred")]
Other,
}
impl From<LexError> for Diagnostic {
fn from(value: LexError) -> Self {
match value {
LexError::NumberParse(line, col, str) | LexError::InvalidInput(line, col, str) => {
Diagnostic {
range: Range {
start: Position {
character: col.start as u32,
line: line as u32,
},
end: Position {
line: line as u32,
character: col.end as u32,
},
},
severity: Some(DiagnosticSeverity::ERROR),
message: str,
..Default::default()
}
}
_ => Diagnostic::default(),
}
}
}
impl LexError {
pub fn from_lexer<'a>(lex: &mut Lexer<'a, TokenType<'a>>) -> Self {
let mut span = lex.span();
let line = lex.extras.line_count;
span.start -= lex.extras.line_start_index;
span.end -= lex.extras.line_start_index;
Self::InvalidInput(line, span, lex.slice().chars().as_str().to_string())
}
}
// Define a local macro to consume the list
macro_rules! generate_check {
($($name:literal),*) => {
pub fn is_syscall(s: &str) -> bool {
matches!(s, $($name)|*)
}
}
}
#[derive(Default)]
pub struct Extras {
pub line_count: usize,
pub line_start_index: usize,
}
fn update_line_index<'a>(lex: &mut Lexer<'a, TokenType<'a>>) -> Skip {
lex.extras.line_count += 1;
lex.extras.line_start_index = lex.span().end;
Skip
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token<'a> {
/// The type of the token
pub token_type: TokenType<'a>,
/// The line where the token was found
pub line: usize,
/// The span where the token starts and ends
pub span: Span,
}
impl<'a> std::fmt::Display for Token<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.token_type)
}
}
impl<'a> Token<'a> {
pub fn new(token_type: TokenType<'a>, line: usize, span: Span) -> Self {
Self {
token_type,
line,
span,
}
}
}
macro_rules! symbol {
($var:ident) => {
|_| Symbol::$var
};
}
macro_rules! keyword {
($var:ident) => {
|_| Keyword::$var
};
}
#[derive(Debug, PartialEq, Hash, Eq, Clone, Logos)]
#[logos(skip r"[ \t\f]+")]
#[logos(extras = Extras)]
#[logos(error(LexError, LexError::from_lexer))]
pub enum TokenType<'a> {
#[regex(r"\n", update_line_index)]
Newline,
// matches strings with double quotes
#[regex(r#""(?:[^"\\]|\\.)*""#, |v| {
let str = v.slice();
Cow::from(&str[1..str.len() - 1])
})]
// matches strings with single quotes
#[regex(r#"'(?:[^'\\]|\\.)*'"#, |v| {
let str = v.slice();
Cow::from(&str[1..str.len() - 1])
})]
/// Represents a string token
String(Cow<'a, str>),
#[regex(r"[0-9][0-9_]*(\.[0-9][0-9_]*)?([cfk])?", parse_number)]
/// Represents a number token
Number(Number),
#[token("true", |_| true)]
#[token("false", |_| false)]
/// Represents a boolean token
Boolean(bool),
#[token("continue", keyword!(Continue))]
#[token("const", keyword!(Const))]
#[token("let", keyword!(Let))]
#[token("fn", keyword!(Fn))]
#[token("if", keyword!(If))]
#[token("device", keyword!(Device))]
#[token("else", keyword!(Else))]
#[token("return", keyword!(Return))]
#[token("enum", keyword!(Enum))]
#[token("loop", keyword!(Loop))]
#[token("break", keyword!(Break))]
#[token("while", keyword!(While))]
/// Represents a keyword token
Keyword(Keyword),
#[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |v| Cow::from(v.slice()))]
/// Represents an identifier token
Identifier(Cow<'a, str>),
#[token("(", symbol!(LParen))]
#[token(")", symbol!(RParen))]
#[token("{", symbol!(LBrace))]
#[token("}", symbol!(RBrace))]
#[token("[", symbol!(LBracket))]
#[token("]", symbol!(RBracket))]
#[token(";", symbol!(Semicolon))]
#[token(":", symbol!(Colon))]
#[token(",", symbol!(Comma))]
#[token("+", symbol!(Plus))]
#[token("-", symbol!(Minus))]
#[token("*", symbol!(Asterisk))]
#[token("/", symbol!(Slash))]
#[token("<", symbol!(LessThan))]
#[token(">", symbol!(GreaterThan))]
#[token("=", symbol!(Assign))]
#[token("!", symbol!(LogicalNot))]
#[token(".", symbol!(Dot))]
#[token("^", symbol!(Caret))]
#[token("%", symbol!(Percent))]
#[token("?", symbol!(Question))]
#[token("==", symbol!(Equal))]
#[token("!=", symbol!(NotEqual))]
#[token("&&", symbol!(LogicalAnd))]
#[token("||", symbol!(LogicalOr))]
#[token("<=", symbol!(LessThanOrEqual))]
#[token(">=", symbol!(GreaterThanOrEqual))]
#[token("**", symbol!(Exp))]
/// Represents a symbol token
Symbol(Symbol),
#[token("//", |lex| Comment::Line(read_line(lex)))]
#[token("///", |lex| Comment::Doc(read_line(lex)))]
/// Represents a comment, both a line comment and a doc comment
Comment(Comment<'a>),
#[end]
/// Represents an end of file token
EOF,
}
fn read_line<'a>(lexer: &mut Lexer<'a, TokenType<'a>>) -> Cow<'a, str> {
let rem = lexer.remainder();
let len = rem.find('\n').unwrap_or(rem.len());
let content = rem[..len].trim().to_string();
lexer.bump(len);
Cow::from(content)
}
#[derive(Hash, Debug, Eq, PartialEq, Clone)]
pub enum Comment<'a> {
Line(Cow<'a, str>),
Doc(Cow<'a, str>),
}
fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType<'a>>) -> Result<Number, LexError> {
let slice = lexer.slice();
let last_char = slice.chars().last().unwrap_or_default();
let (num_str, suffix) = match last_char {
'c' | 'k' | 'f' => (&slice[..slice.len() - 1], Some(last_char)),
_ => (slice, None),
};
let clean_str = if num_str.contains('_') {
num_str.replace('_', "")
} else {
num_str.to_string()
};
let line = lexer.extras.line_count;
let mut span = lexer.span();
span.end -= lexer.extras.line_start_index;
span.start -= lexer.extras.line_start_index;
let unit = match suffix {
Some('c') => Unit::Celsius,
Some('f') => Unit::Fahrenheit,
Some('k') => Unit::Kelvin,
_ => Unit::None,
};
if clean_str.contains('.') {
Ok(Number::Decimal(
clean_str
.parse::<Decimal>()
.map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?,
unit,
))
} else {
Ok(Number::Integer(
clean_str
.parse::<i128>()
.map_err(|_| LexError::NumberParse(line, span, slice.to_string()))?,
unit,
))
}
}
impl<'a> std::fmt::Display for Comment<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Line(c) => write!(f, "// {}", c),
Self::Doc(d) => {
let lines = d
.split('\n')
.map(|s| format!("/// {s}"))
.collect::<Vec<_>>()
.join("\n");
write!(f, "{}", lines)
}
}
}
}
impl<'a> Documentation for TokenType<'a> {
fn docs(&self) -> String {
match self {
Self::Keyword(k) => k.docs(),
_ => "".into(),
}
}
fn get_all_documentation() -> Vec<(&'static str, String)> {
Keyword::get_all_documentation()
}
}
helpers::with_syscalls!(generate_check);
impl<'a> From<TokenType<'a>> for u32 {
fn from(value: TokenType) -> Self {
match value {
TokenType::String(_) => 1,
TokenType::Number(_) => 2,
TokenType::Boolean(_) => 3,
TokenType::Keyword(k) => match k {
Keyword::If
| Keyword::Else
| Keyword::Loop
| Keyword::While
| Keyword::Break
| Keyword::Continue
| Keyword::Return => 4,
_ => 5,
},
TokenType::Comment(_) => 8,
TokenType::Identifier(s) => {
if is_syscall(&s) {
10
} else {
6
}
}
TokenType::Symbol(s) => {
if s.is_comparison() {
11
} else if s.is_operator() {
12
} else if s.is_logical() {
13
} else {
7
}
}
_ => 0,
}
}
}
impl<'a> std::fmt::Display for TokenType<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TokenType::String(s) => write!(f, "{}", s),
TokenType::Number(n) => write!(f, "{}", n),
TokenType::Boolean(b) => write!(f, "{}", b),
TokenType::Keyword(k) => write!(f, "{:?}", k),
TokenType::Identifier(i) => write!(f, "{}", i),
TokenType::Symbol(s) => write!(f, "{}", s),
TokenType::Comment(c) => write!(f, "{}", c),
TokenType::EOF => write!(f, "EOF"),
_ => write!(f, ""),
}
}
}
#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy)]
pub enum Unit {
None,
Celsius,
Fahrenheit,
Kelvin,
}
#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy)]
pub enum Number {
/// Represents an integer number
Integer(i128, Unit),
/// Represents a decimal type number with a precision of 64 bits
Decimal(Decimal, Unit),
}
impl Number {
pub fn unit(&self) -> Unit {
match self {
Number::Integer(_, u) => *u,
Number::Decimal(_, u) => *u,
}
}
pub fn has_unit(&self) -> bool {
self.unit() != Unit::None
}
}
impl From<bool> for Number {
fn from(value: bool) -> Self {
Self::Integer(if value { 1 } else { 0 }, Unit::None)
}
}
impl From<Number> for Decimal {
fn from(value: Number) -> Self {
let (val, unit) = match value {
Number::Decimal(d, u) => (d, u),
Number::Integer(i, u) => (Decimal::from(i), u),
};
match unit {
Unit::None | Unit::Kelvin => val,
Unit::Celsius => val + Decimal::new(27315, 2),
Unit::Fahrenheit => {
(val - Decimal::new(32, 0)) * Decimal::new(5, 0) / Decimal::new(9, 0)
+ Decimal::new(27315, 2)
}
}
}
}
impl std::ops::Neg for Number {
type Output = Number;
fn neg(self) -> Self::Output {
match self {
Self::Integer(i, u) => Self::Integer(-i, u),
Self::Decimal(d, u) => Self::Decimal(-d, u),
}
}
}
fn determine_target_unit(lhs_unit: Unit, rhs_unit: Unit) -> Option<Unit> {
if lhs_unit == rhs_unit {
return Some(lhs_unit);
}
if lhs_unit != Unit::None && rhs_unit == Unit::None {
return Some(lhs_unit);
}
if lhs_unit == Unit::None && rhs_unit != Unit::None {
return Some(rhs_unit);
}
// Mismatched units (C + F) -> Fallback to Kelvin/None
None
}
impl std::ops::Add for Number {
type Output = Number;
fn add(self, rhs: Self) -> Self::Output {
// If we can determine a common target unit (e.g. C + C = C, or C + Scalar = C),
// we preserve that unit. Otherwise, we convert to Kelvin (Decimal) and return Unit::None.
if let Some(target_unit) = determine_target_unit(self.unit(), rhs.unit()) {
return match (self, rhs) {
(Self::Integer(l, _), Self::Integer(r, _)) => Number::Integer(l + r, target_unit),
(Self::Decimal(l, _), Self::Decimal(r, _)) => Number::Decimal(l + r, target_unit),
(Self::Integer(l, _), Self::Decimal(r, _)) => {
Number::Decimal(Decimal::from(l) + r, target_unit)
}
(Self::Decimal(l, _), Self::Integer(r, _)) => {
Number::Decimal(l + Decimal::from(r), target_unit)
}
};
}
let l: Decimal = self.into();
let r: Decimal = rhs.into();
Number::Decimal(l + r, Unit::None)
}
}
impl std::ops::Sub for Number {
type Output = Number;
fn sub(self, rhs: Self) -> Self::Output {
if let Some(target_unit) = determine_target_unit(self.unit(), rhs.unit()) {
return match (self, rhs) {
(Self::Integer(l, _), Self::Integer(r, _)) => Number::Integer(l - r, target_unit),
(Self::Decimal(l, _), Self::Decimal(r, _)) => Number::Decimal(l - r, target_unit),
(Self::Integer(l, _), Self::Decimal(r, _)) => {
Number::Decimal(Decimal::from(l) - r, target_unit)
}
(Self::Decimal(l, _), Self::Integer(r, _)) => {
Number::Decimal(l - Decimal::from(r), target_unit)
}
};
}
let l: Decimal = self.into();
let r: Decimal = rhs.into();
Number::Decimal(l - r, Unit::None)
}
}
impl std::ops::Mul for Number {
type Output = Number;
fn mul(self, rhs: Self) -> Self::Output {
if let Some(target_unit) = determine_target_unit(self.unit(), rhs.unit()) {
return match (self, rhs) {
(Number::Integer(l, _), Number::Integer(r, _)) => {
Number::Integer(l * r, target_unit)
}
(Number::Integer(l, _), Number::Decimal(r, _)) => {
Number::Decimal(Decimal::from(l) * r, target_unit)
}
(Number::Decimal(l, _), Number::Integer(r, _)) => {
Number::Decimal(l * Decimal::from(r), target_unit)
}
(Number::Decimal(l, _), Number::Decimal(r, _)) => {
Number::Decimal(l * r, target_unit)
}
};
}
let l: Decimal = self.into();
let r: Decimal = rhs.into();
Number::Decimal(l * r, Unit::None)
}
}
impl std::ops::Div for Number {
type Output = Number;
fn div(self, rhs: Self) -> Self::Output {
if let Some(target_unit) = determine_target_unit(self.unit(), rhs.unit()) {
// Division always promotes to Decimal
let l_val = match self {
Self::Integer(i, _) => Decimal::from(i),
Self::Decimal(d, _) => d,
};
let r_val = match rhs {
Self::Integer(i, _) => Decimal::from(i),
Self::Decimal(d, _) => d,
};
return Number::Decimal(l_val / r_val, target_unit);
}
let l: Decimal = self.into();
let r: Decimal = rhs.into();
Number::Decimal(l / r, Unit::None)
}
}
impl std::ops::Rem for Number {
type Output = Number;
fn rem(self, rhs: Self) -> Self::Output {
if let Some(target_unit) = determine_target_unit(self.unit(), rhs.unit()) {
let l_val = match self {
Self::Integer(i, _) => Decimal::from(i),
Self::Decimal(d, _) => d,
};
let r_val = match rhs {
Self::Integer(i, _) => Decimal::from(i),
Self::Decimal(d, _) => d,
};
return Number::Decimal(l_val % r_val, target_unit);
}
let l: Decimal = self.into();
let r: Decimal = rhs.into();
Number::Decimal(l % r, Unit::None)
}
}
impl std::fmt::Display for Number {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let (val, unit) = match self {
Number::Integer(i, u) => (i.to_string(), u),
Number::Decimal(d, u) => (d.to_string(), u),
};
match unit {
Unit::None => write!(f, "{}", val),
Unit::Celsius => write!(f, "{}c", val),
Unit::Fahrenheit => write!(f, "{}f", val),
Unit::Kelvin => write!(f, "{}k", val),
}
}
}
impl std::convert::From<Number> for String {
fn from(value: Number) -> Self {
value.to_string()
}
}
#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy)]
pub enum Symbol {
// Single Character Symbols
/// Represents the `(` symbol
LParen,
/// Represents the `)` symbol
RParen,
/// Represents the `{` symbol
LBrace,
/// Represents the `}` symbol
RBrace,
/// Represents the `[` symbol
LBracket,
/// Represents the `]` symbol
RBracket,
/// Represents the `;` symbol
Semicolon,
/// Represents the `:` symbol
Colon,
/// Represents the `,` symbol
Comma,
/// Represents the `+` symbol
Plus,
/// Represents the `-` symbol
Minus,
/// Represents the `*` symbol
Asterisk,
/// Represents the `/` symbol
Slash,
/// Represents the `<` symbol
LessThan,
/// Represents the `>` symbol
GreaterThan,
/// Represents the `=` symbol
Assign,
/// Represents the `!` symbol
LogicalNot,
/// Represents the `.` symbol
Dot,
/// Represents the `^` symbol
Caret,
/// Represents the `%` symbol
Percent,
/// Represents the `?` symbol
Question,
// Double Character Symbols
/// Represents the `==` symbol
Equal,
/// Represents the `!=` symbol
NotEqual,
/// Represents the `&&` Symbol
LogicalAnd,
// Represents the `||` Symbol
LogicalOr,
/// Represents the `<=` symbol
LessThanOrEqual,
/// Represents the `>=` symbol
GreaterThanOrEqual,
/// Represents the `**` symbol
Exp,
}
impl Symbol {
pub fn is_operator(&self) -> bool {
matches!(
self,
Symbol::Plus
| Symbol::Minus
| Symbol::Asterisk
| Symbol::Slash
| Symbol::Exp
| Symbol::Percent
)
}
pub fn is_comparison(&self) -> bool {
matches!(
self,
Symbol::LessThan
| Symbol::GreaterThan
| Symbol::Equal
| Symbol::NotEqual
| Symbol::LessThanOrEqual
| Symbol::GreaterThanOrEqual,
)
}
pub fn is_logical(&self) -> bool {
matches!(self, Symbol::LogicalAnd | Symbol::LogicalOr)
}
}
impl std::fmt::Display for Symbol {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Percent => write!(f, "%"),
Self::LParen => write!(f, "("),
Self::RParen => write!(f, ")"),
Self::LBrace => write!(f, "{{"),
Self::RBrace => write!(f, "}}"),
Self::LBracket => write!(f, "["),
Self::RBracket => write!(f, "]"),
Self::Semicolon => write!(f, ";"),
Self::Colon => write!(f, ":"),
Self::Plus => write!(f, "+"),
Self::Minus => write!(f, "-"),
Self::Comma => write!(f, ","),
Self::Asterisk => write!(f, "*"),
Self::Slash => write!(f, "/"),
Self::LessThan => write!(f, "<"),
Self::Question => write!(f, "?"),
Self::LessThanOrEqual => write!(f, "<="),
Self::GreaterThan => write!(f, ">"),
Self::GreaterThanOrEqual => write!(f, ">="),
Self::Assign => write!(f, "="),
Self::Equal => write!(f, "=="),
Self::LogicalAnd => write!(f, "&&"),
Self::LogicalOr => write!(f, "||"),
Self::LogicalNot => write!(f, "!"),
Self::NotEqual => write!(f, "!="),
Self::Dot => write!(f, "."),
Self::Caret => write!(f, "^"),
Self::Exp => write!(f, "**"),
}
}
}
documented! {
#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy)]
pub enum Keyword {
/// Represents the `continue` keyword. This will allow you to bypass the current iteration in a loop and start the next one.
/// ## Example
/// ```
/// let item = 0;
/// loop {
/// if (item % 2 == 0) {
/// // This will NOT increment `item` and will continue with the next iteration of the
/// // loop
/// continue;
/// }
/// item = item + 1;
/// }
/// ```
Continue,
/// Prepresents the `const` keyword. This allows you to define a variable that will never
/// change throughout the lifetime of the program, similar to `define` in IC10. If you are
/// not planning on mutating the variable (changing it), it is recommend you store it as a
/// const, as the compiler will not assign it to a register or stack variable.
///
/// ## Example
/// ```
/// const targetTemp = 20c;
/// device gasSensor = "d0";
/// device airCon = "d1";
///
/// airCon.On = gasSensor.Temperature > targetTemp;
/// ```
Const,
/// Represents the `let` keyword, used to declare variables within Slang.
/// ## Example
/// ```
/// // This variable now exists either in a register or the stack depending on how many
/// // free registers were available when declaring it.
/// let item = 0;
/// ```
Let,
/// Represents the `fn` keyword, used to declare functions within Slang.
/// # WARNING
/// Functions are currently unstable and are subject to change until stabilized. Use at
/// your own risk! (They are also heavily not optimized and produce a LOT of code bloat)
/// ## Example
/// ```
/// // This allows you to now call `doSomething` with specific arguments.
/// fn doSomething(arg1, arg2) {
///
/// }
/// ```
Fn,
/// Represents the `if` keyword, allowing you to create branched logic.
/// ## Example
/// ```
/// let i = 0;
/// if (i == 0) {
/// i = 1;
/// }
/// // At this line, `i` is now `1`
/// ```
If,
/// Represents the `device` keyword. Useful for defining a device at a specific address
/// (ex. d0, d1, d2, etc.). This also allows you to perform direct operations ON a device.
/// ## Example
/// ```
/// device self = "db";
///
/// // This is the same as `s db Setting 123`
/// self.Setting = 123;
/// ```
Device,
/// Represents the `else` keyword. Useful if you want to check a condition but run run
/// seperate logic in case that condition fails.
/// ## Example
/// ```
/// device self = "db";
/// let i = 0;
/// if (i < 0) {
/// self.Setting = 0;
/// } else {
/// self.Setting = 1;
/// }
/// // Here, the `Setting` on the current housing is `1` because i was NOT less than 0
/// ```
Else,
/// Represents the `return` keyword. Allows you to pass values from a function back to
/// the caller.
/// ## Example
/// ```
/// fn doSomething() {
/// return 1 + 2;
/// }
///
/// // `returnedValue` now holds the value `3`
/// let returnedValue = doSomething();
/// ```
Return,
/// Represents the `enum` keyword. This is currently not supported, but is kept as a
/// reserved keyword in the future case that this is implemented.
Enum,
/// Represents the `loop` keyword. This allows you to create an infinate loop, but can be
/// broken with the `break` keyword.
/// ## Example
/// ```
/// device self = "db";
/// let i = 0;
/// loop {
/// i = i + 1;
/// // The current housing will infinately increment it's `Setting` value.
/// self.Setting = i;
/// }
/// ```
Loop,
/// Represents the `break` keyword. This allows you to "break out of" a loop prematurely,
/// such as when an if() conditon is true, etc.
/// ## Example
/// ```
/// let i = 0;
/// // This loop will run until the value of `i` is greater than 10,000,
/// // which will then trigger the `break` keyword and it will stop looping
/// loop {
/// if (i > 10_000) {
/// break;
/// }
/// i = i + 1;
/// }
/// ```
Break,
/// Represents the `while` keyword. This is similar to the `loop` keyword but different in
/// that you don't need an `if` statement to break out of a loop, that is handled
/// automatically when invoking `while`
/// ## Example
/// ```
/// let i = 0;
/// // This loop will run until the value of `i` is greater than 10,000, in which case the
/// // while loop will automatically stop running and code will continue AFTER the last
/// // bracket.
/// while (i < 10_000) {
/// i = i + 1;
/// }
/// ```
While,
}
}