First pass getting a logos tokenizer up and running

rust_compiler/Cargo.lock (generated, +73 lines)

@@ -28,6 +28,15 @@ dependencies = [
  "version_check",
 ]
 
+[[package]]
+name = "aho-corasick"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "anstream"
 version = "0.6.21"
@@ -114,6 +123,12 @@ dependencies = [
  "windows-link",
 ]
 
+[[package]]
+name = "beef"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
+
 [[package]]
 name = "bitflags"
 version = "1.3.2"
@@ -327,6 +342,12 @@ dependencies = [
  "bitflags",
 ]
 
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
 [[package]]
 name = "funty"
 version = "2.0.0"
@@ -434,6 +455,40 @@ version = "0.2.178"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"
 
+[[package]]
+name = "logos"
+version = "0.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a790d11254054e5dc83902dba85d253ff06ceb0cfafb12be8773435cb9dfb4f4"
+dependencies = [
+ "logos-derive",
+]
+
+[[package]]
+name = "logos-codegen"
+version = "0.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f60337c43a38313b58871f8d5d76872b8e17aa9d51fad494b5e76092c0ce05f5"
+dependencies = [
+ "beef",
+ "fnv",
+ "proc-macro2",
+ "quote",
+ "regex-automata",
+ "regex-syntax",
+ "rustc_version",
+ "syn 2.0.111",
+]
+
+[[package]]
+name = "logos-derive"
+version = "0.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d151b2ae667f69e10b8738f5cac0c746faa22b2e15ea7e83b55476afec3767dc"
+dependencies = [
+ "logos-codegen",
+]
+
 [[package]]
 name = "lsp-types"
 version = "0.97.0"
@@ -644,6 +699,23 @@ dependencies = [
  "getrandom",
 ]
 
+[[package]]
+name = "regex-automata"
+version = "0.4.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
+
 [[package]]
 name = "rend"
 version = "0.4.2"
@@ -947,6 +1019,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "helpers",
+ "logos",
  "lsp-types",
  "quick-error",
  "rust_decimal",

@@ -8,6 +8,7 @@ rust_decimal = { workspace = true }
quick-error = { workspace = true }
 lsp-types = { workspace = true }
 helpers = { path = "../helpers" }
+logos = "0.16"
 
 [dev-dependencies]
 anyhow = { version = "^1" }

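The manifest change above only adds the `logos = "0.16"` dependency; the lexer changes that follow build on its derive macro. As a quick orientation, here is a minimal, self-contained sketch of that derive pattern (hypothetical token names, not code from this commit):

    use logos::Logos;

    // Hypothetical mini-lexer, shown only to illustrate the derive pattern used in the diff below.
    #[derive(Logos, Debug, PartialEq)]
    #[logos(skip r"[ \t\r\n]+")] // whitespace is skipped rather than tokenized
    enum Tok {
        #[token("let")]
        Let,
        // The callback parses the matched slice; returning None marks the match as an error.
        #[regex(r"[0-9]+", |lex| lex.slice().parse::<i64>().ok())]
        Int(i64),
        #[regex(r"[A-Za-z_][A-Za-z0-9_]*", |lex| lex.slice().to_string())]
        Ident(String),
    }

    fn main() {
        let tokens: Vec<_> = Tok::lexer("let x 42").collect();
        println!("{:?}", tokens); // [Ok(Let), Ok(Ident("x")), Ok(Int(42))]
    }

Each `#[token]` or `#[regex]` attribute attaches a pattern to a variant, and an optional callback builds the variant's payload; the `symbol!` and `keyword!` helper macros in the diff below are thin wrappers around such callbacks.
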
@@ -1,4 +1,5 @@
 use helpers::prelude::*;
+use logos::{Lexer, Logos};
 use rust_decimal::Decimal;
 
 // Define a local macro to consume the list
@@ -79,24 +80,153 @@ impl Temperature {
     }
 }
 
-#[derive(Debug, PartialEq, Hash, Eq, Clone)]
+macro_rules! symbol {
+    ($var:ident) => {
+        |_| Symbol::$var
+    };
+}
+
+macro_rules! keyword {
+    ($var:ident) => {
+        |_| Keyword::$var
+    };
+}
+
+#[derive(Debug, PartialEq, Hash, Eq, Clone, Logos)]
 pub enum TokenType {
+    // matches strings with double quotes
+    #[regex(r#""(?:[^"\\]|\\.)*""#, |v| v.slice().to_string())]
+    // matches strings with single quotes
+    #[regex(r#"'(?:[^'\\]|\\.)*'"#, |v| v.slice().to_string())]
     /// Represents a string token
     String(String),
+
+    #[regex(r"[0-9][0-9_]*(\.[0-9][0-9_]*)?([cfk])?", parse_number)]
     /// Represents a number token
     Number(Number),
+
+    #[token("true", |_| true)]
+    #[token("false", |_| false)]
     /// Represents a boolean token
     Boolean(bool),
+
+    #[token("continue", keyword!(Continue))]
+    #[token("const", keyword!(Const))]
+    #[token("let", keyword!(Let))]
+    #[token("fn", keyword!(Fn))]
+    #[token("if", keyword!(If))]
+    #[token("device", keyword!(Device))]
+    #[token("else", keyword!(Else))]
+    #[token("return", keyword!(Return))]
+    #[token("enum", keyword!(Enum))]
+    #[token("loop", keyword!(Loop))]
+    #[token("break", keyword!(Break))]
+    #[token("while", keyword!(While))]
     /// Represents a keyword token
     Keyword(Keyword),
+
+    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |v| v.slice().to_string())]
     /// Represents an identifier token
     Identifier(String),
+
+    #[token("(", symbol!(LParen))]
+    #[token(")", symbol!(RParen))]
+    #[token("{", symbol!(LBrace))]
+    #[token("}", symbol!(RBrace))]
+    #[token("[", symbol!(LBracket))]
+    #[token("]", symbol!(RBracket))]
+    #[token(";", symbol!(Semicolon))]
+    #[token(":", symbol!(Colon))]
+    #[token(",", symbol!(Comma))]
+    #[token("+", symbol!(Plus))]
+    #[token("-", symbol!(Minus))]
+    #[token("*", symbol!(Asterisk))]
+    #[token("/", symbol!(Slash))]
+    #[token("<", symbol!(LessThan))]
+    #[token(">", symbol!(GreaterThan))]
+    #[token("=", symbol!(Assign))]
+    #[token("!", symbol!(LogicalNot))]
+    #[token(".", symbol!(Dot))]
+    #[token("^", symbol!(Caret))]
+    #[token("%", symbol!(Percent))]
+    #[token("==", symbol!(Equal))]
+    #[token("!=", symbol!(NotEqual))]
+    #[token("&&", symbol!(LogicalAnd))]
+    #[token("||", symbol!(LogicalOr))]
+    #[token("<=", symbol!(LessThanOrEqual))]
+    #[token(">=", symbol!(GreaterThanOrEqual))]
+    #[token("**", symbol!(Exp))]
     /// Represents a symbol token
     Symbol(Symbol),
+
+    #[regex(r"///[^\n]*", |val| Comment::Doc(val.slice()[3..].trim().to_string()))]
+    #[regex(r"//[^\n]*", |val| Comment::Line(val.slice()[2..].trim().to_string()))]
+    /// Represents a comment, both a line comment and a doc comment
+    Comment(Comment),
+
+    #[end]
     /// Represents an end of file token
     EOF,
 }
 
+#[derive(Hash, Debug, Eq, PartialEq, Clone)]
+pub enum Comment {
+    Line(String),
+    Doc(String),
+}
+
+fn parse_number<'a>(lexer: &mut Lexer<'a, TokenType>) -> Option<Number> {
+    let slice = lexer.slice();
+    let last_char = slice.chars().last()?;
+    let (num_str, suffix) = match last_char {
+        'c' | 'k' | 'f' => (&slice[..slice.len() - 1], Some(last_char)),
+        _ => (slice, None),
+    };
+
+    let clean_str = if num_str.contains('_') {
+        num_str.replace('_', "")
+    } else {
+        num_str.to_string()
+    };
+
+    let num = if clean_str.contains('.') {
+        Number::Decimal(clean_str.parse::<Decimal>().ok()?)
+    } else {
+        Number::Integer(clean_str.parse::<i128>().ok()?)
+    };
+
+    if let Some(suffix) = suffix {
+        Some(
+            match suffix {
+                'c' => Temperature::Celsius(num),
+                'f' => Temperature::Fahrenheit(num),
+                'k' => Temperature::Kelvin(num),
+                _ => unreachable!(),
+            }
+            .to_kelvin(),
+        )
+    } else {
+        Some(num)
+    }
+}
+
+impl std::fmt::Display for Comment {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Line(c) => write!(f, "// {}", c),
+            Self::Doc(d) => {
+                let lines = d
+                    .split('\n')
+                    .map(|s| format!("/// {s}"))
+                    .collect::<Vec<_>>()
+                    .join("\n");
+
+                write!(f, "{}", lines)
+            }
+        }
+    }
+}
+
 impl Documentation for TokenType {
     fn docs(&self) -> String {
         match self {
@@ -128,6 +258,7 @@ impl From<TokenType> for u32 {
                 | Keyword::Return => 4,
                 _ => 5,
             },
+            TokenType::Comment(_) => 8,
             TokenType::Identifier(s) => {
                 if is_syscall(&s) {
                     10
@@ -160,6 +291,7 @@ impl std::fmt::Display for TokenType {
             TokenType::Keyword(k) => write!(f, "{:?}", k),
             TokenType::Identifier(i) => write!(f, "{}", i),
             TokenType::Symbol(s) => write!(f, "{}", s),
+            TokenType::Comment(c) => write!(f, "{}", c),
             TokenType::EOF => write!(f, "EOF"),
         }
     }
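
For orientation only (not part of this commit): a Logos-derived enum such as the `TokenType` above is normally driven by iterating the lexer that the derive generates. A sketch, assuming this commit's `TokenType` is in scope, and noting that no skip pattern is declared yet, so whitespace comes back as lexing errors:

    use logos::Logos;

    // Assumes the `TokenType` enum added in this commit is in scope.
    fn dump_tokens(source: &str) {
        // `TokenType::lexer` is generated by `#[derive(Logos)]`; iterating the
        // lexer yields one `Result<TokenType, _>` per token (logos 0.13+ API).
        let mut lex = TokenType::lexer(source);
        while let Some(result) = lex.next() {
            match result {
                Ok(token) => println!("{:?} @ {:?}", token, lex.span()),
                // No skip pattern is declared yet, so whitespace shows up here as an error.
                Err(_) => println!("error @ {:?}: {:?}", lex.span(), lex.slice()),
            }
        }
    }

Called as, say, `dump_tokens("let t = 295.15k")`, the `295.15k` literal is routed through `parse_number` above, which strips the `c`/`f`/`k` suffix and normalizes temperature-suffixed values to Kelvin via `to_kelvin()`.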