spl/src/lexer.rs

494 lines
18 KiB
Rust

mod compat;
use std::{mem, sync::Arc};
use crate::runtime::*;
use compat::{match_compat, transform_compat};
use readformat::*;
/// Errors the lexer can report while turning source words into `Words`.
#[derive(Debug, PartialEq, Eq)]
pub enum LexerError {
/// A function body block was expected but not found (for example, the block
/// lexer reported no return count for a `func`, lambda or method body).
FunctionBlockExpected,
/// Presumably a malformed function declaration; check the lexer for where it
/// is raised.
WrongFunctionDeclaration,
/// An `include` statement did not have the form `include <a> in <b>`.
InvalidInclude,
/// A `construct` name (optionally followed by `namespace`) was not followed by `{`.
InvalidConstructBlock,
/// A word that looked like a number could not be parsed; carries the offending word.
InvalidNumber(String),
/// A `< ... >` argument block was not preceded by a call or object call.
ArgsWithoutCall,
}
/// Tokenizes `input` with [`parse`] and lexes the resulting words as one block.
///
/// `compat` is passed through unchanged to the block lexer. Only the lexed
/// words are returned; the block's return count and consumed-word index are
/// discarded.
pub fn lex(compat: bool, input: String) -> Result<Words, LexerError> {
    let tokens = parse(input);
    let (_, lexed, _) = read_block(tokens.as_slice(), false, compat)?;
    Ok(lexed)
}
/// Lexes a block that is terminated by `}`.
///
/// Thin wrapper around [`read_block_dyn`] with the end word fixed to `"}"`;
/// all other arguments and the return value are forwarded as-is.
fn read_block(
    str_words: &[String],
    isfn: bool,
    compat: bool,
) -> Result<(Option<u32>, Words, usize), LexerError> {
    let closing_brace = String::from("}");
    read_block_dyn(str_words, isfn, closing_brace, compat)
}
/// Lexes `str_words` into [`Words`], stopping at `endword` or at the end of the slice.
///
/// When `isfn` is set and the slice starts with `{`, the words between `{` and `|`
/// are counted and skipped; that count becomes the first element of the returned
/// tuple (callers use it as the function's return count). If no `|` is found, the
/// block is lexed in compatibility mode and the count is 0.
///
/// In compatibility mode every word is first passed through `transform_compat`,
/// and `match_compat` gets the chance to consume it before the regular rules run.
///
/// # Returns
///
/// `(count, words, index)`, where `index` is the position in `str_words` at which
/// lexing stopped: the position of `endword` if it was found, otherwise an index
/// at or past the end of the slice.
///
/// # Errors
///
/// * [`LexerError::FunctionBlockExpected`] – a `func`, lambda or method has no
///   function block, or a `func` declaration is cut off by the end of input.
/// * [`LexerError::WrongFunctionDeclaration`] – `func <name> @rust` is not followed
///   by a usable source word.
/// * [`LexerError::InvalidConstructBlock`] – a `construct` is not followed by `{`
///   or is cut off by the end of input.
/// * [`LexerError::InvalidInclude`] – an `include` is not of the form
///   `include <a> in <b>`.
/// * [`LexerError::InvalidNumber`] – a number-like word fails to parse.
/// * [`LexerError::ArgsWithoutCall`] – a `< ... >` block has no preceding call.
///
/// # Panics
///
/// `native`, `def`, `use`, `while`, `if`, `catch`, `with` and `=>` still index
/// the following words unchecked, so input that ends right after one of them
/// panics.
fn read_block_dyn(
    str_words: &[String],
    isfn: bool,
    endword: String,
    mut compat: bool,
) -> Result<(Option<u32>, Words, usize), LexerError> {
    if str_words.is_empty() {
        return Ok((None, Words::new(Vec::new()), 0));
    }
    let mut rem = None;
    let mut words = Vec::new();
    let mut i = 0;
    if str_words[0] == "{" && isfn {
        // Count the words between `{` and `|`. A header without `|` (including a
        // block that consists of `{` alone) switches this block to compat mode.
        let mut r = 0;
        while !compat && word_at(str_words, r + 1) != Some("|") {
            r += 1;
            if r + 1 >= str_words.len() {
                r = 0;
                eprintln!("spl: parsing in compatibility mode");
                compat = true;
            }
        }
        // Skip `{`, the counted words and `|`; compat blocks have no `|` to skip.
        i += r + 2 - compat as usize;
        rem = Some(r as u32);
    }
    while i < str_words.len() {
        let word = if !compat {
            str_words[i].to_owned()
        } else {
            transform_compat(&str_words[i])
        };
        if compat && match_compat(word.to_owned(), str_words, &mut words, &mut i) {
            continue;
        }
        match word.as_str() {
            "native" => {
                i += 1;
                if !compat {
                    eprintln!("spl: parsing in compatibility mode");
                    compat = true;
                }
                eprintln!(
                    "spl: [compat] code tried to load a native function `{}`",
                    str_words[i]
                );
            }
            "def" => {
                // `def a, b, c` declares every comma-suffixed name plus the last one.
                while let Some(w) = str_words[i + 1].strip_suffix(',') {
                    words.push(Word::Key(Keyword::Def(w.to_owned())));
                    i += 1;
                }
                words.push(Word::Key(Keyword::Def(str_words[i + 1].to_owned())));
                i += 1;
            }
            "func" => {
                // NOTE(review): this `continue` does not advance `i`; unless
                // `match_compat` behaves differently on the next pass this looks
                // like it would spin forever - confirm against the compat module.
                if compat && word_at(str_words, i + 1) == Some("{") {
                    continue;
                }
                // A declaration needs at least `func <name> <{ | @rust>`.
                let decl = str_words
                    .get(i..=i + 2)
                    .ok_or(LexerError::FunctionBlockExpected)?
                    .join("\0");
                if let Some(dat) = readf1("func\0{}\0{", decl.as_str()) {
                    let block = read_block(&str_words[i + 2..], true, compat)?;
                    i += 2 + block.2;
                    words.push(Word::Key(Keyword::Func(
                        dat.to_owned(),
                        block.0.ok_or(LexerError::FunctionBlockExpected)?,
                        block.1,
                    )));
                } else if !compat {
                    // NOTE(review): a `func` matching neither form is silently
                    // ignored outside compat mode - possibly meant to be an error.
                    if let Some(dat) = readf1("func\0{}\0@rust", decl.as_str()) {
                        i += 3;
                        // The source word's first two bytes are skipped
                        // (presumably its `!{` raw marker).
                        let source = str_words
                            .get(i)
                            .and_then(|w| w.get(2..))
                            .ok_or(LexerError::WrongFunctionDeclaration)?;
                        words.push(Word::Key(Keyword::FuncOf(
                            dat.to_owned(),
                            source.to_owned(),
                            FuncImplType::Rust,
                        )));
                    }
                } else {
                    return Err(LexerError::FunctionBlockExpected);
                }
            }
            // lambda
            "{" => {
                let block = read_block(&str_words[i..], true, compat)?;
                i += block.2;
                words.push(Word::Const(Value::Func(AFunc::new(Func {
                    ret_count: block.0.ok_or(LexerError::FunctionBlockExpected)?,
                    to_call: FuncImpl::SPL(block.1),
                    origin: Arc::new(Frame::dummy()),
                    fname: None,
                    name: "dyn".to_owned(),
                    run_as_base: false,
                }))))
            }
            // <| .. > lambda
            "|" => {
                let block = read_block_dyn(&str_words[i + 1..], false, ">".to_owned(), compat)?;
                // Together with the trailing `i += 1` this leaves `i` on the
                // closing `>` itself rather than past it (compare the `<` arm).
                i += block.2;
                words.push(Word::Const(Value::Func(AFunc::new(Func {
                    ret_count: 0,
                    to_call: FuncImpl::SPL(block.1),
                    origin: Arc::new(Frame::dummy()),
                    fname: None,
                    name: "dyn-arg".to_owned(),
                    run_as_base: false,
                }))))
            }
            // `starts_with` instead of byte slicing: slicing `x[0..2]` panics when
            // byte 2 is not a char boundary.
            x if x.starts_with("!{") => {
                words.push(Word::Const(Value::Str(x[2..].to_owned())));
            }
            "<" => {
                let block = read_block_dyn(&str_words[i + 1..], false, ">".to_owned(), compat)?;
                i += block.2 + 1;
                let mut block = block.1.words;
                // The argument words are spliced in front of the call they belong to.
                match words.pop() {
                    Some(Word::Call(a, b, c)) => {
                        words.append(&mut block);
                        words.push(Word::Call(a, b, c));
                    }
                    Some(Word::ObjCall(a, b, c)) => {
                        words.push(Word::Key(Keyword::ObjPush));
                        words.append(&mut block);
                        words.push(Word::Key(Keyword::ObjPop));
                        words.push(Word::ObjCall(a, b, c));
                    }
                    // Either no preceding word at all or one that is not a call.
                    _ => return Err(LexerError::ArgsWithoutCall),
                }
            }
            "construct" => {
                let name = construct_word(str_words, i + 1)?.to_owned();
                let is_namespace = if construct_word(str_words, i + 2)? == "namespace" {
                    i += 1;
                    true
                } else {
                    false
                };
                if construct_word(str_words, i + 2)? != "{" {
                    return Err(LexerError::InvalidConstructBlock);
                }
                let mut fields = Vec::new();
                i += 3;
                // Field names run up to `;` (methods follow) or `}` (end of construct).
                loop {
                    let field = construct_word(str_words, i)?;
                    if field == ";" || field == "}" {
                        break;
                    }
                    fields.push(field.to_owned());
                    i += 1;
                }
                let mut methods = Vec::new();
                let mut has_construct = false;
                if str_words[i] == ";" {
                    i += 1;
                    loop {
                        let method_name = construct_word(str_words, i)?;
                        if method_name == "}" {
                            break;
                        }
                        if method_name == "construct" {
                            has_construct = true;
                        }
                        let block = read_block(&str_words[i + 1..], true, compat)?;
                        i += 1 + block.2;
                        methods.push((
                            method_name.to_owned(),
                            (block.0.ok_or(LexerError::FunctionBlockExpected)?, block.1),
                        ));
                        i += 1;
                    }
                }
                // Non-namespace constructs get an empty `construct` method by default.
                if !has_construct && !is_namespace {
                    methods.push(("construct".to_string(), (1, Words { words: vec![] })));
                }
                words.push(Word::Key(Keyword::Construct(
                    name,
                    fields,
                    methods,
                    is_namespace,
                )));
            }
            "include" if !compat => {
                let decl = str_words
                    .get(i..i + 4)
                    .ok_or(LexerError::InvalidInclude)?
                    .join("\0");
                if let Some(x) = readf("include\0{}\0in\0{}", decl.as_str()) {
                    words.push(Word::Key(Keyword::Include(
                        x[0].to_owned(),
                        x[1].to_owned(),
                    )))
                } else {
                    return Err(LexerError::InvalidInclude);
                }
                i += 3;
            }
            "use" => {
                let item = str_words[i + 1].to_owned();
                i += 1;
                words.push(Word::Key(Keyword::Use(item)));
            }
            "while" => {
                // The word right after `while` is skipped; the condition block
                // is read from the word after it.
                let cond = read_block(&str_words[i + 2..], false, compat)?;
                i += 2 + cond.2;
                let blk = read_block(&str_words[i + 2..], false, compat)?;
                i += 2 + blk.2;
                words.push(Word::Key(Keyword::While(cond.1, blk.1)));
            }
            "if" => {
                let blk = read_block(&str_words[i + 2..], false, compat)?;
                i += 2 + blk.2;
                words.push(Word::Key(Keyword::If(blk.1)));
            }
            "catch" => {
                // Everything between `catch` and `{` is collected as a type name.
                let mut types = Vec::new();
                i += 1;
                while &str_words[i] != "{" {
                    types.push(str_words[i].to_owned());
                    i += 1;
                }
                let blk = read_block(&str_words[i + 1..], false, compat)?;
                i += 2 + blk.2;
                let ctch = read_block(&str_words[i + 1..], false, compat)?;
                i += 1 + ctch.2;
                words.push(Word::Key(Keyword::Catch(types, blk.1, ctch.1)))
            }
            "with" => {
                let mut vars = Vec::new();
                i += 1;
                while &str_words[i] != ";" {
                    vars.push(str_words[i].to_owned());
                    i += 1;
                }
                words.push(Word::Key(Keyword::With(vars)));
            }
            "=" => {
                // `=` as the very last word has nothing after it and is a plain call.
                if word_at(str_words, i + 1) == Some(">") {
                    i += 1;
                    let cword = &str_words[i + 1];
                    // A modifier word made only of punctuation (no `^`) that
                    // contains `?` and/or `!` is consumed here.
                    if cword.contains(|c| c == '?' || c == '!')
                        && !cword.contains(|c: char| c == '^' || !c.is_ascii_punctuation())
                    {
                        i += 1;
                    }
                    let pushing = if cword.contains('?') {
                        words.push(Word::Call("dup".to_owned(), false, 0));
                        true
                    } else {
                        false
                    };
                    let throwing = cword.contains('!');
                    if str_words[i + 1] == "[" {
                        i += 1;
                        let mut block =
                            read_block_dyn(&str_words[i + 1..], false, "]".to_owned(), compat)?;
                        i += block.2 + 1;
                        words.push(Word::Call("[".to_owned(), false, 0));
                        words.append(&mut block.1.words);
                        words.push(Word::Call("]".to_owned(), false, 0));
                    } else {
                        // A single-word pattern is lexed on its own, outside compat mode.
                        words.append(
                            &mut read_block_dyn(
                                &[str_words[i + 1].clone()],
                                false,
                                "".to_owned(),
                                false,
                            )?
                            .1
                            .words,
                        );
                        i += 1;
                    }
                    words.push(Word::Call("match".to_owned(), false, 0));
                    if throwing {
                        words.push(Word::Call("_'match-else-error".to_owned(), false, 0));
                    }
                    if pushing {
                        words.push(Word::Call("_'match-else-push".to_owned(), false, 0));
                    }
                } else {
                    words.push(Word::Call("=".to_owned(), false, 0));
                }
            }
            "inline-callable" => {
                words.push(Word::Key(Keyword::InlineCallable));
            }
            "!!-" => {
                words.push(Word::Key(Keyword::InlineStart));
            }
            "-!!" => {
                words.push(Word::Key(Keyword::InlineEnd));
            }
            x if x == endword => {
                break;
            }
            x if x.starts_with('\"') => {
                words.push(Word::Const(Value::Str(x[1..].to_owned())));
            }
            x if x.starts_with('^') => {
                words.push(Word::Const(Value::Str(x[1..].to_owned())));
            }
            x if x.chars().all(|c| c.is_numeric() || c == '_' || c == '-')
                && !x.starts_with('_')
                && x.contains(char::is_numeric) =>
            {
                words.push(Word::Const(Value::Mega(
                    x.parse()
                        .map_err(|_| LexerError::InvalidNumber(x.to_owned()))?,
                )));
            }
            x if x
                .chars()
                .all(|c| c.is_numeric() || c == '.' || c == '_' || c == '-')
                && !x.starts_with('_')
                && x.contains(char::is_numeric) =>
            {
                words.push(Word::Const(Value::Double(
                    x.parse()
                        .map_err(|_| LexerError::InvalidNumber(x.to_owned()))?,
                )));
            }
            x => {
                // `a:b:c` is a call to `a` followed by object calls `b` and `c`.
                // Each leading `&` bumps the call's third field; a trailing `;`
                // sets its boolean flag.
                let mut word = x.split(':').next().unwrap(); // SAFETY: One item always exists after a split.
                if !word.is_empty() {
                    let mut ra = 0;
                    while word.starts_with('&') {
                        ra += 1;
                        word = &word[1..];
                    }
                    if let Some(word) = word.strip_suffix(';') {
                        words.push(Word::Call(word.to_owned(), true, ra));
                    } else {
                        words.push(Word::Call(word.to_owned(), false, ra));
                    }
                }
                for mut word in x.split(':').skip(1) {
                    let mut ra = 0;
                    while word.starts_with('&') {
                        ra += 1;
                        word = &word[1..];
                    }
                    if let Some(word) = word.strip_suffix(';') {
                        words.push(Word::ObjCall(word.to_owned(), true, ra));
                    } else {
                        words.push(Word::ObjCall(word.to_owned(), false, ra));
                    }
                }
            }
        }
        i += 1;
    }
    Ok((rem, Words { words }, i))
}

/// Returns the word at `idx`, or `None` when `idx` is past the end of `str_words`.
fn word_at(str_words: &[String], idx: usize) -> Option<&str> {
    str_words.get(idx).map(String::as_str)
}

/// Like [`word_at`], but reports a missing word as [`LexerError::InvalidConstructBlock`].
fn construct_word(str_words: &[String], idx: usize) -> Result<&str, LexerError> {
    word_at(str_words, idx).ok_or(LexerError::InvalidConstructBlock)
}
/// Splits SPL source text into the raw word strings consumed by the lexer.
///
/// The input is processed line by line (split on `\n`); string state is reset at
/// the start of every line. Outside of strings and raw sections:
///
/// * spaces and tabs separate words,
/// * `(` and `)` are dropped,
/// * `<` and `>` always form a word of their own.
///
/// A string starts at `"`. It is stored with its leading quote and without the
/// closing one. Inside a string, `\\`, `\n`, `\r` and `\"` are unescaped; any other
/// escaped character is dropped. If the closing quote is directly followed by `;`,
/// the word accumulated so far is discarded.
///
/// `!{` opens a raw section that runs until the matching `}`. Its text is kept
/// verbatim (including the `!{` prefix, without the final `}`), except that line
/// breaks inside it are not preserved.
///
/// A leading `#!` line is skipped. If the input is only a `#!` line without a
/// trailing newline, the result is empty.
pub fn parse(mut input: String) -> Vec<String> {
    if input.starts_with("#!") {
        // Drop the shebang line. Without a newline the whole input is the shebang,
        // so nothing is left to tokenize (this used to panic).
        match input.find('\n') {
            Some(newline) => input = input.split_off(newline),
            None => input.clear(),
        }
    }
    let mut words = Vec::new();
    let mut s = String::new();
    // True right after a `!`, so that a following `{` opens a raw section.
    let mut exclam = false;
    // Brace nesting depth of the current raw section; 0 means "not raw".
    let mut raw = 0;
    for line in input.split('\n') {
        let mut in_string = false;
        let mut escaping = false;
        let mut was_in_string = false;
        for c in line.chars() {
            if in_string {
                if escaping {
                    if raw == 0 {
                        // Unknown escapes are swallowed without output.
                        match c {
                            '\\' => s.push('\\'),
                            'n' => s.push('\n'),
                            'r' => s.push('\r'),
                            '"' => s.push('"'),
                            _ => {}
                        }
                        escaping = false;
                        continue;
                    } else {
                        escaping = false;
                    }
                } else if c == '"' {
                    in_string = false;
                    was_in_string = true;
                    // Outside raw sections the closing quote is not stored.
                    if raw == 0 {
                        continue;
                    }
                }
                if c == '\\' {
                    escaping = true;
                    // Raw sections keep the backslash itself.
                    if raw == 0 {
                        continue;
                    }
                }
            } else {
                if c == '"' {
                    s.push('"');
                    in_string = true;
                    continue;
                }
                if raw == 0 {
                    if c == ';' && was_in_string {
                        // `"...";` - throw the accumulated word away.
                        s.clear();
                        continue;
                    }
                    if c == '(' || c == ')' {
                        continue;
                    }
                    if c == '<' || c == '>' {
                        if s.is_empty() {
                            words.push(c.to_string());
                            continue;
                        }
                        words.push(mem::take(&mut s));
                        was_in_string = false;
                        words.push(c.to_string());
                        continue;
                    }
                    if c == ' ' || c == '\t' {
                        if s.is_empty() {
                            continue;
                        }
                        words.push(mem::take(&mut s));
                        was_in_string = false;
                        continue;
                    }
                    if c == '{' && exclam {
                        raw = 1;
                    }
                    exclam = false;
                    if c == '!' {
                        exclam = true;
                    }
                } else {
                    if c == '{' {
                        raw += 1;
                    }
                    if c == '}' {
                        raw -= 1;
                    }
                    // The brace that closes the raw section ends the word and is
                    // not stored.
                    if raw == 0 {
                        words.push(mem::take(&mut s));
                        continue;
                    }
                }
            }
            was_in_string = false;
            s.push(c);
        }
        // A line end terminates the current word unless a raw section is open.
        if !s.is_empty() && raw == 0 {
            words.push(mem::take(&mut s));
        }
    }
    if !s.is_empty() {
        words.push(mem::take(&mut s));
    }
    words
}