implement basic parser (TODO: more keywords)
This commit is contained in:
parent
3d4fd06bba
commit
0b1ded01a6
6 changed files with 292 additions and 34 deletions
30
src/main.rs
30
src/main.rs
|
@ -1,13 +1,41 @@
|
|||
use std::fs;
|
||||
|
||||
use crate::tokenizer::Tokenizer;
|
||||
use crate::{parser::Parser, tokenizer::Tokenizer, value::Value, word::Word};
|
||||
|
||||
pub mod parser;
|
||||
pub mod tokenizer;
|
||||
pub mod ty;
|
||||
pub mod value;
|
||||
pub mod word;
|
||||
|
||||
fn main() {
|
||||
let tokens = Tokenizer::tokenize(fs::read_to_string("std.aea").unwrap()).unwrap();
|
||||
println!("{tokens:?}");
|
||||
let words = Parser::parse(&tokens).unwrap();
|
||||
println!("{words:?}");
|
||||
let mut stack = Vec::new();
|
||||
let mut call_stack = Vec::new();
|
||||
for word in words {
|
||||
match word {
|
||||
Word::Const(Value::Str(v)) => stack.push(v),
|
||||
Word::StartCall(n, o) => {
|
||||
println!("Starting call to {n} with objcall = {o}");
|
||||
call_stack.push((n, o))
|
||||
}
|
||||
Word::ConfirmCall(p) => {
|
||||
let dat = call_stack.pop().unwrap();
|
||||
println!(
|
||||
"Confirmed call to {} with return popping = {p} and objcall = {}",
|
||||
dat.0, dat.1
|
||||
);
|
||||
if dat.0 == "print" {
|
||||
println!("{}", stack.pop().unwrap());
|
||||
}
|
||||
}
|
||||
Word::Function(name, args, ret, words) => {
|
||||
println!("If i was an interpreter I would define function {name}({args:?}) {ret:?} {{{words:?}}}")
|
||||
}
|
||||
_ => todo!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
197
src/parser.rs
197
src/parser.rs
|
@ -1,12 +1,207 @@
|
|||
use std::mem;
|
||||
|
||||
use crate::{
|
||||
value::Value,
|
||||
word::{Argdef, Token, TypePat, Word},
|
||||
};
|
||||
|
||||
/// A best-effort source position attached to tokenizer and parser errors.
///
/// Every component is optional because the stage that builds the position
/// does not know all of them: in the code visible here, the parser never
/// fills in `char_in_file` and the tokenizer never fills in `file`.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct CodePos {
    /// Name of the source file, when known.
    pub file: Option<String>,
    /// Offset into the source text, when known.
    pub char_in_file: Option<usize>,
    /// Index of the token at (or just past) the position, when known.
    pub token: Option<usize>,
    /// Number of newline tokens seen before the position, i.e. a zero-based
    /// line number.
    pub line: Option<usize>,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ParseError {
|
||||
InvalidToken(CodePos, char),
|
||||
InvalidToken(CodePos, String),
|
||||
StringInIdent(CodePos),
|
||||
UnexpectedEOF,
|
||||
UnexpectedOpenParen(CodePos),
|
||||
FunctionCallWithoutFn(CodePos),
|
||||
UnexpectedToken(CodePos, Token, String),
|
||||
ExpectedIdentifierAfterColon(CodePos),
|
||||
}
|
||||
|
||||
pub struct Parser<'a> {
|
||||
slice: &'a [Token],
|
||||
index: usize,
|
||||
file: Option<String>,
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
pub fn new(tokens: &'a [Token]) -> Parser<'a> {
|
||||
Parser {
|
||||
slice: tokens,
|
||||
index: 0,
|
||||
file: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(tokens: &'a [Token]) -> Result<Vec<Word>, ParseError> {
|
||||
let mut parser = Parser::new(tokens);
|
||||
parser.read_level_at(0)
|
||||
}
|
||||
|
||||
fn next(&mut self) -> Result<Token, ParseError> {
|
||||
self.slice
|
||||
.get((self.index, self.index += 1).0)
|
||||
.cloned()
|
||||
.ok_or(ParseError::UnexpectedEOF)
|
||||
}
|
||||
fn next_as_ident(&mut self) -> Result<String, ParseError> {
|
||||
match self.next()? {
|
||||
Token::Ident(s) => Ok(s.to_owned()),
|
||||
other => Err(ParseError::UnexpectedToken(
|
||||
self.pos(),
|
||||
other,
|
||||
"Ident".to_owned(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
fn assert_next_eq(&mut self, token: Token) -> Result<Token, ParseError> {
|
||||
let next = self.next()?;
|
||||
if next == token {
|
||||
return Ok(next);
|
||||
}
|
||||
Err(ParseError::UnexpectedToken(
|
||||
self.pos(),
|
||||
next,
|
||||
format!("{token:?}"),
|
||||
))
|
||||
}
|
||||
fn next_matches(&mut self, f: impl FnOnce(&Token) -> bool) -> bool {
|
||||
if self.slice.get(self.index).is_some_and(f) {
|
||||
self.index += 1;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
fn pos(&self) -> CodePos {
|
||||
CodePos {
|
||||
file: self.file.as_ref().cloned(),
|
||||
char_in_file: None,
|
||||
token: Some(self.index),
|
||||
line: Some(
|
||||
self.slice[0..self.index]
|
||||
.iter()
|
||||
.filter(|t| matches!(t, Token::Newline(_)))
|
||||
.count(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read_level_at(&mut self, index: usize) -> Result<Vec<Word>, ParseError> {
|
||||
self.index = index;
|
||||
let mut words = Vec::new();
|
||||
let mut file = None;
|
||||
|
||||
#[derive(Default)]
|
||||
struct State {
|
||||
is_objcall: bool,
|
||||
paren_mode: Vec<bool>,
|
||||
}
|
||||
let mut state = State {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
while let Ok(token) = self.next() {
|
||||
match token {
|
||||
Token::Space => (),
|
||||
Token::Exclam => words.push(Word::ConfirmCall(false)),
|
||||
Token::Semicolon => words.push(Word::ConfirmCall(true)),
|
||||
Token::Colon => {
|
||||
if words.last().is_some_and(|x| !matches!(x, Word::Const(_))) {
|
||||
words.push(Word::ConfirmCall(false));
|
||||
}
|
||||
state.is_objcall = true;
|
||||
}
|
||||
Token::Open => {
|
||||
state
|
||||
.paren_mode
|
||||
.pop()
|
||||
.ok_or(ParseError::UnexpectedOpenParen(self.pos()))?;
|
||||
state.paren_mode.push(true);
|
||||
}
|
||||
Token::Close => {
|
||||
words.push(Word::ConfirmCall(
|
||||
self.next_matches(|x| *x == Token::Semicolon),
|
||||
));
|
||||
}
|
||||
Token::Begin => {
|
||||
words.push(Word::Block(self.read_level_at(self.index + 1)?));
|
||||
}
|
||||
Token::End => break,
|
||||
Token::Return => words.push(Word::Return),
|
||||
Token::String(s) => words.push(Word::Const(Value::Str(s))),
|
||||
Token::Ident(ident) => {
|
||||
if let Some(keyword) = self.try_match_keyword(&ident)? {
|
||||
words.push(keyword);
|
||||
} else {
|
||||
words.push(Word::StartCall(ident, state.is_objcall));
|
||||
state.is_objcall = false;
|
||||
state.paren_mode.push(false);
|
||||
}
|
||||
}
|
||||
Token::Newline(_) => (),
|
||||
Token::Filename(f) => file = Some(f),
|
||||
}
|
||||
if self.file != file {
|
||||
self.file = file.clone();
|
||||
}
|
||||
}
|
||||
Ok(words)
|
||||
}
|
||||
|
||||
fn try_match_keyword(&mut self, ident: &String) -> Result<Option<Word>, ParseError> {
|
||||
match ident.as_str() {
|
||||
"fn" => {
|
||||
let name = self.next_as_ident()?;
|
||||
self.assert_next_eq(Token::Open)?;
|
||||
let mut arr = Vec::new();
|
||||
let mut args = Vec::new();
|
||||
loop {
|
||||
match self.next()? {
|
||||
Token::Ident(ident) => {
|
||||
arr.push(Argdef::Untyped(ident));
|
||||
}
|
||||
Token::Colon => {
|
||||
let new_argdef = Argdef::Typed(
|
||||
match arr.pop().ok_or(ParseError::UnexpectedToken(
|
||||
self.pos(),
|
||||
Token::Colon,
|
||||
"Ident".to_owned(),
|
||||
))? {
|
||||
Argdef::Untyped(a) => a,
|
||||
// TODO: make this error instead?
|
||||
Argdef::Typed(a, _) => a,
|
||||
},
|
||||
TypePat::new(self.next_as_ident()?),
|
||||
);
|
||||
arr.push(new_argdef);
|
||||
}
|
||||
Token::Close => args = mem::replace(&mut arr, Vec::new()),
|
||||
Token::Begin => break,
|
||||
Token::Newline(_) => (),
|
||||
Token::Filename(_) => (),
|
||||
token => Err(ParseError::UnexpectedToken(
|
||||
self.pos(),
|
||||
token,
|
||||
"Ident".to_owned(),
|
||||
))?,
|
||||
}
|
||||
}
|
||||
Ok(Some(Word::Function(
|
||||
name,
|
||||
args,
|
||||
arr,
|
||||
self.read_level_at(self.index)?,
|
||||
)))
|
||||
}
|
||||
_ => Ok(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@ impl<'a> Tokenizer<'a> {
|
|||
CodePos {
|
||||
file: None,
|
||||
char_in_file: Some(self.string.len() - self.char_iter.as_str().len()),
|
||||
token: Some(tokens.len()),
|
||||
line: Some(
|
||||
tokens
|
||||
.iter()
|
||||
|
@ -51,7 +52,9 @@ impl<'a> Tokenizer<'a> {
|
|||
'r' => accum.push('\r'),
|
||||
'\\' => accum.push('\\'),
|
||||
'"' => accum.push('"'),
|
||||
char => return Err(ParseError::InvalidToken(self.tpos(&ret), char)),
|
||||
char => {
|
||||
return Err(ParseError::InvalidToken(self.tpos(&ret), char.to_string()))
|
||||
}
|
||||
}
|
||||
in_escape = false;
|
||||
continue;
|
||||
|
@ -71,7 +74,6 @@ impl<'a> Tokenizer<'a> {
|
|||
let symbol = match char {
|
||||
' ' => Some(Token::Space),
|
||||
'\t' => Some(Token::Space),
|
||||
'|' => Some(Token::Bar),
|
||||
'!' => Some(Token::Exclam),
|
||||
';' => Some(Token::Semicolon),
|
||||
':' => Some(Token::Colon),
|
||||
|
|
24
src/ty.rs
Normal file
24
src/ty.rs
Normal file
|
@ -0,0 +1,24 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use crate::word::TypePat;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct AEATypeInfo {
|
||||
base: Arc<AEAType>,
|
||||
args: Vec<Arc<AEAType>>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct AEAType {
|
||||
name: String,
|
||||
uname: String,
|
||||
fields: Vec<(String, TypePat)>,
|
||||
}
|
||||
|
||||
impl PartialEq for AEAType {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.uname == other.uname
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for AEAType {}
|
37
src/word.rs
37
src/word.rs
|
@ -1,17 +1,35 @@
|
|||
use crate::value::Value;
|
||||
use crate::{ty::AEATypeInfo, value::Value};
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Word {
|
||||
Const(Value),
|
||||
StartCall(String),
|
||||
StartObjCall(String),
|
||||
ConfirmCall,
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct TypePat {
|
||||
origin: String,
|
||||
ty: Option<AEATypeInfo>,
|
||||
}
|
||||
impl TypePat {
|
||||
pub fn new(origin: String) -> TypePat {
|
||||
TypePat { origin, ty: None }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub enum Argdef {
|
||||
Untyped(String),
|
||||
Typed(String, TypePat),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Word {
|
||||
Const(Value),
|
||||
StartCall(String, bool),
|
||||
ConfirmCall(bool),
|
||||
Block(Vec<Word>),
|
||||
Function(String, Vec<Argdef>, Vec<Argdef>, Vec<Word>),
|
||||
Return,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub enum Token {
|
||||
Space,
|
||||
Bar,
|
||||
Exclam,
|
||||
Semicolon,
|
||||
Colon,
|
||||
|
@ -24,4 +42,5 @@ pub enum Token {
|
|||
Ident(String),
|
||||
|
||||
Newline(usize),
|
||||
Filename(String),
|
||||
}
|
||||
|
|
32
std.aea
32
std.aea
|
@ -1,24 +1,14 @@
|
|||
!aea:exclam_mode!
|
||||
print "Hi 1" ;
|
||||
|
||||
fn main | args:Array:String ! ret:int {
|
||||
print "Hello world!" ;
|
||||
if args:length :== 0 !! {
|
||||
ret= 0 ^
|
||||
}
|
||||
print "I got " :+ String:of args:length ! ! :+ " arguments." ! ;
|
||||
ret= 0
|
||||
"Hi 1" print;
|
||||
|
||||
print("Hi 2");
|
||||
|
||||
"Hi 2" print();
|
||||
"hi":print;
|
||||
|
||||
fn test(a) ret {
|
||||
ret= print(a) !
|
||||
}
|
||||
|
||||
!aea:paren_mode!
|
||||
|
||||
fn main(args:Array:String) ret:int {
|
||||
print("Hello world!");
|
||||
if(args:length :== (0)) {
|
||||
ret=(0) ^
|
||||
}
|
||||
print("I got " :+(String:of(args:length)) :+(" arguments."));
|
||||
ret=(0)
|
||||
}
|
||||
|
||||
|
||||
|
||||
"hiiii" print;
|
||||
|
|
Loading…
Reference in a new issue