revamped snippet text element parsing

Snippet text elements can contain escape sequences
that must be treated properly. Furthermore, snippets
must always escape certain characters (like `}`
or `\`). The function has been updated to account
for that. `text` is now also included with
`anything` to match the grammar and can also
match empty text. To avoid infinite loops, the
`non_empty` combinator has been added, which is
automatically used in the `one_or_more` and
`zero_or_more` combinators, where the problem would
occur.
This commit is contained in:
Pascal Kuthe 2023-03-13 19:27:54 +01:00 committed by Blaž Hrastnik
parent bbf480007d
commit 90348b889f
2 changed files with 85 additions and 45 deletions

View file

@ -12,7 +12,7 @@ pub enum CaseChange {
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub enum FormatItem<'a> { pub enum FormatItem<'a> {
Text(&'a str), Text(Tendril),
Capture(usize), Capture(usize),
CaseChange(usize, CaseChange), CaseChange(usize, CaseChange),
Conditional(usize, Option<&'a str>, Option<&'a str>), Conditional(usize, Option<&'a str>, Option<&'a str>),
@ -20,9 +20,9 @@ pub enum FormatItem<'a> {
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub struct Regex<'a> { pub struct Regex<'a> {
value: &'a str, value: Tendril,
replacement: Vec<FormatItem<'a>>, replacement: Vec<FormatItem<'a>>,
options: Option<&'a str>, options: Tendril,
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
@ -36,14 +36,14 @@ pub enum SnippetElement<'a> {
}, },
Choice { Choice {
tabstop: usize, tabstop: usize,
choices: Vec<&'a str>, choices: Vec<Tendril>,
}, },
Variable { Variable {
name: &'a str, name: &'a str,
default: Option<&'a str>, default: Option<&'a str>,
regex: Option<Regex<'a>>, regex: Option<Regex<'a>>,
}, },
Text(&'a str), Text(Tendril),
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
@ -67,12 +67,12 @@ fn render_elements(
for element in snippet_elements { for element in snippet_elements {
match element { match element {
&Text(text) => { Text(text) => {
// small optimization to avoid calling replace when it's unnecessary // small optimization to avoid calling replace when it's unnecessary
let text = if text.contains('\n') { let text = if text.contains('\n') {
Cow::Owned(text.replace('\n', newline_with_offset)) Cow::Owned(text.replace('\n', newline_with_offset))
} else { } else {
Cow::Borrowed(text) Cow::Borrowed(text.as_str())
}; };
*offset += text.chars().count(); *offset += text.chars().count();
insert.push_str(&text); insert.push_str(&text);
@ -160,6 +160,7 @@ pub fn render(
} }
mod parser { mod parser {
use helix_core::Tendril;
use helix_parsec::*; use helix_parsec::*;
use super::{CaseChange, FormatItem, Regex, Snippet, SnippetElement}; use super::{CaseChange, FormatItem, Regex, Snippet, SnippetElement};
@ -210,8 +211,32 @@ mod parser {
} }
} }
fn text<'a, const SIZE: usize>(cs: [char; SIZE]) -> impl Parser<'a, Output = &'a str> { const TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$'];
take_while(move |c| cs.into_iter().all(|c1| c != c1)) const REPLACE_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '/'];
const CHOICE_TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '|', ','];
fn text<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = Tendril> {
move |input: &'a str| {
let mut chars = input.char_indices();
let mut res = Tendril::new();
while let Some((i, c)) = chars.next() {
match c {
'\\' => {
if let Some((_, c)) = chars.next() {
if escape_chars.contains(&c) {
res.push(c);
continue;
}
}
return Ok((&input[i..], res));
}
c if escape_chars.contains(&c) => return Ok((&input[i..], res)),
c => res.push(c),
}
}
Ok(("", res))
}
} }
fn digit<'a>() -> impl Parser<'a, Output = usize> { fn digit<'a>() -> impl Parser<'a, Output = usize> {
@ -274,20 +299,18 @@ mod parser {
} }
fn regex<'a>() -> impl Parser<'a, Output = Regex<'a>> { fn regex<'a>() -> impl Parser<'a, Output = Regex<'a>> {
let text = map(text(['$', '/']), FormatItem::Text);
let replacement = reparse_as(
take_until(|c| c == '/'),
one_or_more(choice!(format(), text)),
);
map( map(
seq!( seq!(
"/", "/",
take_until(|c| c == '/'), // TODO parse as ECMAScript and convert to rust regex
non_empty(text(&['/', '\\'])),
"/", "/",
replacement, one_or_more(choice!(
format(),
map(text(REPLACE_ESCAPE_CHARS), FormatItem::Text)
)),
"/", "/",
optional(take_until(|c| c == '}')), text(&['}', '\\',]),
), ),
|(_, value, _, replacement, _, options)| Regex { |(_, value, _, replacement, _, options)| Regex {
value, value,
@ -308,13 +331,12 @@ mod parser {
} }
fn placeholder<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> { fn placeholder<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
let text = map(text(['$', '}']), SnippetElement::Text);
map( map(
seq!( seq!(
"${", "${",
digit(), digit(),
":", ":",
one_or_more(choice!(anything(), text)), one_or_more(anything(TEXT_ESCAPE_CHARS)),
"}" "}"
), ),
|seq| SnippetElement::Placeholder { |seq| SnippetElement::Placeholder {
@ -330,7 +352,7 @@ mod parser {
"${", "${",
digit(), digit(),
"|", "|",
sep(take_until(|c| c == ',' || c == '|'), ","), sep(text(CHOICE_TEXT_ESCAPE_CHARS), ","),
"|}", "|}",
), ),
|seq| SnippetElement::Choice { |seq| SnippetElement::Choice {
@ -368,17 +390,21 @@ mod parser {
) )
} }
fn anything<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> { fn anything<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = SnippetElement<'a>> {
// The parser has to be constructed lazily to avoid infinite opaque type recursion move |input: &'a str| {
|input: &'a str| { let parser = choice!(
let parser = choice!(tabstop(), placeholder(), choice(), variable()); tabstop(),
placeholder(),
choice(),
variable(),
map(text(escape_chars), SnippetElement::Text)
);
parser.parse(input) parser.parse(input)
} }
} }
fn snippet<'a>() -> impl Parser<'a, Output = Snippet<'a>> { fn snippet<'a>() -> impl Parser<'a, Output = Snippet<'a>> {
let text = map(text(['$']), SnippetElement::Text); map(one_or_more(anything(TEXT_ESCAPE_CHARS)), |parts| Snippet {
map(one_or_more(choice!(anything(), text)), |parts| Snippet {
elements: parts, elements: parts,
}) })
} }
@ -392,6 +418,7 @@ mod parser {
} }
}) })
} }
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::SnippetElement::*; use super::SnippetElement::*;
@ -407,12 +434,12 @@ mod parser {
assert_eq!( assert_eq!(
Ok(Snippet { Ok(Snippet {
elements: vec![ elements: vec![
Text("match("), Text("match(".into()),
Placeholder { Placeholder {
tabstop: 1, tabstop: 1,
value: vec!(Text("Arg1")), value: vec!(Text("Arg1".into())),
}, },
Text(")") Text(")".into())
] ]
}), }),
parse("match(${1:Arg1})") parse("match(${1:Arg1})")
@ -446,15 +473,15 @@ mod parser {
assert_eq!( assert_eq!(
Ok(Snippet { Ok(Snippet {
elements: vec![ elements: vec![
Text("local "), Text("local ".into()),
Placeholder { Placeholder {
tabstop: 1, tabstop: 1,
value: vec!(Text("var")), value: vec!(Text("var".into())),
}, },
Text(" = "), Text(" = ".into()),
Placeholder { Placeholder {
tabstop: 1, tabstop: 1,
value: vec!(Text("value")), value: vec!(Text("value".into())),
}, },
] ]
}), }),
@ -468,7 +495,7 @@ mod parser {
Ok(Snippet { Ok(Snippet {
elements: vec![Placeholder { elements: vec![Placeholder {
tabstop: 1, tabstop: 1,
value: vec!(Text("var, "), Tabstop { tabstop: 2 },), value: vec!(Text("var, ".into()), Tabstop { tabstop: 2 },),
},] },]
}), }),
parse("${1:var, $2}") parse("${1:var, $2}")
@ -482,10 +509,10 @@ mod parser {
elements: vec![Placeholder { elements: vec![Placeholder {
tabstop: 1, tabstop: 1,
value: vec!( value: vec!(
Text("foo "), Text("foo ".into()),
Placeholder { Placeholder {
tabstop: 2, tabstop: 2,
value: vec!(Text("bar")), value: vec!(Text("bar".into())),
}, },
), ),
},] },]
@ -499,27 +526,27 @@ mod parser {
assert_eq!( assert_eq!(
Ok(Snippet { Ok(Snippet {
elements: vec![ elements: vec![
Text("hello "), Text("hello ".into()),
Tabstop { tabstop: 1 }, Tabstop { tabstop: 1 },
Tabstop { tabstop: 2 }, Tabstop { tabstop: 2 },
Text(" "), Text(" ".into()),
Choice { Choice {
tabstop: 1, tabstop: 1,
choices: vec!["one", "two", "three"] choices: vec!["one".into(), "two".into(), "three".into()]
}, },
Text(" "), Text(" ".into()),
Variable { Variable {
name: "name", name: "name",
default: Some("foo"), default: Some("foo"),
regex: None regex: None
}, },
Text(" "), Text(" ".into()),
Variable { Variable {
name: "var", name: "var",
default: None, default: None,
regex: None regex: None
}, },
Text(" "), Text(" ".into()),
Variable { Variable {
name: "TM", name: "TM",
default: None, default: None,
@ -539,9 +566,9 @@ mod parser {
name: "TM_FILENAME", name: "TM_FILENAME",
default: None, default: None,
regex: Some(Regex { regex: Some(Regex {
value: "(.*).+$", value: "(.*).+$".into(),
replacement: vec![FormatItem::Capture(1)], replacement: vec![FormatItem::Capture(1)],
options: None, options: Tendril::new(),
}), }),
}] }]
}), }),

View file

@ -459,6 +459,7 @@ pub fn zero_or_more<'a, P, T>(parser: P) -> impl Parser<'a, Output = Vec<T>>
where where
P: Parser<'a, Output = T>, P: Parser<'a, Output = T>,
{ {
let parser = non_empty(parser);
move |mut input| { move |mut input| {
let mut values = Vec::new(); let mut values = Vec::new();
@ -491,6 +492,7 @@ pub fn one_or_more<'a, P, T>(parser: P) -> impl Parser<'a, Output = Vec<T>>
where where
P: Parser<'a, Output = T>, P: Parser<'a, Output = T>,
{ {
let parser = non_empty(parser);
move |mut input| { move |mut input| {
let mut values = Vec::new(); let mut values = Vec::new();
@ -559,3 +561,14 @@ where
Ok((input, values)) Ok((input, values))
} }
} }
/// Wraps the parser `p` so that a successful parse which consumed no input
/// is reported as a failure.
///
/// The returned parser first runs `p`; any error from `p` is propagated.
/// If `p` succeeds but the remaining input is exactly as long as the input
/// it was given, the result is discarded and `Err(input)` is returned.
/// Otherwise the remaining input and the parsed value are passed through.
///
/// Repetition combinators can use this to guarantee that every iteration
/// makes progress, which prevents looping forever on a parser that is able
/// to match the empty string.
pub fn non_empty<'a, T>(p: impl Parser<'a, Output = T>) -> impl Parser<'a, Output = T> {
    move |input| {
        let (rest, value) = p.parse(input)?;
        // Equal lengths mean `p` did not advance through the input at all.
        if rest.len() != input.len() {
            Ok((rest, value))
        } else {
            Err(input)
        }
    }
}