revamped snippet text element parsing

Snippet text elements can contain escape sequences
that must be treated properly. Furthermore snippets
must always escape certain characters (like `}`
or `\`). The function has been updated to account
for that. `text` is now also included with
`anything` to match the grammar and can also
match empty text. To avoid infinite loops the
`non-empty` combinator has been added which is
automatically used in the `one_or_more` and
`zero_or_more` combinators where the problem would
occur.
This commit is contained in:
Pascal Kuthe 2023-03-13 19:27:54 +01:00 committed by Blaž Hrastnik
parent bbf480007d
commit 90348b889f
2 changed files with 85 additions and 45 deletions

View file

@ -12,7 +12,7 @@ pub enum CaseChange {
#[derive(Debug, PartialEq, Eq)]
pub enum FormatItem<'a> {
Text(&'a str),
Text(Tendril),
Capture(usize),
CaseChange(usize, CaseChange),
Conditional(usize, Option<&'a str>, Option<&'a str>),
@ -20,9 +20,9 @@ pub enum FormatItem<'a> {
#[derive(Debug, PartialEq, Eq)]
pub struct Regex<'a> {
value: &'a str,
value: Tendril,
replacement: Vec<FormatItem<'a>>,
options: Option<&'a str>,
options: Tendril,
}
#[derive(Debug, PartialEq, Eq)]
@ -36,14 +36,14 @@ pub enum SnippetElement<'a> {
},
Choice {
tabstop: usize,
choices: Vec<&'a str>,
choices: Vec<Tendril>,
},
Variable {
name: &'a str,
default: Option<&'a str>,
regex: Option<Regex<'a>>,
},
Text(&'a str),
Text(Tendril),
}
#[derive(Debug, PartialEq, Eq)]
@ -67,12 +67,12 @@ fn render_elements(
for element in snippet_elements {
match element {
&Text(text) => {
Text(text) => {
// small optimization to avoid calling replace when it's unnecessary
let text = if text.contains('\n') {
Cow::Owned(text.replace('\n', newline_with_offset))
} else {
Cow::Borrowed(text)
Cow::Borrowed(text.as_str())
};
*offset += text.chars().count();
insert.push_str(&text);
@ -160,6 +160,7 @@ pub fn render(
}
mod parser {
use helix_core::Tendril;
use helix_parsec::*;
use super::{CaseChange, FormatItem, Regex, Snippet, SnippetElement};
@ -210,8 +211,32 @@ mod parser {
}
}
fn text<'a, const SIZE: usize>(cs: [char; SIZE]) -> impl Parser<'a, Output = &'a str> {
take_while(move |c| cs.into_iter().all(|c1| c != c1))
// Character sets handed to `text`: a character in the set ends a text run when
// it appears unescaped, and is taken literally when preceded by a backslash.

// Used for plain snippet text and for placeholder contents.
const TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$'];
// Used for the replacement part of a variable regex, where `/` also ends the text.
const REPLACE_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '/'];
// Used for the entries of a choice list, where `|` and `,` also end the text.
const CHOICE_TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '|', ','];
/// Builds a parser for a run of literal snippet text with escapes resolved.
///
/// The parser copies characters into a fresh `Tendril` until it reaches an
/// unescaped member of `escape_chars`, and returns the remaining input starting
/// at that character. A backslash followed by a member of `escape_chars` yields
/// just that member (the backslash is dropped). A backslash followed by any
/// other character, or by the end of the input, stops the parser with the
/// remaining input starting at the backslash.
///
/// The parser never fails: if the very first character stops it, the result is
/// an empty `Tendril` and the input is returned unconsumed.
fn text<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = Tendril> {
    move |input: &'a str| {
        let mut unescaped = Tendril::new();
        let mut cursor = input.char_indices();

        while let Some((pos, ch)) = cursor.next() {
            if ch != '\\' {
                // An unescaped special character terminates the text run.
                if escape_chars.contains(&ch) {
                    return Ok((&input[pos..], unescaped));
                }
                unescaped.push(ch);
                continue;
            }

            // Backslash: only a recognised escape sequence is consumed.
            match cursor.next() {
                Some((_, escaped)) if escape_chars.contains(&escaped) => unescaped.push(escaped),
                _ => return Ok((&input[pos..], unescaped)),
            }
        }

        Ok(("", unescaped))
    }
}
fn digit<'a>() -> impl Parser<'a, Output = usize> {
@ -274,20 +299,18 @@ mod parser {
}
fn regex<'a>() -> impl Parser<'a, Output = Regex<'a>> {
let text = map(text(['$', '/']), FormatItem::Text);
let replacement = reparse_as(
take_until(|c| c == '/'),
one_or_more(choice!(format(), text)),
);
map(
seq!(
"/",
take_until(|c| c == '/'),
// TODO parse as ECMAScript and convert to rust regex
non_empty(text(&['/', '\\'])),
"/",
replacement,
one_or_more(choice!(
format(),
map(text(REPLACE_ESCAPE_CHARS), FormatItem::Text)
)),
"/",
optional(take_until(|c| c == '}')),
text(&['}', '\\',]),
),
|(_, value, _, replacement, _, options)| Regex {
value,
@ -308,13 +331,12 @@ mod parser {
}
fn placeholder<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
let text = map(text(['$', '}']), SnippetElement::Text);
map(
seq!(
"${",
digit(),
":",
one_or_more(choice!(anything(), text)),
one_or_more(anything(TEXT_ESCAPE_CHARS)),
"}"
),
|seq| SnippetElement::Placeholder {
@ -330,7 +352,7 @@ mod parser {
"${",
digit(),
"|",
sep(take_until(|c| c == ',' || c == '|'), ","),
sep(text(CHOICE_TEXT_ESCAPE_CHARS), ","),
"|}",
),
|seq| SnippetElement::Choice {
@ -368,17 +390,21 @@ mod parser {
)
}
fn anything<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
// The parser has to be constructed lazily to avoid infinite opaque type recursion
|input: &'a str| {
let parser = choice!(tabstop(), placeholder(), choice(), variable());
/// Parser for a single snippet element: a tabstop, placeholder, choice,
/// variable, or a run of text delimited by `escape_chars`.
///
/// The text alternative can succeed without consuming input, so callers that
/// repeat this parser rely on the repetition combinator rejecting empty matches.
fn anything<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = SnippetElement<'a>> {
// The parser has to be constructed lazily (inside the closure) to avoid
// infinite opaque type recursion: `placeholder` itself calls `anything`.
move |input: &'a str| {
let parser = choice!(
tabstop(),
placeholder(),
choice(),
variable(),
// Plain text is listed last, after all the `$`-introduced forms.
map(text(escape_chars), SnippetElement::Text)
);
parser.parse(input)
}
}
fn snippet<'a>() -> impl Parser<'a, Output = Snippet<'a>> {
let text = map(text(['$']), SnippetElement::Text);
map(one_or_more(choice!(anything(), text)), |parts| Snippet {
map(one_or_more(anything(TEXT_ESCAPE_CHARS)), |parts| Snippet {
elements: parts,
})
}
@ -392,6 +418,7 @@ mod parser {
}
})
}
#[cfg(test)]
mod test {
use super::SnippetElement::*;
@ -407,12 +434,12 @@ mod parser {
assert_eq!(
Ok(Snippet {
elements: vec![
Text("match("),
Text("match(".into()),
Placeholder {
tabstop: 1,
value: vec!(Text("Arg1")),
value: vec!(Text("Arg1".into())),
},
Text(")")
Text(")".into())
]
}),
parse("match(${1:Arg1})")
@ -446,15 +473,15 @@ mod parser {
assert_eq!(
Ok(Snippet {
elements: vec![
Text("local "),
Text("local ".into()),
Placeholder {
tabstop: 1,
value: vec!(Text("var")),
value: vec!(Text("var".into())),
},
Text(" = "),
Text(" = ".into()),
Placeholder {
tabstop: 1,
value: vec!(Text("value")),
value: vec!(Text("value".into())),
},
]
}),
@ -468,7 +495,7 @@ mod parser {
Ok(Snippet {
elements: vec![Placeholder {
tabstop: 1,
value: vec!(Text("var, "), Tabstop { tabstop: 2 },),
value: vec!(Text("var, ".into()), Tabstop { tabstop: 2 },),
},]
}),
parse("${1:var, $2}")
@ -482,10 +509,10 @@ mod parser {
elements: vec![Placeholder {
tabstop: 1,
value: vec!(
Text("foo "),
Text("foo ".into()),
Placeholder {
tabstop: 2,
value: vec!(Text("bar")),
value: vec!(Text("bar".into())),
},
),
},]
@ -499,27 +526,27 @@ mod parser {
assert_eq!(
Ok(Snippet {
elements: vec![
Text("hello "),
Text("hello ".into()),
Tabstop { tabstop: 1 },
Tabstop { tabstop: 2 },
Text(" "),
Text(" ".into()),
Choice {
tabstop: 1,
choices: vec!["one", "two", "three"]
choices: vec!["one".into(), "two".into(), "three".into()]
},
Text(" "),
Text(" ".into()),
Variable {
name: "name",
default: Some("foo"),
regex: None
},
Text(" "),
Text(" ".into()),
Variable {
name: "var",
default: None,
regex: None
},
Text(" "),
Text(" ".into()),
Variable {
name: "TM",
default: None,
@ -539,9 +566,9 @@ mod parser {
name: "TM_FILENAME",
default: None,
regex: Some(Regex {
value: "(.*).+$",
value: "(.*).+$".into(),
replacement: vec![FormatItem::Capture(1)],
options: None,
options: Tendril::new(),
}),
}]
}),

View file

@ -459,6 +459,7 @@ pub fn zero_or_more<'a, P, T>(parser: P) -> impl Parser<'a, Output = Vec<T>>
where
P: Parser<'a, Output = T>,
{
let parser = non_empty(parser);
move |mut input| {
let mut values = Vec::new();
@ -491,6 +492,7 @@ pub fn one_or_more<'a, P, T>(parser: P) -> impl Parser<'a, Output = Vec<T>>
where
P: Parser<'a, Output = T>,
{
let parser = non_empty(parser);
move |mut input| {
let mut values = Vec::new();
@ -559,3 +561,14 @@ where
Ok((input, values))
}
}
/// Wraps `p` so that a successful parse which consumed no input is turned
/// into a failure.
///
/// If `p` succeeds and the remaining input is as long as the original input,
/// the wrapper returns `Err(input)`. Every other outcome of `p` (success with
/// progress, or failure) is passed through unchanged. `zero_or_more` and
/// `one_or_more` apply this to their inner parser so that a parser which can
/// match the empty string cannot make them loop forever.
pub fn non_empty<'a, T>(p: impl Parser<'a, Output = T>) -> impl Parser<'a, Output = T> {
    move |input| match p.parse(input) {
        // Success without progress: report it as a failure at the same position.
        Ok((rest, _)) if rest.len() == input.len() => Err(input),
        outcome => outcome,
    }
}