revamped snippet text element parsing

Snippet text elements can contain escape sequences that must be treated properly. Furthermore, snippets must always escape certain characters (like `}` or `\`). The function has been updated to account for that. `text` is now also included with `anything` to match the grammar and can also match empty text. To avoid infinite loops, the `non_empty` combinator has been added, which is automatically used in the `one_or_more` and `zero_or_more` combinators where the problem would occur.
2023-03-13 19:27:54 +01:00 · 2023-03-13 19:27:54 +01:00 · 90348b889f
commit 90348b889f
parent bbf480007d
2 changed files with 85 additions and 45 deletions
--- a/helix-lsp/src/snippet.rs
+++ b/helix-lsp/src/snippet.rs
@ -12,7 +12,7 @@ pub enum CaseChange {
 #[derive(Debug, PartialEq, Eq)]
 pub enum FormatItem<'a> {
-    Text(&'a str),
+    Text(Tendril),
    Capture(usize),
    CaseChange(usize, CaseChange),
    Conditional(usize, Option<&'a str>, Option<&'a str>),
@ -20,9 +20,9 @@ pub enum FormatItem<'a> {
 #[derive(Debug, PartialEq, Eq)]
 pub struct Regex<'a> {
-    value: &'a str,
+    value: Tendril,
    replacement: Vec<FormatItem<'a>>,
-    options: Option<&'a str>,
+    options: Tendril,
 }
 #[derive(Debug, PartialEq, Eq)]
@ -36,14 +36,14 @@ pub enum SnippetElement<'a> {
    },
    Choice {
        tabstop: usize,
-        choices: Vec<&'a str>,
+        choices: Vec<Tendril>,
    },
    Variable {
        name: &'a str,
        default: Option<&'a str>,
        regex: Option<Regex<'a>>,
    },
-    Text(&'a str),
+    Text(Tendril),
 }
 #[derive(Debug, PartialEq, Eq)]
@ -67,12 +67,12 @@ fn render_elements(
    for element in snippet_elements {
        match element {
-            &Text(text) => {
+            Text(text) => {
                // small optimization to avoid calling replace when it's unnecessary
                let text = if text.contains('\n') {
                    Cow::Owned(text.replace('\n', newline_with_offset))
                } else {
-                    Cow::Borrowed(text)
+                    Cow::Borrowed(text.as_str())
                };
                *offset += text.chars().count();
                insert.push_str(&text);
@ -160,6 +160,7 @@ pub fn render(
 }
 mod parser {
    use helix_core::Tendril;
    use helix_parsec::*;
    use super::{CaseChange, FormatItem, Regex, Snippet, SnippetElement};
@ -210,8 +211,32 @@ mod parser {
        }
    }
-    fn text<'a, const SIZE: usize>(cs: [char; SIZE]) -> impl Parser<'a, Output = &'a str> {
+    const TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$'];
-        take_while(move |c| cs.into_iter().all(|c1| c != c1))
+    const REPLACE_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '/'];
    const CHOICE_TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '|', ','];
    /// Builds a parser that collects literal text into a `Tendril`, resolving
    /// backslash escapes along the way.
    ///
    /// * A backslash followed by a character from `escape_chars` contributes
    ///   that character (without the backslash) to the output.
    /// * A backslash followed by any other character, or a backslash at the
    ///   very end of the input, stops the parser; the remaining input starts
    ///   at that backslash.
    /// * An unescaped character contained in `escape_chars` stops the parser;
    ///   the remaining input starts at that character.
    /// * Every other character is copied to the output unchanged.
    ///
    /// The parser never returns an error: it may succeed with an empty
    /// `Tendril` and without consuming any input.
    fn text<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = Tendril> {
        move |input: &'a str| {
            let mut rest = input.char_indices();
            let mut out = Tendril::new();
            while let Some((start, ch)) = rest.next() {
                if ch == '\\' {
                    // Look at the character after the backslash; only a known
                    // escape character is accepted, anything else ends the text
                    // right before the backslash.
                    match rest.next() {
                        Some((_, next)) if escape_chars.contains(&next) => out.push(next),
                        _ => return Ok((&input[start..], out)),
                    }
                } else if escape_chars.contains(&ch) {
                    // An unescaped special character terminates the text.
                    return Ok((&input[start..], out));
                } else {
                    out.push(ch);
                }
            }
            // The whole input was consumed as text.
            Ok(("", out))
        }
    }
    fn digit<'a>() -> impl Parser<'a, Output = usize> {
@ -274,20 +299,18 @@ mod parser {
    }
    fn regex<'a>() -> impl Parser<'a, Output = Regex<'a>> {
        let text = map(text(['$', '/']), FormatItem::Text);
        let replacement = reparse_as(
            take_until(|c| c == '/'),
            one_or_more(choice!(format(), text)),
        );
        map(
            seq!(
                "/",
-                take_until(|c| c == '/'),
+                // TODO parse as ECMAScript and convert to rust regex
                non_empty(text(&['/', '\\'])),
                "/",
-                replacement,
+                one_or_more(choice!(
                    format(),
                    map(text(REPLACE_ESCAPE_CHARS), FormatItem::Text)
                )),
                "/",
-                optional(take_until(|c| c == '}')),
+                text(&['}', '\\',]),
            ),
            |(_, value, _, replacement, _, options)| Regex {
                value,
@ -308,13 +331,12 @@ mod parser {
    }
    fn placeholder<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
        let text = map(text(['$', '}']), SnippetElement::Text);
        map(
            seq!(
                "${",
                digit(),
                ":",
-                one_or_more(choice!(anything(), text)),
+                one_or_more(anything(TEXT_ESCAPE_CHARS)),
                "}"
            ),
            |seq| SnippetElement::Placeholder {
@ -330,7 +352,7 @@ mod parser {
                "${",
                digit(),
                "|",
-                sep(take_until(|c| c == ',' || c == '|'), ","),
+                sep(text(CHOICE_TEXT_ESCAPE_CHARS), ","),
                "|}",
            ),
            |seq| SnippetElement::Choice {
@ -368,17 +390,21 @@ mod parser {
        )
    }
-    fn anything<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
+    fn anything<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = SnippetElement<'a>> {
-        // The parser has to be constructed lazily to avoid infinite opaque type recursion
+        move |input: &'a str| {
-        |input: &'a str| {
+            let parser = choice!(
-            let parser = choice!(tabstop(), placeholder(), choice(), variable());
+                tabstop(),
                placeholder(),
                choice(),
                variable(),
                map(text(escape_chars), SnippetElement::Text)
            );
            parser.parse(input)
        }
    }
    fn snippet<'a>() -> impl Parser<'a, Output = Snippet<'a>> {
-        let text = map(text(['$']), SnippetElement::Text);
+        map(one_or_more(anything(TEXT_ESCAPE_CHARS)), |parts| Snippet {
        map(one_or_more(choice!(anything(), text)), |parts| Snippet {
            elements: parts,
        })
    }
@ -392,6 +418,7 @@ mod parser {
            }
        })
    }
    #[cfg(test)]
    mod test {
        use super::SnippetElement::*;
@ -407,12 +434,12 @@ mod parser {
            assert_eq!(
                Ok(Snippet {
                    elements: vec![
-                        Text("match("),
+                        Text("match(".into()),
                        Placeholder {
                            tabstop: 1,
-                            value: vec!(Text("Arg1")),
+                            value: vec!(Text("Arg1".into())),
                        },
-                        Text(")")
+                        Text(")".into())
                    ]
                }),
                parse("match(${1:Arg1})")
@ -446,15 +473,15 @@ mod parser {
            assert_eq!(
                Ok(Snippet {
                    elements: vec![
-                        Text("local "),
+                        Text("local ".into()),
                        Placeholder {
                            tabstop: 1,
-                            value: vec!(Text("var")),
+                            value: vec!(Text("var".into())),
                        },
-                        Text(" = "),
+                        Text(" = ".into()),
                        Placeholder {
                            tabstop: 1,
-                            value: vec!(Text("value")),
+                            value: vec!(Text("value".into())),
                        },
                    ]
                }),
@ -468,7 +495,7 @@ mod parser {
                Ok(Snippet {
                    elements: vec![Placeholder {
                        tabstop: 1,
-                        value: vec!(Text("var, "), Tabstop { tabstop: 2 },),
+                        value: vec!(Text("var, ".into()), Tabstop { tabstop: 2 },),
                    },]
                }),
                parse("${1:var, $2}")
@ -482,10 +509,10 @@ mod parser {
                    elements: vec![Placeholder {
                        tabstop: 1,
                        value: vec!(
-                            Text("foo "),
+                            Text("foo ".into()),
                            Placeholder {
                                tabstop: 2,
-                                value: vec!(Text("bar")),
+                                value: vec!(Text("bar".into())),
                            },
                        ),
                    },]
@ -499,27 +526,27 @@ mod parser {
            assert_eq!(
                Ok(Snippet {
                    elements: vec![
-                        Text("hello "),
+                        Text("hello ".into()),
                        Tabstop { tabstop: 1 },
                        Tabstop { tabstop: 2 },
-                        Text(" "),
+                        Text(" ".into()),
                        Choice {
                            tabstop: 1,
-                            choices: vec!["one", "two", "three"]
+                            choices: vec!["one".into(), "two".into(), "three".into()]
                        },
-                        Text(" "),
+                        Text(" ".into()),
                        Variable {
                            name: "name",
                            default: Some("foo"),
                            regex: None
                        },
-                        Text(" "),
+                        Text(" ".into()),
                        Variable {
                            name: "var",
                            default: None,
                            regex: None
                        },
-                        Text(" "),
+                        Text(" ".into()),
                        Variable {
                            name: "TM",
                            default: None,
@ -539,9 +566,9 @@ mod parser {
                        name: "TM_FILENAME",
                        default: None,
                        regex: Some(Regex {
-                            value: "(.*).+$",
+                            value: "(.*).+$".into(),
                            replacement: vec![FormatItem::Capture(1)],
-                            options: None,
+                            options: Tendril::new(),
                        }),
                    }]
                }),
--- a/helix-parsec/src/lib.rs
+++ b/helix-parsec/src/lib.rs
@ -459,6 +459,7 @@ pub fn zero_or_more<'a, P, T>(parser: P) -> impl Parser<'a, Output = Vec<T>>
 where
    P: Parser<'a, Output = T>,
 {
    let parser = non_empty(parser);
    move |mut input| {
        let mut values = Vec::new();
@ -491,6 +492,7 @@ pub fn one_or_more<'a, P, T>(parser: P) -> impl Parser<'a, Output = Vec<T>>
 where
    P: Parser<'a, Output = T>,
 {
    let parser = non_empty(parser);
    move |mut input| {
        let mut values = Vec::new();
@ -559,3 +561,14 @@ where
        Ok((input, values))
    }
 }
 /// Wraps `p` so that a successful parse which consumed no input is turned
 /// into a failure.
 ///
 /// Errors produced by `p` are propagated unchanged. If `p` succeeds but the
 /// remaining input has the same length as the original input, the wrapper
 /// returns `Err` carrying the original input; otherwise the result of `p`
 /// is passed through as is.
 pub fn non_empty<'a, T>(p: impl Parser<'a, Output = T>) -> impl Parser<'a, Output = T> {
    move |input| {
        match p.parse(input)? {
            // Same length left over means nothing was consumed.
            (rest, _) if rest.len() == input.len() => Err(input),
            consumed => Ok(consumed),
        }
    }
 }