Hacky way to specify indent scopes per language via toml configs.

Can't do it via a scm query nicely because it returns an iterator over
all the matches, whereas we want to traverse the tree ourselves.

Can't extract the pattern data from a parsed query either.

Oh well, toml files for now.
This commit is contained in:
Blaž Hrastnik 2021-05-14 19:21:46 +09:00
parent 726072085d
commit 4a9d1163e0
7 changed files with 100 additions and 45 deletions

1
Cargo.lock generated
View file

@ -291,6 +291,7 @@ dependencies = [
"serde",
"smallvec",
"tendril",
"toml",
"tree-sitter",
"unicode-segmentation",
"unicode-width",

View file

@ -22,5 +22,6 @@ once_cell = "1.4"
regex = "1"
serde = { version = "1.0", features = ["derive"] }
toml = "0.5"
etcetera = "0.3"

View file

@ -1,6 +1,6 @@
use crate::{
find_first_non_whitespace_char,
syntax::Syntax,
syntax::{IndentQuery, LanguageConfiguration, Syntax},
tree_sitter::{Node, Tree},
Rope, RopeSlice,
};
@ -43,41 +43,12 @@ fn get_highest_syntax_node_at_bytepos(syntax: &Syntax, pos: usize) -> Option<Nod
Some(node)
}
fn calculate_indentation(node: Option<Node>, newline: bool) -> usize {
fn calculate_indentation(query: &IndentQuery, node: Option<Node>, newline: bool) -> usize {
// NOTE: can't use contains() on query because of comparing Vec<String> and &str
// https://doc.rust-lang.org/std/vec/struct.Vec.html#method.contains
let mut increment: i32 = 0;
// Hardcoded for rust for now
let indent_scopes = &[
"while_expression",
"for_expression",
"loop_expression",
"if_expression",
"if_let_expression",
// "match_expression",
// "match_arm",
"tuple_expression",
"array_expression",
// indent_except_first_scopes
"use_list",
"block",
"match_block",
"arguments",
"parameters",
"declaration_list",
"field_declaration_list",
"field_initializer_list",
"struct_pattern",
"tuple_pattern",
"enum_variant_list",
// "function_item",
// "closure_expression",
"binary_expression",
"field_expression",
"where_clause",
];
let outdent = &["where", "}", "]", ")"];
let mut node = match node {
Some(node) => node,
None => return 0,
@ -88,7 +59,7 @@ fn calculate_indentation(node: Option<Node>, newline: bool) -> usize {
// if we're calculating indentation for a brand new line then the current node will become the
// parent node. We need to take it's indentation level into account too.
let node_kind = node.kind();
if newline && indent_scopes.contains(&node_kind) {
if newline && query.indent.contains(node_kind) {
increment += 1;
}
@ -102,14 +73,14 @@ fn calculate_indentation(node: Option<Node>, newline: bool) -> usize {
// }) <-- }) is two scopes
let starts_same_line = start == prev_start;
if outdent.contains(&node.kind()) && !starts_same_line {
if query.outdent.contains(node.kind()) && !starts_same_line {
// we outdent by skipping the rules for the current level and jumping up
// node = parent;
increment -= 1;
// continue;
}
if indent_scopes.contains(&parent_kind) // && not_first_or_last_sibling
if query.indent.contains(parent_kind) // && not_first_or_last_sibling
&& !starts_same_line
{
// println!("is_scope {}", parent_kind);
@ -128,6 +99,7 @@ fn calculate_indentation(node: Option<Node>, newline: bool) -> usize {
}
fn suggested_indent_for_line(
language_config: &LanguageConfiguration,
syntax: Option<&Syntax>,
text: RopeSlice,
line_num: usize,
@ -137,7 +109,7 @@ fn suggested_indent_for_line(
let current = indent_level_for_line(line, tab_width);
if let Some(start) = find_first_non_whitespace_char(text, line_num) {
return suggested_indent_for_pos(syntax, text, start, false);
return suggested_indent_for_pos(Some(language_config), syntax, text, start, false);
};
// if the line is blank, indent should be zero
@ -148,18 +120,24 @@ fn suggested_indent_for_line(
// - it should return 0 when mass indenting stuff
// - it should look up the wrapper node and count it too when we press o/O
pub fn suggested_indent_for_pos(
language_config: Option<&LanguageConfiguration>,
syntax: Option<&Syntax>,
text: RopeSlice,
pos: usize,
new_line: bool,
) -> usize {
if let Some(syntax) = syntax {
if let (Some(query), Some(syntax)) = (
language_config.and_then(|config| config.indent_query()),
syntax,
) {
let byte_start = text.char_to_byte(pos);
let node = get_highest_syntax_node_at_bytepos(syntax, byte_start);
// let config = load indentation query config from Syntax(should contain language_config)
// TODO: special case for comments
// TODO: if preserve_leading_whitespace
calculate_indentation(node, new_line)
calculate_indentation(query, node, new_line)
} else {
// TODO: heuristics for non-tree sitter grammars
0
@ -286,6 +264,7 @@ where
tab_width: 4,
unit: String::from(" "),
}),
indent_query: OnceCell::new(),
}],
});
@ -304,7 +283,7 @@ where
let line = text.line(i);
let indent = indent_level_for_line(line, tab_width);
assert_eq!(
suggested_indent_for_line(Some(&syntax), text, i, tab_width),
suggested_indent_for_line(&language_config, Some(&syntax), text, i, tab_width),
indent,
"line {}: {}",
i,

View file

@ -4,7 +4,7 @@ pub use helix_syntax::{get_language, get_language_name, Lang};
use std::{
borrow::Cow,
cell::RefCell,
collections::HashMap,
collections::{HashMap, HashSet},
path::{Path, PathBuf},
sync::Arc,
};
@ -41,6 +41,9 @@ pub struct LanguageConfiguration {
pub language_server: Option<LanguageServerConfiguration>,
#[serde(skip_serializing_if = "Option::is_none")]
pub indent: Option<IndentationConfiguration>,
#[serde(skip)]
pub(crate) indent_query: OnceCell<Option<IndentQuery>>,
}
#[derive(Serialize, Deserialize)]
@ -59,6 +62,17 @@ pub struct IndentationConfiguration {
pub unit: String,
}
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct IndentQuery {
#[serde(default)]
#[serde(skip_serializing_if = "HashSet::is_empty")]
pub indent: HashSet<String>,
#[serde(default)]
#[serde(skip_serializing_if = "HashSet::is_empty")]
pub outdent: HashSet<String>,
}
fn read_query(language: &str, filename: &str) -> String {
static INHERITS_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()]+)\s*").unwrap());
@ -127,6 +141,20 @@ impl LanguageConfiguration {
.clone()
}
pub fn indent_query(&self) -> Option<&IndentQuery> {
self.indent_query
.get_or_init(|| {
let language = get_language_name(self.language_id).to_ascii_lowercase();
let root = crate::runtime_dir();
let path = root.join("queries").join(language).join("indents.toml");
let toml = std::fs::read(&path).ok()?;
toml::from_slice(&toml).ok()
})
.as_ref()
}
pub fn scope(&self) -> &str {
&self.scope
}

View file

@ -1171,7 +1171,13 @@ fn open(cx: &mut Context, open: Open) {
let index = doc.text().line_to_char(line).saturating_sub(1);
// TODO: share logic with insert_newline for indentation
let indent_level = indent::suggested_indent_for_pos(doc.syntax(), text, index, true);
let indent_level = indent::suggested_indent_for_pos(
doc.language_config(),
doc.syntax(),
text,
index,
true,
);
let indent = doc.indent_unit().repeat(indent_level);
let mut text = String::with_capacity(1 + indent.len());
text.push('\n');
@ -1649,8 +1655,13 @@ pub mod insert {
let curr = contents.char(pos);
// TODO: offset range.head by 1? when calculating?
let indent_level =
indent::suggested_indent_for_pos(doc.syntax(), text, pos.saturating_sub(1), true);
let indent_level = indent::suggested_indent_for_pos(
doc.language_config(),
doc.syntax(),
text,
pos.saturating_sub(1),
true,
);
let indent = doc.indent_unit().repeat(indent_level);
let mut text = String::with_capacity(1 + indent.len());
text.push('\n');

View file

@ -328,6 +328,11 @@ impl Document {
.map(|language| language.scope.as_str())
}
#[inline]
pub fn language_config(&self) -> Option<&LanguageConfiguration> {
self.language.as_deref()
}
#[inline]
/// Current document version, incremented at each change.
pub fn version(&self) -> i32 {

View file

@ -0,0 +1,30 @@
indent = [
"while_expression",
"for_expression",
"loop_expression",
"if_expression",
"if_let_expression",
"tuple_expression",
"array_expression",
"use_list",
"block",
"match_block",
"arguments",
"parameters",
"declaration_list",
"field_declaration_list",
"field_initializer_list",
"struct_pattern",
"tuple_pattern",
"enum_variant_list",
"binary_expression",
"field_expression",
"where_clause"
]
outdent = [
"where",
"}",
"]",
")"
]