Implement relative indent queries,
i.e. also take into account the indentation of a previous line when computing the indentation for a new line.
This commit is contained in:
parent
23fd145a56
commit
d29a66f267
2 changed files with 213 additions and 85 deletions
|
@ -4,6 +4,7 @@ use tree_sitter::{Query, QueryCursor, QueryPredicateArg};
|
|||
|
||||
use crate::{
|
||||
chars::{char_is_line_ending, char_is_whitespace},
|
||||
find_first_non_whitespace_char,
|
||||
graphemes::{grapheme_width, tab_width_at},
|
||||
syntax::{LanguageConfiguration, RopeProvider, Syntax},
|
||||
tree_sitter::Node,
|
||||
|
@ -196,6 +197,19 @@ pub fn indent_level_for_line(line: RopeSlice, tab_width: usize, indent_width: us
|
|||
len / indent_width
|
||||
}
|
||||
|
||||
/// Create a string of tabs & spaces that has the same visual width as the given RopeSlice (independent of the tab width).
|
||||
fn whitespace_with_same_width(text: RopeSlice) -> String {
|
||||
let mut s = String::new();
|
||||
for grapheme in RopeGraphemes::new(text) {
|
||||
if grapheme == "\t" {
|
||||
s.push('\t');
|
||||
} else {
|
||||
s.extend(std::iter::repeat(' ').take(grapheme_width(&Cow::from(grapheme))));
|
||||
}
|
||||
}
|
||||
s
|
||||
}
|
||||
|
||||
/// Computes for node and all ancestors whether they are the first node on their line.
|
||||
/// The first entry in the return value represents the root node, the last one the node itself
|
||||
fn get_first_in_line(mut node: Node, new_line_byte_pos: Option<usize>) -> Vec<bool> {
|
||||
|
@ -241,21 +255,21 @@ fn get_first_in_line(mut node: Node, new_line_byte_pos: Option<usize>) -> Vec<bo
|
|||
/// - max(0, indent - outdent) tabs, if tabs are used for indentation
|
||||
/// - max(0, indent - outdent)*indent_width spaces, if spaces are used for indentation
|
||||
#[derive(Default, Debug, PartialEq, Eq, Clone)]
|
||||
pub struct Indentation {
|
||||
pub struct Indentation<'a> {
|
||||
indent: usize,
|
||||
indent_always: usize,
|
||||
outdent: usize,
|
||||
outdent_always: usize,
|
||||
/// The alignment, as a string containing only tabs & spaces. Storing this as a string instead of e.g.
|
||||
/// the (visual) width ensures that the alignment is preserved even if the tab width changes.
|
||||
align: Option<String>,
|
||||
align: Option<RopeSlice<'a>>,
|
||||
}
|
||||
|
||||
impl Indentation {
|
||||
impl<'a> Indentation<'a> {
|
||||
/// Add some other [Indentation] to this.
|
||||
/// The added indent should be the total added indent from one line.
|
||||
/// Indent should always be added starting from the bottom (or equivalently, the innermost tree-sitter node).
|
||||
fn add_line(&mut self, added: Indentation) {
|
||||
fn add_line(&mut self, added: Indentation<'a>) {
|
||||
// Align overrides the indent from outer scopes.
|
||||
if self.align.is_some() {
|
||||
return;
|
||||
|
@ -274,7 +288,7 @@ impl Indentation {
|
|||
/// Only captures that apply to the same line should be added together in this way (otherwise use `add_line`)
|
||||
/// and the captures should be added starting from the innermost tree-sitter node (currently this only matters
|
||||
/// if multiple `@align` patterns occur on the same line).
|
||||
fn add_capture(&mut self, added: IndentCaptureType) {
|
||||
fn add_capture(&mut self, added: IndentCaptureType<'a>) {
|
||||
match added {
|
||||
IndentCaptureType::Indent => {
|
||||
if self.indent_always == 0 {
|
||||
|
@ -303,7 +317,60 @@ impl Indentation {
|
|||
}
|
||||
}
|
||||
}
|
||||
fn into_string(self, indent_style: &IndentStyle) -> String {
|
||||
fn net_indent(&self) -> isize {
|
||||
(self.indent + self.indent_always) as isize
|
||||
- ((self.outdent + self.outdent_always) as isize)
|
||||
}
|
||||
/// Convert `self` into a string, taking into account the computed and actual indentation of some other line.
|
||||
fn relative_indent(
|
||||
&self,
|
||||
other_computed_indent: &Self,
|
||||
other_leading_whitespace: RopeSlice,
|
||||
indent_style: &IndentStyle,
|
||||
tab_width: usize,
|
||||
) -> Option<String> {
|
||||
if self.align == other_computed_indent.align {
|
||||
// If self and baseline are either not aligned to anything or both aligned the same way,
|
||||
// we can simply take `other_leading_whitespace` and add some indent / outdent to it (in the second
|
||||
// case, the alignment should already be accounted for in `other_leading_whitespace`).
|
||||
let indent_diff = self.net_indent() - other_computed_indent.net_indent();
|
||||
if indent_diff >= 0 {
|
||||
let mut indent = other_leading_whitespace.to_string();
|
||||
indent.push_str(&indent_style.as_str().repeat(indent_diff as usize));
|
||||
Some(indent)
|
||||
} else {
|
||||
// It's not entirely clear how to subtract a given indent level from the other line if its indentation is
|
||||
// complex (e.g. a weird alignment or a mixture of tabs and spaces). Therefore, we only consider the indent level
|
||||
// of `baseline_leading_whitespace`, add `indent_diff` to it and convert this indent level back to a string. If we ever encounter
|
||||
// cases where some other behavior is expected, the behavior for strings that don't exactly correspond to some indent
|
||||
// level could be re-evaluated.
|
||||
let actual_baseline_indent_level = indent_level_for_line(
|
||||
other_leading_whitespace,
|
||||
tab_width,
|
||||
indent_style.indent_width(tab_width),
|
||||
);
|
||||
let total_indent = actual_baseline_indent_level as isize + indent_diff;
|
||||
if total_indent < 0 {
|
||||
log::warn!(
|
||||
"Computed negative indent during a relative indent computation (actual baseline indent: {}, computed baseline indent: {}, computed line indent: {})",
|
||||
actual_baseline_indent_level,
|
||||
other_computed_indent.net_indent(),
|
||||
self.net_indent(),
|
||||
);
|
||||
Some(String::new())
|
||||
} else {
|
||||
Some(indent_style.as_str().repeat(total_indent as usize))
|
||||
}
|
||||
}
|
||||
} else if self.align.is_some() {
|
||||
Some(self.to_string(indent_style))
|
||||
} else {
|
||||
// If the other line has some alignment and `self` does not, there is no reasonable way to take
|
||||
// into account `other_leading_whitespace`.
|
||||
None
|
||||
}
|
||||
}
|
||||
pub fn to_string(&self, indent_style: &IndentStyle) -> String {
|
||||
let indent = self.indent_always + self.indent;
|
||||
let outdent = self.outdent_always + self.outdent;
|
||||
|
||||
|
@ -314,7 +381,7 @@ impl Indentation {
|
|||
0
|
||||
};
|
||||
let mut indent_string = if let Some(align) = self.align {
|
||||
align
|
||||
whitespace_with_same_width(align)
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
@ -325,21 +392,21 @@ impl Indentation {
|
|||
|
||||
/// An indent definition which corresponds to a capture from the indent query
|
||||
#[derive(Debug)]
|
||||
struct IndentCapture {
|
||||
capture_type: IndentCaptureType,
|
||||
struct IndentCapture<'a> {
|
||||
capture_type: IndentCaptureType<'a>,
|
||||
scope: IndentScope,
|
||||
}
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
enum IndentCaptureType {
|
||||
enum IndentCaptureType<'a> {
|
||||
Indent,
|
||||
IndentAlways,
|
||||
Outdent,
|
||||
OutdentAlways,
|
||||
/// Alignment given as a string of whitespace
|
||||
Align(String),
|
||||
Align(RopeSlice<'a>),
|
||||
}
|
||||
|
||||
impl IndentCaptureType {
|
||||
impl<'a> IndentCaptureType<'a> {
|
||||
fn default_scope(&self) -> IndentScope {
|
||||
match self {
|
||||
IndentCaptureType::Indent | IndentCaptureType::IndentAlways => IndentScope::Tail,
|
||||
|
@ -371,8 +438,8 @@ enum ExtendCapture {
|
|||
/// each node (identified by its ID) the relevant captures (already filtered
|
||||
/// by predicates).
|
||||
#[derive(Debug)]
|
||||
struct IndentQueryResult {
|
||||
indent_captures: HashMap<usize, Vec<IndentCapture>>,
|
||||
struct IndentQueryResult<'a> {
|
||||
indent_captures: HashMap<usize, Vec<IndentCapture<'a>>>,
|
||||
extend_captures: HashMap<usize, Vec<ExtendCapture>>,
|
||||
}
|
||||
|
||||
|
@ -393,14 +460,14 @@ fn get_node_end_line(node: Node, new_line_byte_pos: Option<usize>) -> usize {
|
|||
node_line
|
||||
}
|
||||
|
||||
fn query_indents(
|
||||
fn query_indents<'a>(
|
||||
query: &Query,
|
||||
syntax: &Syntax,
|
||||
cursor: &mut QueryCursor,
|
||||
text: RopeSlice,
|
||||
text: RopeSlice<'a>,
|
||||
range: std::ops::Range<usize>,
|
||||
new_line_byte_pos: Option<usize>,
|
||||
) -> IndentQueryResult {
|
||||
) -> IndentQueryResult<'a> {
|
||||
let mut indent_captures: HashMap<usize, Vec<IndentCapture>> = HashMap::new();
|
||||
let mut extend_captures: HashMap<usize, Vec<ExtendCapture>> = HashMap::new();
|
||||
cursor.set_byte_range(range);
|
||||
|
@ -488,7 +555,7 @@ fn query_indents(
|
|||
"outdent" => IndentCaptureType::Outdent,
|
||||
"outdent.always" => IndentCaptureType::OutdentAlways,
|
||||
// The alignment will be updated to the correct value at the end, when the anchor is known.
|
||||
"align" => IndentCaptureType::Align(String::from("")),
|
||||
"align" => IndentCaptureType::Align(RopeSlice::from("")),
|
||||
"anchor" => {
|
||||
if anchor.is_some() {
|
||||
log::error!("Invalid indent query: Encountered more than one @anchor in the same match.")
|
||||
|
@ -560,22 +627,10 @@ fn query_indents(
|
|||
}
|
||||
Some(anchor) => anchor,
|
||||
};
|
||||
// Create a string of tabs & spaces that should have the same width
|
||||
// as the string that precedes the anchor (independent of the tab width).
|
||||
let mut align = String::new();
|
||||
for grapheme in RopeGraphemes::new(
|
||||
capture.capture_type = IndentCaptureType::Align(
|
||||
text.line(anchor.start_position().row)
|
||||
.byte_slice(0..anchor.start_position().column),
|
||||
) {
|
||||
if grapheme == "\t" {
|
||||
align.push('\t');
|
||||
} else {
|
||||
align.extend(
|
||||
std::iter::repeat(' ').take(grapheme_width(&Cow::from(grapheme))),
|
||||
);
|
||||
}
|
||||
}
|
||||
capture.capture_type = IndentCaptureType::Align(align);
|
||||
);
|
||||
}
|
||||
indent_captures
|
||||
.entry(node_id)
|
||||
|
@ -661,56 +716,20 @@ fn extend_nodes<'a>(
|
|||
}
|
||||
}
|
||||
|
||||
/// Use the syntax tree to determine the indentation for a given position.
|
||||
/// This can be used in 2 ways:
|
||||
///
|
||||
/// - To get the correct indentation for an existing line (new_line=false), not necessarily equal to the current indentation.
|
||||
/// - In this case, pos should be inside the first tree-sitter node on that line.
|
||||
/// In most cases, this can just be the first non-whitespace on that line.
|
||||
/// - To get the indentation for a new line (new_line=true). This behaves like the first usecase if the part of the current line
|
||||
/// after pos were moved to a new line.
|
||||
///
|
||||
/// The indentation is determined by traversing all the tree-sitter nodes containing the position.
|
||||
/// Each of these nodes produces some [Indentation] for:
|
||||
///
|
||||
/// - The line of the (beginning of the) node. This is defined by the scope `all` if this is the first node on its line.
|
||||
/// - The line after the node. This is defined by:
|
||||
/// - The scope `tail`.
|
||||
/// - The scope `all` if this node is not the first node on its line.
|
||||
/// Intuitively, `all` applies to everything contained in this node while `tail` applies to everything except for the first line of the node.
|
||||
/// The indents from different nodes for the same line are then combined.
|
||||
/// The result [Indentation] is simply the sum of the [Indentation] for all lines.
|
||||
///
|
||||
/// Specifying which line exactly an [Indentation] applies to is important because indents on the same line combine differently than indents on different lines:
|
||||
/// ```ignore
|
||||
/// some_function(|| {
|
||||
/// // Both the function parameters as well as the contained block should be indented.
|
||||
/// // Because they are on the same line, this only yields one indent level
|
||||
/// });
|
||||
/// ```
|
||||
///
|
||||
/// ```ignore
|
||||
/// some_function(
|
||||
/// param1,
|
||||
/// || {
|
||||
/// // Here we get 2 indent levels because the 'parameters' and the 'block' node begin on different lines
|
||||
/// },
|
||||
/// );
|
||||
/// ```
|
||||
/// Prepare an indent query by computing:
|
||||
/// - The node from which to start the query (this is non-trivial due to `@extend` captures)
|
||||
/// - The indent captures for all relevant nodes.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn treesitter_indent_for_pos(
|
||||
fn init_indent_query<'a, 'b>(
|
||||
query: &Query,
|
||||
syntax: &Syntax,
|
||||
indent_style: &IndentStyle,
|
||||
syntax: &'a Syntax,
|
||||
text: RopeSlice<'b>,
|
||||
tab_width: usize,
|
||||
indent_width: usize,
|
||||
text: RopeSlice,
|
||||
line: usize,
|
||||
pos: usize,
|
||||
new_line: bool,
|
||||
) -> Option<String> {
|
||||
let byte_pos = text.char_to_byte(pos);
|
||||
let new_line_byte_pos = new_line.then_some(byte_pos);
|
||||
byte_pos: usize,
|
||||
new_line_byte_pos: Option<usize>,
|
||||
) -> Option<(Node<'a>, HashMap<usize, Vec<IndentCapture<'b>>>)> {
|
||||
// The innermost tree-sitter node which is considered for the indent
|
||||
// computation. It may change if some preceding node is extended
|
||||
let mut node = syntax
|
||||
|
@ -754,7 +773,6 @@ pub fn treesitter_indent_for_pos(
|
|||
(query_result, deepest_preceding)
|
||||
})
|
||||
};
|
||||
let mut indent_captures = query_result.indent_captures;
|
||||
let extend_captures = query_result.extend_captures;
|
||||
|
||||
// Check for extend captures, potentially changing the node that the indent calculation starts with
|
||||
|
@ -769,6 +787,68 @@ pub fn treesitter_indent_for_pos(
|
|||
indent_width,
|
||||
);
|
||||
}
|
||||
Some((node, query_result.indent_captures))
|
||||
}
|
||||
|
||||
/// Use the syntax tree to determine the indentation for a given position.
|
||||
/// This can be used in 2 ways:
|
||||
///
|
||||
/// - To get the correct indentation for an existing line (new_line=false), not necessarily equal to the current indentation.
|
||||
/// - In this case, pos should be inside the first tree-sitter node on that line.
|
||||
/// In most cases, this can just be the first non-whitespace on that line.
|
||||
/// - To get the indentation for a new line (new_line=true). This behaves like the first use case if the part of the current line
|
||||
/// after pos were moved to a new line.
|
||||
///
|
||||
/// The indentation is determined by traversing all the tree-sitter nodes containing the position.
|
||||
/// Each of these nodes produces some [Indentation] for:
|
||||
///
|
||||
/// - The line of the (beginning of the) node. This is defined by the scope `all` if this is the first node on its line.
|
||||
/// - The line after the node. This is defined by:
|
||||
/// - The scope `tail`.
|
||||
/// - The scope `all` if this node is not the first node on its line.
|
||||
/// Intuitively, `all` applies to everything contained in this node while `tail` applies to everything except for the first line of the node.
|
||||
/// The indents from different nodes for the same line are then combined.
|
||||
/// The result [Indentation] is simply the sum of the [Indentation] for all lines.
|
||||
///
|
||||
/// Specifying which line exactly an [Indentation] applies to is important because indents on the same line combine differently than indents on different lines:
|
||||
/// ```ignore
|
||||
/// some_function(|| {
|
||||
/// // Both the function parameters as well as the contained block should be indented.
|
||||
/// // Because they are on the same line, this only yields one indent level
|
||||
/// });
|
||||
/// ```
|
||||
///
|
||||
/// ```ignore
|
||||
/// some_function(
|
||||
/// param1,
|
||||
/// || {
|
||||
/// // Here we get 2 indent levels because the 'parameters' and the 'block' node begin on different lines
|
||||
/// },
|
||||
/// );
|
||||
/// ```
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn treesitter_indent_for_pos<'a>(
|
||||
query: &Query,
|
||||
syntax: &Syntax,
|
||||
tab_width: usize,
|
||||
indent_width: usize,
|
||||
text: RopeSlice<'a>,
|
||||
line: usize,
|
||||
pos: usize,
|
||||
new_line: bool,
|
||||
) -> Option<Indentation<'a>> {
|
||||
let byte_pos = text.char_to_byte(pos);
|
||||
let new_line_byte_pos = new_line.then_some(byte_pos);
|
||||
let (mut node, mut indent_captures) = init_indent_query(
|
||||
query,
|
||||
syntax,
|
||||
text,
|
||||
tab_width,
|
||||
indent_width,
|
||||
line,
|
||||
byte_pos,
|
||||
new_line_byte_pos,
|
||||
)?;
|
||||
let mut first_in_line = get_first_in_line(node, new_line.then_some(byte_pos));
|
||||
|
||||
let mut result = Indentation::default();
|
||||
|
@ -836,7 +916,7 @@ pub fn treesitter_indent_for_pos(
|
|||
break;
|
||||
}
|
||||
}
|
||||
Some(result.into_string(indent_style))
|
||||
Some(result)
|
||||
}
|
||||
|
||||
/// Returns the indentation for a new line.
|
||||
|
@ -860,7 +940,6 @@ pub fn indent_for_newline(
|
|||
if let Some(indent) = treesitter_indent_for_pos(
|
||||
query,
|
||||
syntax,
|
||||
indent_style,
|
||||
tab_width,
|
||||
indent_width,
|
||||
text,
|
||||
|
@ -868,9 +947,55 @@ pub fn indent_for_newline(
|
|||
line_before_end_pos,
|
||||
true,
|
||||
) {
|
||||
return indent;
|
||||
// We want to compute the indentation not only based on the
|
||||
// syntax tree but also on the actual indentation of a previous
|
||||
// line. This makes indentation computation more resilient to
|
||||
// incomplete queries, incomplete source code & differing indentation
|
||||
// styles for the same language.
|
||||
// However, using the indent of a previous line as a baseline may not
|
||||
// make sense, e.g. if it has a different alignment than the new line.
|
||||
// In order to prevent edge cases with long running times, we only try
|
||||
// a constant number of (non-empty) lines.
|
||||
const MAX_ATTEMPTS: usize = 2;
|
||||
let mut num_attempts = 0;
|
||||
for line_idx in (0..=line_before).rev() {
|
||||
let line = text.line(line_idx);
|
||||
let first_non_whitespace_char = match find_first_non_whitespace_char(line) {
|
||||
Some(i) => i,
|
||||
None => {
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Some(indent) = (|| {
|
||||
let computed_indent = treesitter_indent_for_pos(
|
||||
query,
|
||||
syntax,
|
||||
tab_width,
|
||||
indent_width,
|
||||
text,
|
||||
line_idx,
|
||||
text.line_to_char(line_idx) + first_non_whitespace_char,
|
||||
false,
|
||||
)?;
|
||||
let leading_whitespace = line.slice(0..first_non_whitespace_char);
|
||||
indent.relative_indent(
|
||||
&computed_indent,
|
||||
leading_whitespace,
|
||||
indent_style,
|
||||
tab_width,
|
||||
)
|
||||
})() {
|
||||
return indent;
|
||||
}
|
||||
num_attempts += 1;
|
||||
if num_attempts == MAX_ATTEMPTS {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return indent.to_string(indent_style);
|
||||
};
|
||||
}
|
||||
// Fallback in case we either don't have indent queries or they failed for some reason
|
||||
let indent_level = indent_level_for_line(text.line(current_line), tab_width, indent_width);
|
||||
indent_style.as_str().repeat(indent_level)
|
||||
}
|
||||
|
@ -962,10 +1087,13 @@ mod test {
|
|||
..Default::default()
|
||||
};
|
||||
|
||||
let add_capture = |mut indent: Indentation, capture| {
|
||||
fn add_capture<'a>(
|
||||
mut indent: Indentation<'a>,
|
||||
capture: IndentCaptureType<'a>,
|
||||
) -> Indentation<'a> {
|
||||
indent.add_capture(capture);
|
||||
indent
|
||||
};
|
||||
}
|
||||
|
||||
// adding an indent to no indent makes an indent
|
||||
assert_eq!(
|
||||
|
|
|
@ -210,7 +210,6 @@ fn test_treesitter_indent(
|
|||
let suggested_indent = treesitter_indent_for_pos(
|
||||
indent_query,
|
||||
&syntax,
|
||||
&indent_style,
|
||||
tab_width,
|
||||
indent_style.indent_width(tab_width),
|
||||
text,
|
||||
|
@ -218,7 +217,8 @@ fn test_treesitter_indent(
|
|||
text.line_to_char(i) + pos,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
.unwrap()
|
||||
.to_string(&indent_style);
|
||||
assert!(
|
||||
line.get_slice(..pos).map_or(false, |s| s == suggested_indent),
|
||||
"Wrong indentation for file {:?} on line {}:\n\"{}\" (original line)\n\"{}\" (suggested indentation)\n",
|
||||
|
|
Loading…
Reference in a new issue