Implement relative indent queries,

i.e. also take into account the indentation of a previous
line when computing the indentation for a new line.
This commit is contained in:
Daniel Ebert 2023-09-16 18:22:46 +02:00 committed by Blaž Hrastnik
parent 23fd145a56
commit d29a66f267
2 changed files with 213 additions and 85 deletions

View file

@ -4,6 +4,7 @@ use tree_sitter::{Query, QueryCursor, QueryPredicateArg};
use crate::{
chars::{char_is_line_ending, char_is_whitespace},
find_first_non_whitespace_char,
graphemes::{grapheme_width, tab_width_at},
syntax::{LanguageConfiguration, RopeProvider, Syntax},
tree_sitter::Node,
@ -196,6 +197,19 @@ pub fn indent_level_for_line(line: RopeSlice, tab_width: usize, indent_width: us
len / indent_width
}
/// Create a string of tabs & spaces that has the same visual width as the given RopeSlice (independent of the tab width).
fn whitespace_with_same_width(text: RopeSlice) -> String {
let mut s = String::new();
for grapheme in RopeGraphemes::new(text) {
if grapheme == "\t" {
s.push('\t');
} else {
s.extend(std::iter::repeat(' ').take(grapheme_width(&Cow::from(grapheme))));
}
}
s
}
/// Computes for node and all ancestors whether they are the first node on their line.
/// The first entry in the return value represents the root node, the last one the node itself
fn get_first_in_line(mut node: Node, new_line_byte_pos: Option<usize>) -> Vec<bool> {
@ -241,21 +255,21 @@ fn get_first_in_line(mut node: Node, new_line_byte_pos: Option<usize>) -> Vec<bo
/// - max(0, indent - outdent) tabs, if tabs are used for indentation
/// - max(0, indent - outdent)*indent_width spaces, if spaces are used for indentation
#[derive(Default, Debug, PartialEq, Eq, Clone)]
pub struct Indentation {
pub struct Indentation<'a> {
indent: usize,
indent_always: usize,
outdent: usize,
outdent_always: usize,
/// The alignment, as a string containing only tabs & spaces. Storing this as a string instead of e.g.
/// the (visual) width ensures that the alignment is preserved even if the tab width changes.
align: Option<String>,
align: Option<RopeSlice<'a>>,
}
impl Indentation {
impl<'a> Indentation<'a> {
/// Add some other [Indentation] to this.
/// The added indent should be the total added indent from one line.
/// Indent should always be added starting from the bottom (or equivalently, the innermost tree-sitter node).
fn add_line(&mut self, added: Indentation) {
fn add_line(&mut self, added: Indentation<'a>) {
// Align overrides the indent from outer scopes.
if self.align.is_some() {
return;
@ -274,7 +288,7 @@ impl Indentation {
/// Only captures that apply to the same line should be added together in this way (otherwise use `add_line`)
/// and the captures should be added starting from the innermost tree-sitter node (currently this only matters
/// if multiple `@align` patterns occur on the same line).
fn add_capture(&mut self, added: IndentCaptureType) {
fn add_capture(&mut self, added: IndentCaptureType<'a>) {
match added {
IndentCaptureType::Indent => {
if self.indent_always == 0 {
@ -303,7 +317,60 @@ impl Indentation {
}
}
}
fn into_string(self, indent_style: &IndentStyle) -> String {
fn net_indent(&self) -> isize {
(self.indent + self.indent_always) as isize
- ((self.outdent + self.outdent_always) as isize)
}
/// Convert `self` into a string, taking into account the computed and actual indentation of some other line.
fn relative_indent(
&self,
other_computed_indent: &Self,
other_leading_whitespace: RopeSlice,
indent_style: &IndentStyle,
tab_width: usize,
) -> Option<String> {
if self.align == other_computed_indent.align {
// If self and baseline are either not aligned to anything or both aligned the same way,
// we can simply take `other_leading_whitespace` and add some indent / outdent to it (in the second
// case, the alignment should already be accounted for in `other_leading_whitespace`).
let indent_diff = self.net_indent() - other_computed_indent.net_indent();
if indent_diff >= 0 {
let mut indent = other_leading_whitespace.to_string();
indent.push_str(&indent_style.as_str().repeat(indent_diff as usize));
Some(indent)
} else {
// It's not entirely clear how to subtract a given indent level from the other line if its indentation is
// complex (e.g. a weird alignment or a mixture of tabs and spaces). Therefore, we only consider the indent level
// of `baseline_leading_whitespace`, add `indent_diff` to it and convert this indent level back to a string. If we ever encounter
// cases where some other behavior is expected, the behavior for strings that don't exactly correspond to some indent
// level could be re-evaluated.
let actual_baseline_indent_level = indent_level_for_line(
other_leading_whitespace,
tab_width,
indent_style.indent_width(tab_width),
);
let total_indent = actual_baseline_indent_level as isize + indent_diff;
if total_indent < 0 {
log::warn!(
"Computed negative indent during a relative indent computation (actual baseline indent: {}, computed baseline indent: {}, computed line indent: {})",
actual_baseline_indent_level,
other_computed_indent.net_indent(),
self.net_indent(),
);
Some(String::new())
} else {
Some(indent_style.as_str().repeat(total_indent as usize))
}
}
} else if self.align.is_some() {
Some(self.to_string(indent_style))
} else {
// If the other line has some alignment and `self` does not, there is no reasonable way to take
// into account `other_leading_whitespace`.
None
}
}
pub fn to_string(&self, indent_style: &IndentStyle) -> String {
let indent = self.indent_always + self.indent;
let outdent = self.outdent_always + self.outdent;
@ -314,7 +381,7 @@ impl Indentation {
0
};
let mut indent_string = if let Some(align) = self.align {
align
whitespace_with_same_width(align)
} else {
String::new()
};
@ -325,21 +392,21 @@ impl Indentation {
/// An indent definition which corresponds to a capture from the indent query
#[derive(Debug)]
struct IndentCapture {
capture_type: IndentCaptureType,
struct IndentCapture<'a> {
capture_type: IndentCaptureType<'a>,
scope: IndentScope,
}
#[derive(Debug, Clone, PartialEq)]
enum IndentCaptureType {
enum IndentCaptureType<'a> {
Indent,
IndentAlways,
Outdent,
OutdentAlways,
/// Alignment given as a string of whitespace
Align(String),
Align(RopeSlice<'a>),
}
impl IndentCaptureType {
impl<'a> IndentCaptureType<'a> {
fn default_scope(&self) -> IndentScope {
match self {
IndentCaptureType::Indent | IndentCaptureType::IndentAlways => IndentScope::Tail,
@ -371,8 +438,8 @@ enum ExtendCapture {
/// each node (identified by its ID) the relevant captures (already filtered
/// by predicates).
#[derive(Debug)]
struct IndentQueryResult {
indent_captures: HashMap<usize, Vec<IndentCapture>>,
struct IndentQueryResult<'a> {
indent_captures: HashMap<usize, Vec<IndentCapture<'a>>>,
extend_captures: HashMap<usize, Vec<ExtendCapture>>,
}
@ -393,14 +460,14 @@ fn get_node_end_line(node: Node, new_line_byte_pos: Option<usize>) -> usize {
node_line
}
fn query_indents(
fn query_indents<'a>(
query: &Query,
syntax: &Syntax,
cursor: &mut QueryCursor,
text: RopeSlice,
text: RopeSlice<'a>,
range: std::ops::Range<usize>,
new_line_byte_pos: Option<usize>,
) -> IndentQueryResult {
) -> IndentQueryResult<'a> {
let mut indent_captures: HashMap<usize, Vec<IndentCapture>> = HashMap::new();
let mut extend_captures: HashMap<usize, Vec<ExtendCapture>> = HashMap::new();
cursor.set_byte_range(range);
@ -488,7 +555,7 @@ fn query_indents(
"outdent" => IndentCaptureType::Outdent,
"outdent.always" => IndentCaptureType::OutdentAlways,
// The alignment will be updated to the correct value at the end, when the anchor is known.
"align" => IndentCaptureType::Align(String::from("")),
"align" => IndentCaptureType::Align(RopeSlice::from("")),
"anchor" => {
if anchor.is_some() {
log::error!("Invalid indent query: Encountered more than one @anchor in the same match.")
@ -560,22 +627,10 @@ fn query_indents(
}
Some(anchor) => anchor,
};
// Create a string of tabs & spaces that should have the same width
// as the string that precedes the anchor (independent of the tab width).
let mut align = String::new();
for grapheme in RopeGraphemes::new(
capture.capture_type = IndentCaptureType::Align(
text.line(anchor.start_position().row)
.byte_slice(0..anchor.start_position().column),
) {
if grapheme == "\t" {
align.push('\t');
} else {
align.extend(
std::iter::repeat(' ').take(grapheme_width(&Cow::from(grapheme))),
);
}
}
capture.capture_type = IndentCaptureType::Align(align);
);
}
indent_captures
.entry(node_id)
@ -661,56 +716,20 @@ fn extend_nodes<'a>(
}
}
/// Use the syntax tree to determine the indentation for a given position.
/// This can be used in 2 ways:
///
/// - To get the correct indentation for an existing line (new_line=false), not necessarily equal to the current indentation.
/// - In this case, pos should be inside the first tree-sitter node on that line.
/// In most cases, this can just be the first non-whitespace on that line.
/// - To get the indentation for a new line (new_line=true). This behaves like the first usecase if the part of the current line
/// after pos were moved to a new line.
///
/// The indentation is determined by traversing all the tree-sitter nodes containing the position.
/// Each of these nodes produces some [Indentation] for:
///
/// - The line of the (beginning of the) node. This is defined by the scope `all` if this is the first node on its line.
/// - The line after the node. This is defined by:
/// - The scope `tail`.
/// - The scope `all` if this node is not the first node on its line.
/// Intuitively, `all` applies to everything contained in this node while `tail` applies to everything except for the first line of the node.
/// The indents from different nodes for the same line are then combined.
/// The result [Indentation] is simply the sum of the [Indentation] for all lines.
///
/// Specifying which line exactly an [Indentation] applies to is important because indents on the same line combine differently than indents on different lines:
/// ```ignore
/// some_function(|| {
/// // Both the function parameters as well as the contained block should be indented.
/// // Because they are on the same line, this only yields one indent level
/// });
/// ```
///
/// ```ignore
/// some_function(
/// param1,
/// || {
/// // Here we get 2 indent levels because the 'parameters' and the 'block' node begin on different lines
/// },
/// );
/// ```
/// Prepare an indent query by computing:
/// - The node from which to start the query (this is non-trivial due to `@extend` captures)
/// - The indent captures for all relevant nodes.
#[allow(clippy::too_many_arguments)]
pub fn treesitter_indent_for_pos(
fn init_indent_query<'a, 'b>(
query: &Query,
syntax: &Syntax,
indent_style: &IndentStyle,
syntax: &'a Syntax,
text: RopeSlice<'b>,
tab_width: usize,
indent_width: usize,
text: RopeSlice,
line: usize,
pos: usize,
new_line: bool,
) -> Option<String> {
let byte_pos = text.char_to_byte(pos);
let new_line_byte_pos = new_line.then_some(byte_pos);
byte_pos: usize,
new_line_byte_pos: Option<usize>,
) -> Option<(Node<'a>, HashMap<usize, Vec<IndentCapture<'b>>>)> {
// The innermost tree-sitter node which is considered for the indent
// computation. It may change if some predeceding node is extended
let mut node = syntax
@ -754,7 +773,6 @@ pub fn treesitter_indent_for_pos(
(query_result, deepest_preceding)
})
};
let mut indent_captures = query_result.indent_captures;
let extend_captures = query_result.extend_captures;
// Check for extend captures, potentially changing the node that the indent calculation starts with
@ -769,6 +787,68 @@ pub fn treesitter_indent_for_pos(
indent_width,
);
}
Some((node, query_result.indent_captures))
}
/// Use the syntax tree to determine the indentation for a given position.
/// This can be used in 2 ways:
///
/// - To get the correct indentation for an existing line (new_line=false), not necessarily equal to the current indentation.
/// - In this case, pos should be inside the first tree-sitter node on that line.
/// In most cases, this can just be the first non-whitespace on that line.
/// - To get the indentation for a new line (new_line=true). This behaves like the first usecase if the part of the current line
/// after pos were moved to a new line.
///
/// The indentation is determined by traversing all the tree-sitter nodes containing the position.
/// Each of these nodes produces some [Indentation] for:
///
/// - The line of the (beginning of the) node. This is defined by the scope `all` if this is the first node on its line.
/// - The line after the node. This is defined by:
/// - The scope `tail`.
/// - The scope `all` if this node is not the first node on its line.
/// Intuitively, `all` applies to everything contained in this node while `tail` applies to everything except for the first line of the node.
/// The indents from different nodes for the same line are then combined.
/// The result [Indentation] is simply the sum of the [Indentation] for all lines.
///
/// Specifying which line exactly an [Indentation] applies to is important because indents on the same line combine differently than indents on different lines:
/// ```ignore
/// some_function(|| {
/// // Both the function parameters as well as the contained block should be indented.
/// // Because they are on the same line, this only yields one indent level
/// });
/// ```
///
/// ```ignore
/// some_function(
/// param1,
/// || {
/// // Here we get 2 indent levels because the 'parameters' and the 'block' node begin on different lines
/// },
/// );
/// ```
#[allow(clippy::too_many_arguments)]
pub fn treesitter_indent_for_pos<'a>(
query: &Query,
syntax: &Syntax,
tab_width: usize,
indent_width: usize,
text: RopeSlice<'a>,
line: usize,
pos: usize,
new_line: bool,
) -> Option<Indentation<'a>> {
let byte_pos = text.char_to_byte(pos);
let new_line_byte_pos = new_line.then_some(byte_pos);
let (mut node, mut indent_captures) = init_indent_query(
query,
syntax,
text,
tab_width,
indent_width,
line,
byte_pos,
new_line_byte_pos,
)?;
let mut first_in_line = get_first_in_line(node, new_line.then_some(byte_pos));
let mut result = Indentation::default();
@ -836,7 +916,7 @@ pub fn treesitter_indent_for_pos(
break;
}
}
Some(result.into_string(indent_style))
Some(result)
}
/// Returns the indentation for a new line.
@ -860,7 +940,6 @@ pub fn indent_for_newline(
if let Some(indent) = treesitter_indent_for_pos(
query,
syntax,
indent_style,
tab_width,
indent_width,
text,
@ -868,9 +947,55 @@ pub fn indent_for_newline(
line_before_end_pos,
true,
) {
return indent;
// We want to compute the indentation not only based on the
// syntax tree but also on the actual indentation of a previous
// line. This makes indentation computation more resilient to
// incomplete queries, incomplete source code & differing indentation
// styles for the same language.
// However, using the indent of a previous line as a baseline may not
// make sense, e.g. if it has a different alignment than the new line.
// In order to prevent edge cases with long running times, we only try
// a constant number of (non-empty) lines.
const MAX_ATTEMPTS: usize = 2;
let mut num_attempts = 0;
for line_idx in (0..=line_before).rev() {
let line = text.line(line_idx);
let first_non_whitespace_char = match find_first_non_whitespace_char(line) {
Some(i) => i,
None => {
continue;
}
};
if let Some(indent) = (|| {
let computed_indent = treesitter_indent_for_pos(
query,
syntax,
tab_width,
indent_width,
text,
line_idx,
text.line_to_char(line_idx) + first_non_whitespace_char,
false,
)?;
let leading_whitespace = line.slice(0..first_non_whitespace_char);
indent.relative_indent(
&computed_indent,
leading_whitespace,
indent_style,
tab_width,
)
})() {
return indent;
}
num_attempts += 1;
if num_attempts == MAX_ATTEMPTS {
break;
}
}
return indent.to_string(indent_style);
};
}
// Fallback in case we either don't have indent queries or they failed for some reason
let indent_level = indent_level_for_line(text.line(current_line), tab_width, indent_width);
indent_style.as_str().repeat(indent_level)
}
@ -962,10 +1087,13 @@ mod test {
..Default::default()
};
let add_capture = |mut indent: Indentation, capture| {
fn add_capture<'a>(
mut indent: Indentation<'a>,
capture: IndentCaptureType<'a>,
) -> Indentation<'a> {
indent.add_capture(capture);
indent
};
}
// adding an indent to no indent makes an indent
assert_eq!(

View file

@ -210,7 +210,6 @@ fn test_treesitter_indent(
let suggested_indent = treesitter_indent_for_pos(
indent_query,
&syntax,
&indent_style,
tab_width,
indent_style.indent_width(tab_width),
text,
@ -218,7 +217,8 @@ fn test_treesitter_indent(
text.line_to_char(i) + pos,
false,
)
.unwrap();
.unwrap()
.to_string(&indent_style);
assert!(
line.get_slice(..pos).map_or(false, |s| s == suggested_indent),
"Wrong indentation for file {:?} on line {}:\n\"{}\" (original line)\n\"{}\" (suggested indentation)\n",