properly handle LSP position encoding (#5711)
* properly handle LSP position encoding * add debug assertion to Transaction::change * Apply suggestions from code review Co-authored-by: Michael Davis <mcarsondavis@gmail.com> --------- Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
This commit is contained in:
parent
8a602995fa
commit
7ebcf4e919
4 changed files with 107 additions and 19 deletions
|
@ -203,6 +203,13 @@ pub fn line_end_char_index(slice: &RopeSlice, line: usize) -> usize {
|
|||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
pub fn line_end_byte_index(slice: &RopeSlice, line: usize) -> usize {
|
||||
slice.line_to_byte(line + 1)
|
||||
- get_line_ending(&slice.line(line))
|
||||
.map(|le| le.as_str().len())
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
/// Fetches line `line_idx` from the passed rope slice, sans any line ending.
|
||||
pub fn line_without_line_ending<'a>(slice: &'a RopeSlice, line_idx: usize) -> RopeSlice<'a> {
|
||||
let start = slice.line_to_char(line_idx);
|
||||
|
|
|
@ -481,6 +481,11 @@ impl Transaction {
|
|||
for (from, to, tendril) in changes {
|
||||
// Verify ranges are ordered and not overlapping
|
||||
debug_assert!(last <= from);
|
||||
// Verify ranges are correct
|
||||
debug_assert!(
|
||||
from <= to,
|
||||
"Edit end must end before it starts (should {from} <= {to})"
|
||||
);
|
||||
|
||||
// Retain from last "to" to current "from"
|
||||
changeset.retain(from - last);
|
||||
|
|
|
@ -104,7 +104,7 @@ impl Client {
|
|||
server_tx,
|
||||
request_counter: AtomicU64::new(0),
|
||||
capabilities: OnceCell::new(),
|
||||
offset_encoding: OffsetEncoding::Utf8,
|
||||
offset_encoding: OffsetEncoding::Utf16,
|
||||
config,
|
||||
req_timeout,
|
||||
|
||||
|
|
|
@ -57,6 +57,7 @@ pub enum OffsetEncoding {
|
|||
|
||||
pub mod util {
|
||||
use super::*;
|
||||
use helix_core::line_ending::{line_end_byte_index, line_end_char_index};
|
||||
use helix_core::{diagnostic::NumberOrString, Range, Rope, Selection, Tendril, Transaction};
|
||||
|
||||
/// Converts a diagnostic in the document to [`lsp::Diagnostic`].
|
||||
|
@ -117,7 +118,7 @@ pub mod util {
|
|||
|
||||
/// Converts [`lsp::Position`] to a position in the document.
|
||||
///
|
||||
/// Returns `None` if position exceeds document length or an operation overflows.
|
||||
/// Returns `None` if position.line is out of bounds or an overflow occurs
|
||||
pub fn lsp_pos_to_pos(
|
||||
doc: &Rope,
|
||||
pos: lsp::Position,
|
||||
|
@ -128,22 +129,58 @@ pub mod util {
|
|||
return None;
|
||||
}
|
||||
|
||||
match offset_encoding {
|
||||
// We need to be careful here to fully comply ith the LSP spec.
|
||||
// Two relevant quotes from the spec:
|
||||
//
|
||||
// https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#position
|
||||
// > If the character value is greater than the line length it defaults back
|
||||
// > to the line length.
|
||||
//
|
||||
// https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocuments
|
||||
// > To ensure that both client and server split the string into the same
|
||||
// > line representation the protocol specifies the following end-of-line sequences:
|
||||
// > ‘\n’, ‘\r\n’ and ‘\r’. Positions are line end character agnostic.
|
||||
// > So you can not specify a position that denotes \r|\n or \n| where | represents the character offset.
|
||||
//
|
||||
// This means that while the line must be in bounds the `charater`
|
||||
// must be capped to the end of the line.
|
||||
// Note that the end of the line here is **before** the line terminator
|
||||
// so we must use `line_end_char_index` istead of `doc.line_to_char(pos_line + 1)`
|
||||
//
|
||||
// FIXME: Helix does not fully comply with the LSP spec for line terminators.
|
||||
// The LSP standard requires that line terminators are ['\n', '\r\n', '\r'].
|
||||
// Without the unicode-linebreak feature disabled, the `\r` terminator is not handled by helix.
|
||||
// With the unicode-linebreak feature, helix recognizes multiple extra line break chars
|
||||
// which means that positions will be decoded/encoded incorrectly in their presence
|
||||
|
||||
let line = match offset_encoding {
|
||||
OffsetEncoding::Utf8 => {
|
||||
let line = doc.line_to_char(pos_line);
|
||||
let pos = line.checked_add(pos.character as usize)?;
|
||||
if pos <= doc.len_chars() {
|
||||
Some(pos)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
let line_start = doc.line_to_byte(pos_line);
|
||||
let line_end = line_end_byte_index(&doc.slice(..), pos_line);
|
||||
line_start..line_end
|
||||
}
|
||||
OffsetEncoding::Utf16 => {
|
||||
let line = doc.line_to_char(pos_line);
|
||||
let line_start = doc.char_to_utf16_cu(line);
|
||||
let pos = line_start.checked_add(pos.character as usize)?;
|
||||
doc.try_utf16_cu_to_char(pos).ok()
|
||||
// TODO directly translate line index to char-idx
|
||||
// ropey can do this just as easily as utf-8 byte translation
|
||||
// but the functions are just missing.
|
||||
// Translate to char first and then utf-16 as a workaround
|
||||
let line_start = doc.line_to_char(pos_line);
|
||||
let line_end = line_end_char_index(&doc.slice(..), pos_line);
|
||||
doc.char_to_utf16_cu(line_start)..doc.char_to_utf16_cu(line_end)
|
||||
}
|
||||
};
|
||||
|
||||
// The LSP spec demands that the offset is capped to the end of the line
|
||||
let pos = line
|
||||
.start
|
||||
.checked_add(pos.character as usize)
|
||||
.unwrap_or(line.end)
|
||||
.min(line.end);
|
||||
|
||||
// TODO prefer UTF32/char indices to avoid this step
|
||||
match offset_encoding {
|
||||
OffsetEncoding::Utf8 => doc.try_byte_to_char(pos).ok(),
|
||||
OffsetEncoding::Utf16 => doc.try_utf16_cu_to_char(pos).ok(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -158,8 +195,8 @@ pub mod util {
|
|||
match offset_encoding {
|
||||
OffsetEncoding::Utf8 => {
|
||||
let line = doc.char_to_line(pos);
|
||||
let line_start = doc.line_to_char(line);
|
||||
let col = pos - line_start;
|
||||
let line_start = doc.line_to_byte(line);
|
||||
let col = doc.char_to_byte(pos) - line_start;
|
||||
|
||||
lsp::Position::new(line as u32, col as u32)
|
||||
}
|
||||
|
@ -606,16 +643,55 @@ mod tests {
|
|||
}
|
||||
|
||||
test_case!("", (0, 0) => Some(0));
|
||||
test_case!("", (0, 1) => None);
|
||||
test_case!("", (0, 1) => Some(0));
|
||||
test_case!("", (1, 0) => None);
|
||||
test_case!("\n\n", (0, 0) => Some(0));
|
||||
test_case!("\n\n", (1, 0) => Some(1));
|
||||
test_case!("\n\n", (1, 1) => Some(2));
|
||||
test_case!("\n\n", (1, 1) => Some(1));
|
||||
test_case!("\n\n", (2, 0) => Some(2));
|
||||
test_case!("\n\n", (3, 0) => None);
|
||||
test_case!("test\n\n\n\ncase", (4, 3) => Some(11));
|
||||
test_case!("test\n\n\n\ncase", (4, 4) => Some(12));
|
||||
test_case!("test\n\n\n\ncase", (4, 5) => None);
|
||||
test_case!("test\n\n\n\ncase", (4, 5) => Some(12));
|
||||
test_case!("", (u32::MAX, u32::MAX) => None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn emoji_format_gh_4791() {
|
||||
use lsp_types::{Position, Range, TextEdit};
|
||||
|
||||
let edits = vec![
|
||||
TextEdit {
|
||||
range: Range {
|
||||
start: Position {
|
||||
line: 0,
|
||||
character: 1,
|
||||
},
|
||||
end: Position {
|
||||
line: 1,
|
||||
character: 0,
|
||||
},
|
||||
},
|
||||
new_text: "\n ".to_string(),
|
||||
},
|
||||
TextEdit {
|
||||
range: Range {
|
||||
start: Position {
|
||||
line: 1,
|
||||
character: 7,
|
||||
},
|
||||
end: Position {
|
||||
line: 2,
|
||||
character: 0,
|
||||
},
|
||||
},
|
||||
new_text: "\n ".to_string(),
|
||||
},
|
||||
];
|
||||
|
||||
let mut source = Rope::from_str("[\n\"🇺🇸\",\n\"🎄\",\n]");
|
||||
|
||||
let transaction = generate_transaction_from_edits(&source, edits, OffsetEncoding::Utf8);
|
||||
assert!(transaction.apply(&mut source));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue