Merge pull request #224 from helix-editor/line_ending_detection
Line ending detection
Commit a70de6e980
17 changed files with 562 additions and 205 deletions

Cargo.lock (generated)
@@ -344,6 +344,7 @@ dependencies = [
 "bitflags",
 "cassowary",
 "crossterm",
 "helix-core",
 "serde",
 "unicode-segmentation",
 "unicode-width",
@@ -12,7 +12,7 @@ pub const PAIRS: &[(char, char)] = &[
    ('`', '`'),
];

const CLOSE_BEFORE: &str = ")]}'\":;> \n"; // includes space and newline
const CLOSE_BEFORE: &str = ")]}'\":;> \n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}"; // includes space and newlines

// insert hook:
// Fn(doc, selection, char) => Option<Transaction>
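For context, a minimal sketch of how an insert hook might consult CLOSE_BEFORE (illustrative only; the `should_close_before` helper below is hypothetical and not part of this change):

// Hypothetical helper: only auto-insert the closing pair character when the
// character after the cursor is in CLOSE_BEFORE (whitespace, closing
// delimiters, or any of the Unicode line endings added above).
fn should_close_before(next_char: Option<char>) -> bool {
    match next_char {
        Some(ch) => CLOSE_BEFORE.contains(ch),
        // End of document also counts as a position where closing is fine.
        None => true,
    }
}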
@@ -1,25 +1,44 @@
/// Determine whether a character is a line break.
pub fn char_is_linebreak(c: char) -> bool {
    matches!(
        c,
        '\u{000A}' | // LineFeed
        '\u{000B}' | // VerticalTab
        '\u{000C}' | // FormFeed
        '\u{000D}' | // CarriageReturn
        '\u{0085}' | // NextLine
        '\u{2028}' | // Line Separator
        '\u{2029}' // ParagraphSeparator
    )
use crate::LineEnding;

#[derive(Debug, Eq, PartialEq)]
pub enum CharCategory {
    Whitespace,
    Eol,
    Word,
    Punctuation,
    Unknown,
}

#[inline]
pub fn categorize_char(ch: char) -> CharCategory {
    if char_is_line_ending(ch) {
        CharCategory::Eol
    } else if ch.is_whitespace() {
        CharCategory::Whitespace
    } else if char_is_word(ch) {
        CharCategory::Word
    } else if char_is_punctuation(ch) {
        CharCategory::Punctuation
    } else {
        CharCategory::Unknown
    }
}

/// Determine whether a character is a line ending.
#[inline]
pub fn char_is_line_ending(ch: char) -> bool {
    LineEnding::from_char(ch).is_some()
}

/// Determine whether a character qualifies as (non-line-break)
/// whitespace.
pub fn char_is_whitespace(c: char) -> bool {
#[inline]
pub fn char_is_whitespace(ch: char) -> bool {
    // TODO: this is a naive binary categorization of whitespace
    // characters. For display, word wrapping, etc. we'll need a better
    // categorization based on e.g. breaking vs non-breaking spaces
    // and whether they're zero-width or not.
    match c {
    match ch {
        //'\u{1680}' | // Ogham Space Mark (here for completeness, but usually displayed as a dash, not as whitespace)
        '\u{0009}' | // Character Tabulation
        '\u{0020}' | // Space
@@ -34,8 +53,81 @@ pub fn char_is_whitespace(c: char) -> bool {
        // En Quad, Em Quad, En Space, Em Space, Three-per-em Space,
        // Four-per-em Space, Six-per-em Space, Figure Space,
        // Punctuation Space, Thin Space, Hair Space, Zero Width Space.
        c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true,
        ch if ('\u{2000}' ..= '\u{200B}').contains(&ch) => true,

        _ => false,
    }
}

#[inline]
pub fn char_is_punctuation(ch: char) -> bool {
    use unicode_general_category::{get_general_category, GeneralCategory};

    matches!(
        get_general_category(ch),
        GeneralCategory::OtherPunctuation
            | GeneralCategory::OpenPunctuation
            | GeneralCategory::ClosePunctuation
            | GeneralCategory::InitialPunctuation
            | GeneralCategory::FinalPunctuation
            | GeneralCategory::ConnectorPunctuation
            | GeneralCategory::DashPunctuation
            | GeneralCategory::MathSymbol
            | GeneralCategory::CurrencySymbol
            | GeneralCategory::ModifierSymbol
    )
}

#[inline]
pub fn char_is_word(ch: char) -> bool {
    ch.is_alphanumeric() || ch == '_'
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_categorize() {
        const EOL_TEST_CASE: &'static str = "\n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}";
        const WORD_TEST_CASE: &'static str =
            "_hello_world_あいうえおー12345678901234567890";
        const PUNCTUATION_TEST_CASE: &'static str =
            "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~!”#$%&’()*+、。:;<=>?@「」^`{|}~";
        const WHITESPACE_TEST_CASE: &'static str = " ";

        for ch in EOL_TEST_CASE.chars() {
            assert_eq!(CharCategory::Eol, categorize_char(ch));
        }

        for ch in WHITESPACE_TEST_CASE.chars() {
            assert_eq!(
                CharCategory::Whitespace,
                categorize_char(ch),
                "Testing '{}', but got `{:?}` instead of `Category::Whitespace`",
                ch,
                categorize_char(ch)
            );
        }

        for ch in WORD_TEST_CASE.chars() {
            assert_eq!(
                CharCategory::Word,
                categorize_char(ch),
                "Testing '{}', but got `{:?}` instead of `Category::Word`",
                ch,
                categorize_char(ch)
            );
        }

        for ch in PUNCTUATION_TEST_CASE.chars() {
            assert_eq!(
                CharCategory::Punctuation,
                categorize_char(ch),
                "Testing '{}', but got `{:?}` instead of `Category::Punctuation`",
                ch,
                categorize_char(ch)
            );
        }
    }
}
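A quick usage sketch of the new chars API (illustrative, not part of the diff):

use helix_core::chars::{categorize_char, char_is_line_ending, CharCategory};

fn chars_example() {
    // CRLF is two chars; each one individually counts as a line ending.
    assert!(char_is_line_ending('\r'));
    assert!(char_is_line_ending('\n'));

    assert_eq!(categorize_char('\u{2028}'), CharCategory::Eol);
    assert_eq!(categorize_char('_'), CharCategory::Word);
    assert_eq!(categorize_char(','), CharCategory::Punctuation);
    assert_eq!(categorize_char(' '), CharCategory::Whitespace);
}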
@@ -6,6 +6,7 @@ pub mod diagnostic;
pub mod graphemes;
pub mod history;
pub mod indent;
pub mod line_ending;
pub mod macros;
pub mod match_brackets;
pub mod movement;

@@ -106,6 +107,7 @@ pub use tendril::StrTendril as Tendril;
#[doc(inline)]
pub use {regex, tree_sitter};

pub use graphemes::RopeGraphemes;
pub use position::{coords_at_pos, pos_at_coords, Position};
pub use selection::{Range, Selection};
pub use smallvec::SmallVec;

@@ -114,4 +116,5 @@ pub use syntax::Syntax;
pub use diagnostic::Diagnostic;
pub use state::State;

pub use line_ending::{LineEnding, DEFAULT_LINE_ENDING};
pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction};
helix-core/src/line_ending.rs (new file, 252 lines)

@@ -0,0 +1,252 @@
use crate::{Rope, RopeGraphemes, RopeSlice};

#[cfg(target_os = "windows")]
pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf;
#[cfg(not(target_os = "windows"))]
pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF;

/// Represents one of the valid Unicode line endings.
#[derive(PartialEq, Copy, Clone, Debug)]
pub enum LineEnding {
    Crlf, // CarriageReturn followed by LineFeed
    LF,   // U+000A -- LineFeed
    VT,   // U+000B -- VerticalTab
    FF,   // U+000C -- FormFeed
    CR,   // U+000D -- CarriageReturn
    Nel,  // U+0085 -- NextLine
    LS,   // U+2028 -- Line Separator
    PS,   // U+2029 -- ParagraphSeparator
}

impl LineEnding {
    #[inline]
    pub fn len_chars(&self) -> usize {
        match self {
            Self::Crlf => 2,
            _ => 1,
        }
    }

    #[inline]
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Crlf => "\u{000D}\u{000A}",
            Self::LF => "\u{000A}",
            Self::VT => "\u{000B}",
            Self::FF => "\u{000C}",
            Self::CR => "\u{000D}",
            Self::Nel => "\u{0085}",
            Self::LS => "\u{2028}",
            Self::PS => "\u{2029}",
        }
    }

    #[inline]
    pub fn from_char(ch: char) -> Option<LineEnding> {
        match ch {
            '\u{000A}' => Some(LineEnding::LF),
            '\u{000B}' => Some(LineEnding::VT),
            '\u{000C}' => Some(LineEnding::FF),
            '\u{000D}' => Some(LineEnding::CR),
            '\u{0085}' => Some(LineEnding::Nel),
            '\u{2028}' => Some(LineEnding::LS),
            '\u{2029}' => Some(LineEnding::PS),
            // Not a line ending
            _ => None,
        }
    }

    // Normally we'd want to implement the FromStr trait, but in this case
    // that would force us into a different return type than from_char or
    // or from_rope_slice, which would be weird.
    #[allow(clippy::should_implement_trait)]
    #[inline]
    pub fn from_str(g: &str) -> Option<LineEnding> {
        match g {
            "\u{000D}\u{000A}" => Some(LineEnding::Crlf),
            "\u{000A}" => Some(LineEnding::LF),
            "\u{000B}" => Some(LineEnding::VT),
            "\u{000C}" => Some(LineEnding::FF),
            "\u{000D}" => Some(LineEnding::CR),
            "\u{0085}" => Some(LineEnding::Nel),
            "\u{2028}" => Some(LineEnding::LS),
            "\u{2029}" => Some(LineEnding::PS),
            // Not a line ending
            _ => None,
        }
    }

    #[inline]
    pub fn from_rope_slice(g: &RopeSlice) -> Option<LineEnding> {
        if let Some(text) = g.as_str() {
            LineEnding::from_str(text)
        } else {
            // Non-contiguous, so it can't be a line ending.
            // Specifically, Ropey guarantees that CRLF is always
            // contiguous. And the remaining line endings are all
            // single `char`s, and therefore trivially contiguous.
            None
        }
    }
}

#[inline]
pub fn str_is_line_ending(s: &str) -> bool {
    LineEnding::from_str(s).is_some()
}

/// Attempts to detect what line ending the passed document uses.
pub fn auto_detect_line_ending(doc: &Rope) -> Option<LineEnding> {
    // Return first matched line ending. Not all possible line endings
    // are being matched, as they might be special-use only
    for line in doc.lines().take(100) {
        match get_line_ending(&line) {
            None | Some(LineEnding::VT) | Some(LineEnding::FF) | Some(LineEnding::PS) => {}
            ending => return ending,
        }
    }
    None
}

/// Returns the passed line's line ending, if any.
pub fn get_line_ending(line: &RopeSlice) -> Option<LineEnding> {
    // Last character as str.
    let g1 = line
        .slice(line.len_chars().saturating_sub(1)..)
        .as_str()
        .unwrap();

    // Last two characters as str, or empty str if they're not contiguous.
    // It's fine to punt on the non-contiguous case, because Ropey guarantees
    // that CRLF is always contiguous.
    let g2 = line
        .slice(line.len_chars().saturating_sub(2)..)
        .as_str()
        .unwrap_or("");

    // First check the two-character case for CRLF, then check the single-character case.
    LineEnding::from_str(g2).or_else(|| LineEnding::from_str(g1))
}

/// Returns the passed line's line ending, if any.
pub fn get_line_ending_of_str(line: &str) -> Option<LineEnding> {
    if line.ends_with("\u{000D}\u{000A}") {
        Some(LineEnding::Crlf)
    } else if line.ends_with('\u{000A}') {
        Some(LineEnding::LF)
    } else if line.ends_with('\u{000B}') {
        Some(LineEnding::VT)
    } else if line.ends_with('\u{000C}') {
        Some(LineEnding::FF)
    } else if line.ends_with('\u{000D}') {
        Some(LineEnding::CR)
    } else if line.ends_with('\u{0085}') {
        Some(LineEnding::Nel)
    } else if line.ends_with('\u{2028}') {
        Some(LineEnding::LS)
    } else if line.ends_with('\u{2029}') {
        Some(LineEnding::PS)
    } else {
        None
    }
}

/// Returns the char index of the end of the given line, not including its line ending.
pub fn line_end_char_index(slice: &RopeSlice, line: usize) -> usize {
    slice.line_to_char(line + 1)
        - get_line_ending(&slice.line(line))
            .map(|le| le.len_chars())
            .unwrap_or(0)
}

#[cfg(test)]
mod line_ending_tests {
    use super::*;

    #[test]
    fn line_ending_autodetect() {
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str("\n")),
            Some(LineEnding::LF)
        );
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str("\r\n")),
            Some(LineEnding::Crlf)
        );
        assert_eq!(auto_detect_line_ending(&Rope::from_str("hello")), None);
        assert_eq!(auto_detect_line_ending(&Rope::from_str("")), None);
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str("hello\nhelix\r\n")),
            Some(LineEnding::LF)
        );
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str("a formfeed\u{000C}")),
            None
        );
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str("\n\u{000A}\n \u{000A}")),
            Some(LineEnding::LF)
        );
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str(
                "a formfeed\u{000C} with a\u{000C} linefeed\u{000A}"
            )),
            Some(LineEnding::LF)
        );
        assert_eq!(auto_detect_line_ending(&Rope::from_str("a formfeed\u{000C} with a\u{000C} carriage return linefeed\u{000D}\u{000A} and a linefeed\u{000A}")), Some(LineEnding::Crlf));
    }

    #[test]
    fn str_to_line_ending() {
        assert_eq!(LineEnding::from_str("\r"), Some(LineEnding::CR));
        assert_eq!(LineEnding::from_str("\n"), Some(LineEnding::LF));
        assert_eq!(LineEnding::from_str("\r\n"), Some(LineEnding::Crlf));
        assert_eq!(LineEnding::from_str("hello\n"), None);
    }

    #[test]
    fn rope_slice_to_line_ending() {
        let r = Rope::from_str("hello\r\n");
        assert_eq!(
            LineEnding::from_rope_slice(&r.slice(5..6)),
            Some(LineEnding::CR)
        );
        assert_eq!(
            LineEnding::from_rope_slice(&r.slice(6..7)),
            Some(LineEnding::LF)
        );
        assert_eq!(
            LineEnding::from_rope_slice(&r.slice(5..7)),
            Some(LineEnding::Crlf)
        );
        assert_eq!(LineEnding::from_rope_slice(&r.slice(..)), None);
    }

    #[test]
    fn get_line_ending_rope_slice() {
        let r = Rope::from_str("Hello\rworld\nhow\r\nare you?");
        assert_eq!(get_line_ending(&r.slice(..6)), Some(LineEnding::CR));
        assert_eq!(get_line_ending(&r.slice(..12)), Some(LineEnding::LF));
        assert_eq!(get_line_ending(&r.slice(..17)), Some(LineEnding::Crlf));
        assert_eq!(get_line_ending(&r.slice(..)), None);
    }

    #[test]
    fn get_line_ending_str() {
        let text = "Hello\rworld\nhow\r\nare you?";
        assert_eq!(get_line_ending_of_str(&text[..6]), Some(LineEnding::CR));
        assert_eq!(get_line_ending_of_str(&text[..12]), Some(LineEnding::LF));
        assert_eq!(get_line_ending_of_str(&text[..17]), Some(LineEnding::Crlf));
        assert_eq!(get_line_ending_of_str(&text[..]), None);
    }

    #[test]
    fn line_end_char_index_rope_slice() {
        let r = Rope::from_str("Hello\rworld\nhow\r\nare you?");
        let s = &r.slice(..);
        assert_eq!(line_end_char_index(s, 0), 5);
        assert_eq!(line_end_char_index(s, 1), 11);
        assert_eq!(line_end_char_index(s, 2), 15);
        assert_eq!(line_end_char_index(s, 3), 25);
    }
}
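A usage sketch of the new module (illustrative; mirrors how the editor applies detection when opening a file):

use helix_core::line_ending::{auto_detect_line_ending, line_end_char_index, DEFAULT_LINE_ENDING};
use helix_core::Rope;

fn line_ending_example() {
    let doc = Rope::from_str("fn main() {\r\n    println!(\"hi\");\r\n}\r\n");

    // Detection scans up to the first 100 lines; callers fall back to the
    // platform default when nothing conclusive is found.
    let line_ending = auto_detect_line_ending(&doc).unwrap_or(DEFAULT_LINE_ENDING);
    assert_eq!(line_ending.as_str(), "\r\n");

    // Char index of the end of line 0, excluding its CRLF.
    assert_eq!(line_end_char_index(&doc.slice(..), 0), 11);
}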
@@ -3,8 +3,13 @@ use std::iter::{self, from_fn, Peekable, SkipWhile};
use ropey::iter::Chars;

use crate::{
    chars::{
        categorize_char, char_is_line_ending, char_is_punctuation, char_is_whitespace,
        char_is_word, CharCategory,
    },
    coords_at_pos,
    graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary},
    line_ending::{get_line_ending, line_end_char_index},
    pos_at_coords, Position, Range, RopeSlice,
};

@@ -37,9 +42,8 @@ pub fn move_horizontally(
            nth_prev_grapheme_boundary(slice, pos, count).max(start)
        }
        Direction::Forward => {
            // Line end is pos at the start of next line - 1
            let end = slice.line_to_char(line + 1).saturating_sub(1);
            nth_next_grapheme_boundary(slice, pos, count).min(end)
            let end_char_idx = line_end_char_index(&slice, line);
            nth_next_grapheme_boundary(slice, pos, count).min(end_char_idx)
        }
    };
    let anchor = match behaviour {

@@ -68,8 +72,11 @@ pub fn move_vertically(
        ),
    };

    // convert to 0-indexed, subtract another 1 because len_chars() counts \n
    let new_line_len = slice.line(new_line).len_chars().saturating_sub(2);
    // Length of the line sans line-ending.
    let new_line_len = {
        let line = slice.line(new_line);
        line.len_chars() - get_line_ending(&line).map(|le| le.len_chars()).unwrap_or(0)
    };

    let new_col = std::cmp::min(horiz as usize, new_line_len);

@@ -104,64 +111,6 @@ fn word_move(slice: RopeSlice, mut range: Range, count: usize, target: WordMotio
}

// ---- util ------------
#[inline]
pub(crate) fn is_word(ch: char) -> bool {
    ch.is_alphanumeric() || ch == '_'
}

#[inline]
pub(crate) fn is_end_of_line(ch: char) -> bool {
    ch == '\n'
}

#[inline]
// Whitespace, but not end of line
pub(crate) fn is_strict_whitespace(ch: char) -> bool {
    ch.is_whitespace() && !is_end_of_line(ch)
}

#[inline]
pub(crate) fn is_punctuation(ch: char) -> bool {
    use unicode_general_category::{get_general_category, GeneralCategory};

    matches!(
        get_general_category(ch),
        GeneralCategory::OtherPunctuation
            | GeneralCategory::OpenPunctuation
            | GeneralCategory::ClosePunctuation
            | GeneralCategory::InitialPunctuation
            | GeneralCategory::FinalPunctuation
            | GeneralCategory::ConnectorPunctuation
            | GeneralCategory::DashPunctuation
            | GeneralCategory::MathSymbol
            | GeneralCategory::CurrencySymbol
            | GeneralCategory::ModifierSymbol
    )
}

#[derive(Debug, Eq, PartialEq)]
pub enum Category {
    Whitespace,
    Eol,
    Word,
    Punctuation,
    Unknown,
}

#[inline]
pub(crate) fn categorize(ch: char) -> Category {
    if is_end_of_line(ch) {
        Category::Eol
    } else if ch.is_whitespace() {
        Category::Whitespace
    } else if is_word(ch) {
        Category::Word
    } else if is_punctuation(ch) {
        Category::Punctuation
    } else {
        Category::Unknown
    }
}

#[inline]
/// Returns first index that doesn't satisfy a given predicate when

@@ -235,7 +184,8 @@ impl CharHelpers for Chars<'_> {
        let mut phase = WordMotionPhase::Start;
        let mut head = origin.head;
        let mut anchor: Option<usize> = None;
        let is_boundary = |a: char, b: Option<char>| categorize(a) != categorize(b.unwrap_or(a));
        let is_boundary =
            |a: char, b: Option<char>| categorize_char(a) != categorize_char(b.unwrap_or(a));
        while let Some(peek) = characters.peek().copied() {
            phase = match phase {
                WordMotionPhase::Start => {

@@ -244,7 +194,8 @@ impl CharHelpers for Chars<'_> {
                        break; // We're at the end, so there's nothing to do.
                    }
                    // Anchor may remain here if the head wasn't at a boundary
                    if !is_boundary(peek, characters.peek().copied()) && !is_end_of_line(peek) {
                    if !is_boundary(peek, characters.peek().copied()) && !char_is_line_ending(peek)
                    {
                        anchor = Some(head);
                    }
                    // First character is always skipped by the head

@@ -252,7 +203,7 @@ impl CharHelpers for Chars<'_> {
                    WordMotionPhase::SkipNewlines
                }
                WordMotionPhase::SkipNewlines => {
                    if is_end_of_line(peek) {
                    if char_is_line_ending(peek) {
                        characters.next();
                        if characters.peek().is_some() {
                            advance(&mut head);

@@ -286,12 +237,12 @@ fn reached_target(target: WordMotionTarget, peek: char, next_peek: Option<&char>

    match target {
        WordMotionTarget::NextWordStart => {
            ((categorize(peek) != categorize(*next_peek))
                && (is_end_of_line(*next_peek) || !next_peek.is_whitespace()))
            ((categorize_char(peek) != categorize_char(*next_peek))
                && (char_is_line_ending(*next_peek) || !next_peek.is_whitespace()))
        }
        WordMotionTarget::NextWordEnd | WordMotionTarget::PrevWordStart => {
            ((categorize(peek) != categorize(*next_peek))
                && (!peek.is_whitespace() || is_end_of_line(*next_peek)))
            ((categorize_char(peek) != categorize_char(*next_peek))
                && (!peek.is_whitespace() || char_is_line_ending(*next_peek)))
        }
    }
}

@@ -330,7 +281,7 @@ mod test {
                slice,
                move_vertically(slice, range, Direction::Forward, 1, Movement::Move).head
            ),
            (1, 2).into()
            (1, 3).into()
        );
    }

@@ -343,12 +294,12 @@ mod test {
        let mut range = Range::point(position);

        let moves_and_expected_coordinates = [
            ((Direction::Forward, 1usize), (0, 1)),
            ((Direction::Forward, 2usize), (0, 3)),
            ((Direction::Forward, 0usize), (0, 3)),
            ((Direction::Forward, 999usize), (0, 31)),
            ((Direction::Forward, 999usize), (0, 31)),
            ((Direction::Backward, 999usize), (0, 0)),
            ((Direction::Forward, 1usize), (0, 1)), // T|his is a simple alphabetic line
            ((Direction::Forward, 2usize), (0, 3)), // Thi|s is a simple alphabetic line
            ((Direction::Forward, 0usize), (0, 3)), // Thi|s is a simple alphabetic line
            ((Direction::Forward, 999usize), (0, 32)), // This is a simple alphabetic line|
            ((Direction::Forward, 999usize), (0, 32)), // This is a simple alphabetic line|
            ((Direction::Backward, 999usize), (0, 0)), // |This is a simple alphabetic line
        ];

        for ((direction, amount), coordinates) in IntoIter::new(moves_and_expected_coordinates) {

@@ -366,15 +317,15 @@ mod test {
        let mut range = Range::point(position);

        let moves_and_expected_coordinates = IntoIter::new([
            ((Direction::Forward, 1usize), (0, 1)), // M_ltiline
            ((Direction::Forward, 2usize), (0, 3)), // Mul_iline
            ((Direction::Backward, 6usize), (0, 0)), // _ultiline
            ((Direction::Backward, 999usize), (0, 0)), // _ultiline
            ((Direction::Forward, 3usize), (0, 3)), // Mul_iline
            ((Direction::Forward, 0usize), (0, 3)), // Mul_iline
            ((Direction::Backward, 0usize), (0, 3)), // Mul_iline
            ((Direction::Forward, 999usize), (0, 9)), // Multilin_
            ((Direction::Forward, 999usize), (0, 9)), // Multilin_
            ((Direction::Forward, 1usize), (0, 1)), // M|ultiline\n
            ((Direction::Forward, 2usize), (0, 3)), // Mul|tiline\n
            ((Direction::Backward, 6usize), (0, 0)), // |Multiline\n
            ((Direction::Backward, 999usize), (0, 0)), // |Multiline\n
            ((Direction::Forward, 3usize), (0, 3)), // Mul|tiline\n
            ((Direction::Forward, 0usize), (0, 3)), // Mul|tiline\n
            ((Direction::Backward, 0usize), (0, 3)), // Mul|tiline\n
            ((Direction::Forward, 999usize), (0, 9)), // Multiline|\n
            ((Direction::Forward, 999usize), (0, 9)), // Multiline|\n
        ]);

        for ((direction, amount), coordinates) in moves_and_expected_coordinates {

@@ -446,7 +397,7 @@ mod test {
            // First descent preserves column as the target line is wider
            ((Axis::V, Direction::Forward, 1usize), (1, 8)),
            // Second descent clamps column as the target line is shorter
            ((Axis::V, Direction::Forward, 1usize), (2, 4)),
            ((Axis::V, Direction::Forward, 1usize), (2, 5)),
            // Third descent restores the original column
            ((Axis::V, Direction::Forward, 1usize), (3, 8)),
            // Behaviour is preserved even through long jumps

@@ -760,45 +711,4 @@ mod test {
            }
        }
    }

    #[test]
    fn test_categorize() {
        const WORD_TEST_CASE: &'static str =
            "_hello_world_あいうえおー12345678901234567890";
        const PUNCTUATION_TEST_CASE: &'static str =
            "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~!”#$%&’()*+、。:;<=>?@「」^`{|}~";
        const WHITESPACE_TEST_CASE: &'static str = " ";

        assert_eq!(Category::Eol, categorize('\n'));

        for ch in WHITESPACE_TEST_CASE.chars() {
            assert_eq!(
                Category::Whitespace,
                categorize(ch),
                "Testing '{}', but got `{:?}` instead of `Category::Whitespace`",
                ch,
                categorize(ch)
            );
        }

        for ch in WORD_TEST_CASE.chars() {
            assert_eq!(
                Category::Word,
                categorize(ch),
                "Testing '{}', but got `{:?}` instead of `Category::Word`",
                ch,
                categorize(ch)
            );
        }

        for ch in PUNCTUATION_TEST_CASE.chars() {
            assert_eq!(
                Category::Punctuation,
                categorize(ch),
                "Testing '{}', but got `{:?}` instead of `Category::Punctuation`",
                ch,
                categorize(ch)
            );
        }
    }
}
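A small sketch (illustrative, not part of the diff) of why the new clamp matters for CRLF documents:

use helix_core::line_ending::line_end_char_index;
use helix_core::Rope;

fn movement_clamp_example() {
    let text = Rope::from_str("short\r\nlonger line\r\n");
    let slice = text.slice(..);

    // The old `line_to_char(line + 1) - 1` only stepped back over the "\n",
    // leaving the cursor on the "\r" of a CRLF line; line_end_char_index
    // subtracts the full length of whatever ending the line actually has.
    assert_eq!(line_end_char_index(&slice, 0), 5); // right after "short"
    assert_eq!(line_end_char_index(&slice, 1), 18); // right after "longer line"
}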
@@ -1,4 +1,5 @@
use crate::{
    chars::char_is_line_ending,
    graphemes::{nth_next_grapheme_boundary, RopeGraphemes},
    Rope, RopeSlice,
};

@@ -23,8 +24,9 @@ impl Position {
    pub fn traverse(self, text: &crate::Tendril) -> Self {
        let Self { mut row, mut col } = self;
        // TODO: there should be a better way here
        for ch in text.chars() {
            if ch == '\n' {
        let mut chars = text.chars().peekable();
        while let Some(ch) = chars.next() {
            if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
                row += 1;
                col = 0;
            } else {
@@ -1,4 +1,4 @@
use crate::{regex::Regex, Change, Rope, RopeSlice, Transaction};
use crate::{chars::char_is_line_ending, regex::Regex, Change, Rope, RopeSlice, Transaction};
pub use helix_syntax::{get_language, get_language_name, Lang};

use arc_swap::ArcSwap;

@@ -589,9 +589,10 @@ impl LanguageLayer {
            mut column,
        } = point;

        // TODO: there should be a better way here
        for ch in text.bytes() {
            if ch == b'\n' {
        // TODO: there should be a better way here.
        let mut chars = text.chars().peekable();
        while let Some(ch) = chars.next() {
            if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
                row += 1;
                column = 0;
            } else {
@@ -3,7 +3,7 @@ use crate::{
    Call, Error, OffsetEncoding, Result,
};

use helix_core::{find_root, ChangeSet, Rope};
use helix_core::{chars::char_is_line_ending, find_root, ChangeSet, Rope};
use jsonrpc_core as jsonrpc;
use lsp_types as lsp;
use serde_json::Value;

@@ -337,8 +337,9 @@ impl Client {
            mut character,
        } = pos;

        for ch in text.chars() {
            if ch == '\n' {
        let mut chars = text.chars().peekable();
        while let Some(ch) = chars.next() {
            if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
                line += 1;
                character = 0;
            } else {
@@ -1,12 +1,15 @@
use helix_core::{
    comment, coords_at_pos, find_first_non_whitespace_char, find_root, graphemes, indent,
    line_ending::{
        get_line_ending, get_line_ending_of_str, line_end_char_index, str_is_line_ending,
    },
    match_brackets,
    movement::{self, Direction},
    object, pos_at_coords,
    regex::{self, Regex},
    register::{self, Register, Registers},
    search, selection, Change, ChangeSet, Position, Range, Rope, RopeSlice, Selection, SmallVec,
    Tendril, Transaction,
    search, selection, Change, ChangeSet, LineEnding, Position, Range, Rope, RopeGraphemes,
    RopeSlice, Selection, SmallVec, Tendril, Transaction, DEFAULT_LINE_ENDING,
};

use helix_view::{

@@ -303,9 +306,8 @@ fn move_line_end(cx: &mut Context) {
        let text = doc.text();
        let line = text.char_to_line(range.head);

        // Line end is pos at the start of next line - 1
        // subtract another 1 because the line ends with \n
        let pos = text.line_to_char(line + 1).saturating_sub(2);
        let pos = line_end_char_index(&text.slice(..), line);

        Range::new(pos, pos)
    });

@@ -452,12 +454,28 @@ where
    let count = cx.count();

    // need to wait for next key
    // TODO: should this be done by grapheme rather than char? For example,
    // we can't properly handle the line-ending CRLF case here in terms of char.
    cx.on_next_key(move |cx, event| {
        let ch = match event {
            KeyEvent {
                code: KeyCode::Enter,
                ..
            } => '\n',
            } =>
            // TODO: this isn't quite correct when CRLF is involved.
            // This hack will work in most cases, since documents don't
            // usually mix line endings. But we should fix it eventually
            // anyway.
            {
                current!(cx.editor)
                    .1
                    .line_ending
                    .as_str()
                    .chars()
                    .next()
                    .unwrap()
            }

            KeyEvent {
                code: KeyCode::Char(ch),
                ..

@@ -575,32 +593,37 @@ fn extend_first_nonwhitespace(cx: &mut Context) {
}

fn replace(cx: &mut Context) {
    let mut buf = [0u8; 4]; // To hold utf8 encoded char.

    // need to wait for next key
    cx.on_next_key(move |cx, event| {
        let (view, doc) = current!(cx.editor);
        let ch = match event {
            KeyEvent {
                code: KeyCode::Char(ch),
                ..
            } => Some(ch),
            } => Some(&ch.encode_utf8(&mut buf[..])[..]),
            KeyEvent {
                code: KeyCode::Enter,
                ..
            } => Some('\n'),
            } => Some(doc.line_ending.as_str()),
            _ => None,
        };

        if let Some(ch) = ch {
            let (view, doc) = current!(cx.editor);

            let transaction =
                Transaction::change_by_selection(doc.text(), doc.selection(view.id), |range| {
                    let max_to = doc.text().len_chars().saturating_sub(1);
                    let to = std::cmp::min(max_to, range.to() + 1);
                    let text: String = doc
                        .text()
                        .slice(range.from()..to)
                        .chars()
                        .map(|c| if c == '\n' { '\n' } else { ch })
                    let text: String = RopeGraphemes::new(doc.text().slice(range.from()..to))
                        .map(|g| {
                            let cow: Cow<str> = g.into();
                            if str_is_line_ending(&cow) {
                                cow
                            } else {
                                ch.into()
                            }
                        })
                        .collect();

                    (range.from(), to, Some(text.into()))

@@ -725,9 +748,8 @@ fn extend_line_end(cx: &mut Context) {
        let text = doc.text();
        let line = text.char_to_line(range.head);

        // Line end is pos at the start of next line - 1
        // subtract another 1 because the line ends with \n
        let pos = text.line_to_char(line + 1).saturating_sub(2);
        let pos = line_end_char_index(&text.slice(..), line);

        Range::new(range.anchor, pos)
    });

@@ -783,7 +805,8 @@ fn split_selection_on_newline(cx: &mut Context) {
    let text = doc.text().slice(..);
    // only compile the regex once
    #[allow(clippy::trivial_regex)]
    static REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\n").unwrap());
    static REGEX: Lazy<Regex> =
        Lazy::new(|| Regex::new(r"\r\n|[\n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}]").unwrap());
    let selection = selection::split_on_matches(text, doc.selection(view.id), &REGEX);
    doc.set_selection(view.id, selection);
}

@@ -922,7 +945,13 @@ fn delete_selection_impl(reg: &mut Register, doc: &mut Document, view_id: ViewId
    // then delete
    let transaction =
        Transaction::change_by_selection(doc.text(), doc.selection(view_id), |range| {
            let max_to = doc.text().len_chars().saturating_sub(1);
            let alltext = doc.text();
            let line = alltext.char_to_line(range.head);
            let max_to = doc.text().len_chars().saturating_sub(
                get_line_ending(&alltext.line(line))
                    .map(|le| le.len_chars())
                    .unwrap_or(0),
            );
            let to = std::cmp::min(max_to, range.to() + 1);
            (range.from(), to, None)
        });

@@ -1003,7 +1032,7 @@ fn append_mode(cx: &mut Context) {
    if selection.iter().any(|range| range.head == end) {
        let transaction = Transaction::change(
            doc.text(),
            std::array::IntoIter::new([(end, end, Some(Tendril::from_char('\n')))]),
            std::array::IntoIter::new([(end, end, Some(doc.line_ending.as_str().into()))]),
        );
        doc.apply(&transaction, view.id);
    }

@@ -1131,6 +1160,45 @@ mod cmd {
        }
    }

    /// Sets or reports the current document's line ending setting.
    fn set_line_ending(editor: &mut Editor, args: &[&str], event: PromptEvent) {
        use LineEnding::*;

        // If no argument, report current line ending setting.
        if args.is_empty() {
            let line_ending = current!(editor).1.line_ending;
            editor.set_status(match line_ending {
                Crlf => "crlf".into(),
                LF => "line feed".into(),
                FF => "form feed".into(),
                CR => "carriage return".into(),
                Nel => "next line".into(),

                // These should never be a document's default line ending.
                VT | LS | PS => "error".into(),
            });
            return;
        }

        // Attempt to parse argument as a line ending.
        let line_ending = match args.get(0) {
            // We check for CR first because it shares a common prefix with CRLF.
            Some(arg) if "cr".starts_with(&arg.to_lowercase()) => Some(CR),
            Some(arg) if "crlf".starts_with(&arg.to_lowercase()) => Some(Crlf),
            Some(arg) if "lf".starts_with(&arg.to_lowercase()) => Some(LF),
            Some(arg) if "ff".starts_with(&arg.to_lowercase()) => Some(FF),
            Some(arg) if "nel".starts_with(&arg.to_lowercase()) => Some(Nel),
            _ => None,
        };

        if let Some(le) = line_ending {
            doc_mut!(editor).line_ending = le;
        } else {
            // Invalid argument.
            editor.set_error(format!("invalid line ending '{}'", args[0],));
        }
    }

    fn earlier(editor: &mut Editor, args: &[&str], event: PromptEvent) {
        let uk = match args.join(" ").parse::<helix_core::history::UndoKind>() {
            Ok(uk) => uk,

@@ -1274,7 +1342,11 @@ mod cmd {
    }

    fn yank_joined_to_clipboard(editor: &mut Editor, args: &[&str], _: PromptEvent) {
        let separator = args.first().copied().unwrap_or("\n");
        let (_, doc) = current!(editor);
        let separator = args
            .first()
            .copied()
            .unwrap_or_else(|| doc.line_ending.as_str());
        yank_joined_to_clipboard_impl(editor, separator);
    }

@@ -1359,6 +1431,13 @@ mod cmd {
            fun: set_indent_style,
            completer: None,
        },
        TypableCommand {
            name: "line-ending",
            alias: None,
            doc: "Set the document's default line ending. Options: crlf, lf, cr, ff, nel.",
            fun: set_line_ending,
            completer: None,
        },
        TypableCommand {
            name: "earlier",
            alias: Some("ear"),

@@ -1683,8 +1762,7 @@ fn append_to_line(cx: &mut Context) {
    let selection = doc.selection(view.id).transform(|range| {
        let text = doc.text();
        let line = text.char_to_line(range.head);
        // we can't use line_to_char(line + 1) - 2 because the last line might not contain \n
        let pos = (text.line_to_char(line) + text.line(line).len_chars()).saturating_sub(1);
        let pos = line_end_char_index(&text.slice(..), line);
        Range::new(pos, pos)
    });
    doc.set_selection(view.id, selection);

@@ -1731,7 +1809,7 @@ fn open(cx: &mut Context, open: Open) {
    let indent = doc.indent_unit().repeat(indent_level);
    let indent_len = indent.len();
    let mut text = String::with_capacity(1 + indent_len);
    text.push('\n');
    text.push_str(doc.line_ending.as_str());
    text.push_str(&indent);
    let text = text.repeat(count);

@@ -2344,7 +2422,7 @@ pub mod insert {
        );
        let indent = doc.indent_unit().repeat(indent_level);
        let mut text = String::with_capacity(1 + indent.len());
        text.push('\n');
        text.push_str(doc.line_ending.as_str());
        text.push_str(&indent);

        let head = pos + offs + text.chars().count();

@@ -2365,7 +2443,7 @@ pub mod insert {
        if helix_core::auto_pairs::PAIRS.contains(&(prev, curr)) {
            // another newline, indent the end bracket one level less
            let indent = doc.indent_unit().repeat(indent_level.saturating_sub(1));
            text.push('\n');
            text.push_str(doc.line_ending.as_str());
            text.push_str(&indent);
        }

@@ -2488,7 +2566,8 @@ fn yank_joined_to_clipboard_impl(editor: &mut Editor, separator: &str) {
}

fn yank_joined_to_clipboard(cx: &mut Context) {
    yank_joined_to_clipboard_impl(&mut cx.editor, "\n");
    let line_ending = current!(cx.editor).1.line_ending;
    yank_joined_to_clipboard_impl(&mut cx.editor, line_ending.as_str());
}

fn yank_main_selection_to_clipboard_impl(editor: &mut Editor) {

@@ -2529,8 +2608,10 @@ fn paste_impl(
        .unwrap(),
    );

    // if any of values ends \n it's linewise paste
    let linewise = values.iter().any(|value| value.ends_with('\n'));
    // if any of values ends with a line ending, it's linewise paste
    let linewise = values
        .iter()
        .any(|value| get_line_ending_of_str(value).is_some());

    let mut values = values.iter().cloned().map(Tendril::from).chain(repeat);
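A sketch of the linewise-paste check this hunk switches to (illustrative only):

use helix_core::line_ending::get_line_ending_of_str;

fn linewise_example() {
    // The old check (`value.ends_with('\n')`) missed lone CR and the other
    // Unicode endings; the new check recognizes any of them.
    assert!(get_line_ending_of_str("copied line\r\n").is_some());
    assert!(get_line_ending_of_str("copied line\r").is_some());
    assert!(get_line_ending_of_str("copied line\u{2028}").is_some());
    assert!(get_line_ending_of_str("no trailing ending").is_none());
}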
@@ -9,7 +9,7 @@ use crate::{
use helix_core::{
    coords_at_pos,
    syntax::{self, HighlightEvent},
    Position, Range,
    LineEnding, Position, Range,
};
use helix_lsp::LspProgressMap;
use helix_view::{document::Mode, Document, Editor, Theme, View};

@@ -184,7 +184,7 @@ impl EditorView {

        // iterate over range char by char
        for grapheme in RopeGraphemes::new(text) {
            if grapheme == "\n" {
            if LineEnding::from_rope_slice(&grapheme).is_some() {
                visual_x = 0;
                line += 1;
@@ -115,6 +115,8 @@ fn parse<'a>(
            // TODO: replace tabs with indentation

            let mut slice = &text[start..end];
            // TODO: do we need to handle all unicode line endings
            // here, or is just '\n' okay?
            while let Some(end) = slice.find('\n') {
                // emit span up to newline
                let text = &slice[..end];
@@ -22,3 +22,4 @@ unicode-segmentation = "1.2"
unicode-width = "0.1"
crossterm = { version = "0.20", optional = true }
serde = { version = "1", "optional" = true, features = ["derive"]}
helix-core = { version = "0.2", path = "../helix-core" }
@@ -47,6 +47,7 @@
//! ]);
//! ```
use crate::style::Style;
use helix_core::line_ending::str_is_line_ending;
use std::borrow::Cow;
use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr;

@@ -177,7 +178,7 @@ impl<'a> Span<'a> {
                symbol: g,
                style: base_style.patch(self.style),
            })
            .filter(|s| s.symbol != "\n")
            .filter(|s| !str_is_line_ending(s.symbol))
    }
}
@@ -1,4 +1,5 @@
use crate::text::StyledGrapheme;
use helix_core::line_ending::str_is_line_ending;
use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr;

@@ -62,13 +63,13 @@ impl<'a, 'b> LineComposer<'a> for WordWrapper<'a, 'b> {
            // Ignore characters wider that the total max width.
            if symbol.width() as u16 > self.max_line_width
                // Skip leading whitespace when trim is enabled.
                || self.trim && symbol_whitespace && symbol != "\n" && current_line_width == 0
                || self.trim && symbol_whitespace && !str_is_line_ending(symbol) && current_line_width == 0
            {
                continue;
            }

            // Break on newline and discard it.
            if symbol == "\n" {
            if str_is_line_ending(symbol) {
                if prev_whitespace {
                    current_line_width = width_to_last_word_end;
                    self.current_line.truncate(symbols_to_last_word_end);

@@ -170,7 +171,7 @@ impl<'a, 'b> LineComposer<'a> for LineTruncator<'a, 'b> {
        }

        // Break on newline and discard it.
        if symbol == "\n" {
        if str_is_line_ending(symbol) {
            break;
        }

@@ -199,7 +200,7 @@ impl<'a, 'b> LineComposer<'a> for LineTruncator<'a, 'b> {

        if skip_rest {
            for StyledGrapheme { symbol, .. } in &mut self.symbols {
                if symbol == "\n" {
                if str_is_line_ending(symbol) {
                    break;
                }
            }
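The grapheme-level comparison these hunks replace only ever matched a bare "\n"; a short illustration of why str_is_line_ending is the right predicate here (illustrative, not part of the diff):

use helix_core::line_ending::str_is_line_ending;

fn grapheme_example() {
    // A CRLF pair is a single grapheme, so `symbol == "\n"` never matches it.
    assert!(str_is_line_ending("\r\n"));
    assert!(str_is_line_ending("\u{2028}"));
    assert!(!str_is_line_ending("a"));
}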
@@ -7,10 +7,12 @@ use std::str::FromStr;
use std::sync::Arc;

use helix_core::{
    chars::{char_is_linebreak, char_is_whitespace},
    chars::{char_is_line_ending, char_is_whitespace},
    history::History,
    line_ending::auto_detect_line_ending,
    syntax::{self, LanguageConfiguration},
    ChangeSet, Diagnostic, Rope, Selection, State, Syntax, Transaction,
    ChangeSet, Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction,
    DEFAULT_LINE_ENDING,
};

use crate::{DocumentId, Theme, ViewId};

@@ -45,6 +47,9 @@ pub struct Document {
    /// Current indent style.
    pub indent_style: IndentStyle,

    /// The document's default line ending.
    pub line_ending: LineEnding,

    syntax: Option<Syntax>,
    // /// Corresponding language scope name. Usually `source.<lang>`.
    pub(crate) language: Option<Arc<LanguageConfiguration>>,

@@ -232,6 +237,7 @@ impl Document {
            history: Cell::new(History::default()),
            last_saved_revision: 0,
            language_server: None,
            line_ending: DEFAULT_LINE_ENDING,
        }
    }

@@ -243,22 +249,26 @@ impl Document {
    ) -> Result<Self, Error> {
        use std::{fs::File, io::BufReader};

        let doc = if !path.exists() {
            Rope::from("\n")
        let mut doc = if !path.exists() {
            Rope::from(DEFAULT_LINE_ENDING.as_str())
        } else {
            let file = File::open(&path).context(format!("unable to open {:?}", path))?;
            let mut doc = Rope::from_reader(BufReader::new(file))?;
            // add missing newline at the end of file
            if doc.len_bytes() == 0 || doc.byte(doc.len_bytes() - 1) != b'\n' {
                doc.insert_char(doc.len_chars(), '\n');
            }
            doc
            Rope::from_reader(BufReader::new(file))?
        };

        // search for line endings
        let line_ending = auto_detect_line_ending(&doc).unwrap_or(DEFAULT_LINE_ENDING);

        // add missing newline at the end of file
        if doc.len_bytes() == 0 || !char_is_line_ending(doc.char(doc.len_chars() - 1)) {
            doc.insert(doc.len_chars(), line_ending.as_str());
        }

        let mut doc = Self::new(doc);
        // set the path and try detecting the language
        doc.set_path(&path)?;
        doc.detect_indent_style();
        doc.line_ending = line_ending;

        if let Some(loader) = config_loader {
            doc.detect_language(theme, loader);

@@ -366,7 +376,7 @@ impl Document {
            Some(' ') => false,

            // Ignore blank lines.
            Some(c) if char_is_linebreak(c) => continue,
            Some(c) if char_is_line_ending(c) => continue,

            _ => {
                prev_line_is_tabs = false;

@@ -390,7 +400,7 @@ impl Document {
            c if char_is_whitespace(c) => count_is_done = true,

            // Ignore blank lines.
            c if char_is_linebreak(c) => continue 'outer,
            c if char_is_line_ending(c) => continue 'outer,

            _ => break,
        }
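For reference, a condensed sketch of the new open-path behavior (a paraphrase of the hunk above using helix-core only; `detect_and_fix_trailing_newline` is a made-up helper name):

use helix_core::{
    chars::char_is_line_ending,
    line_ending::{auto_detect_line_ending, DEFAULT_LINE_ENDING},
    Rope,
};

fn detect_and_fix_trailing_newline(mut doc: Rope) -> Rope {
    // Detect the document's ending, then append one of the same kind if the
    // file doesn't already end with a line ending.
    let line_ending = auto_detect_line_ending(&doc).unwrap_or(DEFAULT_LINE_ENDING);
    if doc.len_bytes() == 0 || !char_is_line_ending(doc.char(doc.len_chars() - 1)) {
        doc.insert(doc.len_chars(), line_ending.as_str());
    }
    doc
}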
@@ -15,10 +15,9 @@ use slotmap::SlotMap;

use anyhow::Error;

use helix_core::Position;

pub use helix_core::diagnostic::Severity;
pub use helix_core::register::Registers;
use helix_core::{Position, DEFAULT_LINE_ENDING};

#[derive(Debug)]
pub struct Editor {

@@ -173,7 +172,7 @@ impl Editor {

    pub fn new_file(&mut self, action: Action) -> DocumentId {
        use helix_core::Rope;
        let doc = Document::new(Rope::from("\n"));
        let doc = Document::new(Rope::from(DEFAULT_LINE_ENDING.as_str()));
        let id = self.documents.insert(doc);
        self.documents[id].id = id;
        self.switch(id, action);