From 5d22e3c4e574eb24260966de7f20f582e6184e24 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Sun, 20 Jun 2021 00:40:41 -0700 Subject: [PATCH] Misc fixes and clean up of line ending detect code. --- helix-core/src/lib.rs | 3 +- helix-core/src/line_ending.rs | 111 +++++++++++++++------------------- helix-core/src/movement.rs | 4 +- helix-term/src/ui/editor.rs | 6 +- helix-view/src/document.rs | 2 +- 5 files changed, 56 insertions(+), 70 deletions(-) diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index d99bb66d..e00e56be 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -113,7 +113,6 @@ pub use diagnostic::Diagnostic; pub use state::State; pub use line_ending::{ - auto_detect_line_ending, get_line_ending, rope_slice_to_line_ending, LineEnding, - DEFAULT_LINE_ENDING, line_end + auto_detect_line_ending, get_line_ending, line_end, LineEnding, DEFAULT_LINE_ENDING, }; pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction}; diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index f6118493..45e20c88 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -5,11 +5,11 @@ use crate::{Rope, RopeGraphemes, RopeSlice}; pub enum LineEnding { Crlf, // CarriageReturn followed by LineFeed LF, // U+000A -- LineFeed + VT, // U+000B -- VerticalTab + FF, // U+000C -- FormFeed CR, // U+000D -- CarriageReturn Nel, // U+0085 -- NextLine LS, // U+2028 -- Line Separator - VT, // U+000B -- VerticalTab - FF, // U+000C -- FormFeed PS, // U+2029 -- ParagraphSeparator } @@ -21,74 +21,58 @@ impl LineEnding { } } - pub fn as_str(&self) -> &str { + pub fn as_str(&self) -> &'static str { match self { Self::Crlf => "\u{000D}\u{000A}", Self::LF => "\u{000A}", - Self::Nel => "\u{0085}", - Self::LS => "\u{2028}", - Self::CR => "\u{000D}", Self::VT => "\u{000B}", Self::FF => "\u{000C}", + Self::CR => "\u{000D}", + Self::Nel => "\u{0085}", + Self::LS => "\u{2028}", Self::PS => "\u{2029}", } } -} -pub fn 
rope_slice_to_line_ending(g: &RopeSlice) -> Option<LineEnding> { - if let Some(text) = g.as_str() { - str_to_line_ending(text) - } - else { - // Not a line ending - None - } -} - -pub fn str_to_line_ending(g: &str) -> Option<LineEnding> { - match g { - "\u{000D}\u{000A}" => Some(LineEnding::Crlf), - "\u{000A}" => Some(LineEnding::LF), - "\u{000D}" => Some(LineEnding::CR), - "\u{0085}" => Some(LineEnding::Nel), - "\u{2028}" => Some(LineEnding::LS), - "\u{000B}" => Some(LineEnding::VT), - "\u{000C}" => Some(LineEnding::FF), - "\u{2029}" => Some(LineEnding::PS), - // Not a line ending - _ => None, - } -} - -pub fn auto_detect_line_ending(doc: &Rope) -> Option<LineEnding> { - // based on https://github.com/cessen/led/blob/27572c8838a1c664ee378a19358604063881cc1d/src/editor/mod.rs#L88-L162 - - let mut ending = None; - // return first matched line ending. Not all possible line endings are being matched, as they might be special-use only - for line in doc.lines().take(100) { - ending = match line.len_chars() { - 1 => { - let g = RopeGraphemes::new(line.slice((line.len_chars() - 1)..)) - .last() - .unwrap(); - rope_slice_to_line_ending(&g) - } - n if n > 1 => { - let g = RopeGraphemes::new(line.slice((line.len_chars() - 2)..)) - .last() - .unwrap(); - rope_slice_to_line_ending(&g) - } + pub fn from_str(g: &str) -> Option<LineEnding> { + match g { + "\u{000D}\u{000A}" => Some(LineEnding::Crlf), + "\u{000A}" => Some(LineEnding::LF), + "\u{000B}" => Some(LineEnding::VT), + "\u{000C}" => Some(LineEnding::FF), + "\u{000D}" => Some(LineEnding::CR), + "\u{0085}" => Some(LineEnding::Nel), + "\u{2028}" => Some(LineEnding::LS), + "\u{2029}" => Some(LineEnding::PS), + // Not a line ending _ => None, - }; - if ending.is_some() { - match ending { - Some(LineEnding::VT) | Some(LineEnding::FF) | Some(LineEnding::PS) => {} - _ => return ending, - } } } - ending + + pub fn from_rope_slice(g: &RopeSlice) -> Option<LineEnding> { + if let Some(text) = g.as_str() { + LineEnding::from_str(text) + } else { + // Non-contiguous, so it can't be a line 
ending. + // Specifically, Ropey guarantees that CRLF is always + // contiguous. And the remaining line endings are all + // single `char`s, and therefore trivially contiguous. + None + } + } +} + +/// Attempts to detect what line ending the passed document uses. +pub fn auto_detect_line_ending(doc: &Rope) -> Option<LineEnding> { + // Return first matched line ending. Not all possible line endings + // are being matched, as they might be special-use only + for line in doc.lines().take(100) { + match get_line_ending(&line) { + None | Some(LineEnding::VT) | Some(LineEnding::FF) | Some(LineEnding::PS) => {} + ending => return ending, + } + } + None } /// Returns the passed line's line ending, if any. @@ -108,13 +92,16 @@ pub fn get_line_ending(line: &RopeSlice) -> Option<LineEnding> { .unwrap_or(""); // First check the two-character case for CRLF, then check the single-character case. - str_to_line_ending(g2).or_else(|| str_to_line_ending(g1)) + LineEnding::from_str(g2).or_else(|| LineEnding::from_str(g1)) } +/// Returns the char index of the end of the given line, not including its line ending. 
pub fn line_end(slice: &RopeSlice, line: usize) -> usize { - slice.line_to_char(line + 1).saturating_sub(get_line_ending(&slice.line(line)) - .map(|le| le.len_chars()) - .unwrap_or(0)) + slice.line_to_char(line + 1).saturating_sub( + get_line_ending(&slice.line(line)) + .map(|le| le.len_chars()) + .unwrap_or(0), + ) } #[cfg(target_os = "windows")] diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs index a3cd9b96..7f47e662 100644 --- a/helix-core/src/movement.rs +++ b/helix-core/src/movement.rs @@ -3,9 +3,9 @@ use std::iter::{self, from_fn, Peekable, SkipWhile}; use ropey::iter::Chars; use crate::{ - coords_at_pos, + coords_at_pos, get_line_ending, graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary}, - pos_at_coords, Position, Range, RopeSlice, get_line_ending, line_end + line_end, pos_at_coords, Position, Range, RopeSlice, }; #[derive(Debug, Copy, Clone, PartialEq, Eq)] diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs index 42bb3ba8..da8f0f53 100644 --- a/helix-term/src/ui/editor.rs +++ b/helix-term/src/ui/editor.rs @@ -7,9 +7,9 @@ use crate::{ }; use helix_core::{ - coords_at_pos, rope_slice_to_line_ending, + coords_at_pos, syntax::{self, HighlightEvent}, - Position, Range, + LineEnding, Position, Range, }; use helix_view::input::{KeyCode, KeyEvent, KeyModifiers}; use helix_view::{document::Mode, Document, Editor, Theme, View}; @@ -177,7 +177,7 @@ impl EditorView { // iterate over range char by char for grapheme in RopeGraphemes::new(text) { - if rope_slice_to_line_ending(&grapheme).is_some() { + if LineEnding::from_rope_slice(&grapheme).is_some() { visual_x = 0; line += 1; diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 44d50583..80be1ed2 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -255,7 +255,7 @@ impl Document { use std::{fs::File, io::BufReader}; let doc = if !path.exists() { - Rope::from(DEFAULT_LINE_ENDING.as_str()) + 
Rope::from(DEFAULT_LINE_ENDING.as_str()) } else { let file = File::open(&path).context(format!("unable to open {:?}", path))?; let mut doc = Rope::from_reader(BufReader::new(file))?;