Work on moving code over to LineEnding instead of assuming '\n'.

Also some general cleanup and some minor fixes along the way.
This commit is contained in:
Nathan Vegdahl 2021-06-20 15:09:10 -07:00
parent 5d22e3c4e5
commit 4efd6713c5
16 changed files with 228 additions and 190 deletions

1
Cargo.lock generated
View file

@ -331,6 +331,7 @@ dependencies = [
"bitflags", "bitflags",
"cassowary", "cassowary",
"crossterm", "crossterm",
"helix-core",
"serde", "serde",
"unicode-segmentation", "unicode-segmentation",
"unicode-width", "unicode-width",

View file

@ -12,7 +12,7 @@
('`', '`'), ('`', '`'),
]; ];
const CLOSE_BEFORE: &str = ")]}'\":;> \n"; // includes space and newline const CLOSE_BEFORE: &str = ")]}'\":;> \n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}"; // includes space and newlines
// insert hook: // insert hook:
// Fn(doc, selection, char) => Option<Transaction> // Fn(doc, selection, char) => Option<Transaction>

View file

@ -1,25 +1,44 @@
/// Determine whether a character is a line break. use crate::LineEnding;
pub fn char_is_linebreak(c: char) -> bool {
matches!( #[derive(Debug, Eq, PartialEq)]
c, pub enum CharCategory {
'\u{000A}' | // LineFeed Whitespace,
'\u{000B}' | // VerticalTab Eol,
'\u{000C}' | // FormFeed Word,
'\u{000D}' | // CarriageReturn Punctuation,
'\u{0085}' | // NextLine Unknown,
'\u{2028}' | // Line Separator }
'\u{2029}' // ParagraphSeparator
) #[inline]
/// Classify a character into one of the [`CharCategory`] buckets.
///
/// The checks run in a fixed priority order: line ending, whitespace, word
/// character, punctuation. Anything matching none of them is `Unknown`.
pub fn categorize_char(ch: char) -> CharCategory {
    // Line endings are tested before general whitespace so that they are
    // reported as `Eol` rather than `Whitespace`.
    if char_is_line_ending(ch) {
        return CharCategory::Eol;
    }
    if ch.is_whitespace() {
        return CharCategory::Whitespace;
    }
    if char_is_word(ch) {
        return CharCategory::Word;
    }
    if char_is_punctuation(ch) {
        return CharCategory::Punctuation;
    }
    CharCategory::Unknown
}
/// Determine whether a character is a line ending.
#[inline]
pub fn char_is_line_ending(ch: char) -> bool {
LineEnding::from_char(ch).is_some()
} }
/// Determine whether a character qualifies as (non-line-break) /// Determine whether a character qualifies as (non-line-break)
/// whitespace. /// whitespace.
pub fn char_is_whitespace(c: char) -> bool { #[inline]
pub fn char_is_whitespace(ch: char) -> bool {
// TODO: this is a naive binary categorization of whitespace // TODO: this is a naive binary categorization of whitespace
// characters. For display, word wrapping, etc. we'll need a better // characters. For display, word wrapping, etc. we'll need a better
// categorization based on e.g. breaking vs non-breaking spaces // categorization based on e.g. breaking vs non-breaking spaces
// and whether they're zero-width or not. // and whether they're zero-width or not.
match c { match ch {
//'\u{1680}' | // Ogham Space Mark (here for completeness, but usually displayed as a dash, not as whitespace) //'\u{1680}' | // Ogham Space Mark (here for completeness, but usually displayed as a dash, not as whitespace)
'\u{0009}' | // Character Tabulation '\u{0009}' | // Character Tabulation
'\u{0020}' | // Space '\u{0020}' | // Space
@ -34,8 +53,81 @@ pub fn char_is_whitespace(c: char) -> bool {
// En Quad, Em Quad, En Space, Em Space, Three-per-em Space, // En Quad, Em Quad, En Space, Em Space, Three-per-em Space,
// Four-per-em Space, Six-per-em Space, Figure Space, // Four-per-em Space, Six-per-em Space, Figure Space,
// Punctuation Space, Thin Space, Hair Space, Zero Width Space. // Punctuation Space, Thin Space, Hair Space, Zero Width Space.
c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true, ch if ('\u{2000}' ..= '\u{200B}').contains(&ch) => true,
_ => false, _ => false,
} }
} }
/// Determine whether a character counts as punctuation.
///
/// In addition to the Unicode punctuation general categories, math,
/// currency and modifier symbols are treated as punctuation here.
#[inline]
pub fn char_is_punctuation(ch: char) -> bool {
    use unicode_general_category::{get_general_category, GeneralCategory as Gc};

    let category = get_general_category(ch);
    matches!(
        category,
        // Punctuation proper.
        Gc::OtherPunctuation
            | Gc::OpenPunctuation
            | Gc::ClosePunctuation
            | Gc::InitialPunctuation
            | Gc::FinalPunctuation
            | Gc::ConnectorPunctuation
            | Gc::DashPunctuation
            // Symbol categories that are also considered punctuation.
            | Gc::MathSymbol
            | Gc::CurrencySymbol
            | Gc::ModifierSymbol
    )
}
/// Determine whether a character is a word character: an underscore or
/// anything `char::is_alphanumeric` accepts.
#[inline]
pub fn char_is_word(ch: char) -> bool {
    match ch {
        '_' => true,
        other => other.is_alphanumeric(),
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Checks that `categorize_char` puts representative characters of each
    /// class into the expected `CharCategory`.
    #[test]
    fn test_categorize() {
        const EOL_TEST_CASE: &str = "\n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}";
        const WORD_TEST_CASE: &str = "_hello_world_あいうえおー1234567890";
        const PUNCTUATION_TEST_CASE: &str =
            "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~!”#$%&’()*+、。:;<=>?@「」^`{|}~";
        const WHITESPACE_TEST_CASE: &str = "  ";

        for ch in EOL_TEST_CASE.chars() {
            // `{:?}` keeps the control characters readable in the failure output.
            assert_eq!(
                CharCategory::Eol,
                categorize_char(ch),
                "Testing {:?}, but got `{:?}` instead of `CharCategory::Eol`",
                ch,
                categorize_char(ch)
            );
        }

        for ch in WHITESPACE_TEST_CASE.chars() {
            assert_eq!(
                CharCategory::Whitespace,
                categorize_char(ch),
                "Testing '{}', but got `{:?}` instead of `CharCategory::Whitespace`",
                ch,
                categorize_char(ch)
            );
        }

        for ch in WORD_TEST_CASE.chars() {
            assert_eq!(
                CharCategory::Word,
                categorize_char(ch),
                "Testing '{}', but got `{:?}` instead of `CharCategory::Word`",
                ch,
                categorize_char(ch)
            );
        }

        for ch in PUNCTUATION_TEST_CASE.chars() {
            assert_eq!(
                CharCategory::Punctuation,
                categorize_char(ch),
                "Testing '{}', but got `{:?}` instead of `CharCategory::Punctuation`",
                ch,
                categorize_char(ch)
            );
        }
    }
}

View file

@ -113,6 +113,6 @@ pub fn cache_dir() -> std::path::PathBuf {
pub use state::State; pub use state::State;
pub use line_ending::{ pub use line_ending::{
auto_detect_line_ending, get_line_ending, line_end, LineEnding, DEFAULT_LINE_ENDING, auto_detect_line_ending, get_line_ending, line_end_char_index, LineEnding, DEFAULT_LINE_ENDING,
}; };
pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction}; pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction};

View file

@ -1,5 +1,10 @@
use crate::{Rope, RopeGraphemes, RopeSlice}; use crate::{Rope, RopeGraphemes, RopeSlice};
#[cfg(target_os = "windows")]
pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf;
#[cfg(not(target_os = "windows"))]
pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF;
/// Represents one of the valid Unicode line endings. /// Represents one of the valid Unicode line endings.
#[derive(PartialEq, Copy, Clone, Debug)] #[derive(PartialEq, Copy, Clone, Debug)]
pub enum LineEnding { pub enum LineEnding {
@ -14,6 +19,7 @@ pub enum LineEnding {
} }
impl LineEnding { impl LineEnding {
#[inline]
pub fn len_chars(&self) -> usize { pub fn len_chars(&self) -> usize {
match self { match self {
Self::Crlf => 2, Self::Crlf => 2,
@ -21,6 +27,7 @@ pub fn len_chars(&self) -> usize {
} }
} }
#[inline]
pub fn as_str(&self) -> &'static str { pub fn as_str(&self) -> &'static str {
match self { match self {
Self::Crlf => "\u{000D}\u{000A}", Self::Crlf => "\u{000D}\u{000A}",
@ -34,6 +41,22 @@ pub fn as_str(&self) -> &'static str {
} }
} }
/// Map a single character to the line ending it represents, if any.
///
/// Returns `None` for every character that is not a line ending. `Crlf` is
/// never returned, since no single character maps to it.
#[inline]
pub fn from_char(ch: char) -> Option<LineEnding> {
    let ending = match ch {
        '\u{000A}' => LineEnding::LF,
        '\u{000B}' => LineEnding::VT,
        '\u{000C}' => LineEnding::FF,
        '\u{000D}' => LineEnding::CR,
        '\u{0085}' => LineEnding::Nel,
        '\u{2028}' => LineEnding::LS,
        '\u{2029}' => LineEnding::PS,
        // Not a line ending.
        _ => return None,
    };
    Some(ending)
}
#[inline]
pub fn from_str(g: &str) -> Option<LineEnding> { pub fn from_str(g: &str) -> Option<LineEnding> {
match g { match g {
"\u{000D}\u{000A}" => Some(LineEnding::Crlf), "\u{000D}\u{000A}" => Some(LineEnding::Crlf),
@ -49,6 +72,7 @@ pub fn from_str(g: &str) -> Option<LineEnding> {
} }
} }
#[inline]
pub fn from_rope_slice(g: &RopeSlice) -> Option<LineEnding> { pub fn from_rope_slice(g: &RopeSlice) -> Option<LineEnding> {
if let Some(text) = g.as_str() { if let Some(text) = g.as_str() {
LineEnding::from_str(text) LineEnding::from_str(text)
@ -62,6 +86,11 @@ pub fn from_rope_slice(g: &RopeSlice) -> Option<LineEnding> {
} }
} }
/// Determine whether a string is a line ending, i.e. whether
/// `LineEnding::from_str` recognizes it.
#[inline]
pub fn str_is_line_ending(s: &str) -> bool {
    let parsed = LineEnding::from_str(s);
    parsed.is_some()
}
/// Attempts to detect what line ending the passed document uses. /// Attempts to detect what line ending the passed document uses.
pub fn auto_detect_line_ending(doc: &Rope) -> Option<LineEnding> { pub fn auto_detect_line_ending(doc: &Rope) -> Option<LineEnding> {
// Return first matched line ending. Not all possible line endings // Return first matched line ending. Not all possible line endings
@ -96,19 +125,13 @@ pub fn get_line_ending(line: &RopeSlice) -> Option<LineEnding> {
} }
/// Returns the char index of the end of the given line, not including its line ending. /// Returns the char index of the end of the given line, not including its line ending.
pub fn line_end(slice: &RopeSlice, line: usize) -> usize { pub fn line_end_char_index(slice: &RopeSlice, line: usize) -> usize {
slice.line_to_char(line + 1).saturating_sub( slice.line_to_char(line + 1)
get_line_ending(&slice.line(line)) - get_line_ending(&slice.line(line))
.map(|le| le.len_chars()) .map(|le| le.len_chars())
.unwrap_or(0), .unwrap_or(0)
)
} }
#[cfg(target_os = "windows")]
pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf;
#[cfg(not(target_os = "windows"))]
pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF;
#[cfg(test)] #[cfg(test)]
mod line_ending_tests { mod line_ending_tests {
use super::*; use super::*;
@ -150,11 +173,11 @@ fn test_autodetect() {
fn test_rope_slice_to_line_ending() { fn test_rope_slice_to_line_ending() {
let r = Rope::from_str("\r\n"); let r = Rope::from_str("\r\n");
assert_eq!( assert_eq!(
rope_slice_to_line_ending(&r.slice(1..2)), LineEnding::from_rope_slice(&r.slice(1..2)),
Some(LineEnding::LF) Some(LineEnding::LF)
); );
assert_eq!( assert_eq!(
rope_slice_to_line_ending(&r.slice(0..2)), LineEnding::from_rope_slice(&r.slice(0..2)),
Some(LineEnding::Crlf) Some(LineEnding::Crlf)
); );
} }

View file

@ -3,9 +3,13 @@
use ropey::iter::Chars; use ropey::iter::Chars;
use crate::{ use crate::{
chars::{
categorize_char, char_is_line_ending, char_is_punctuation, char_is_whitespace,
char_is_word, CharCategory,
},
coords_at_pos, get_line_ending, coords_at_pos, get_line_ending,
graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary}, graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary},
line_end, pos_at_coords, Position, Range, RopeSlice, line_end_char_index, pos_at_coords, Position, Range, RopeSlice,
}; };
#[derive(Debug, Copy, Clone, PartialEq, Eq)] #[derive(Debug, Copy, Clone, PartialEq, Eq)]
@ -37,9 +41,8 @@ pub fn move_horizontally(
nth_prev_grapheme_boundary(slice, pos, count).max(start) nth_prev_grapheme_boundary(slice, pos, count).max(start)
} }
Direction::Forward => { Direction::Forward => {
// Line end is pos at the start of next line - 1 let end_char_idx = line_end_char_index(&slice, line);
let end = line_end(&slice, line); nth_next_grapheme_boundary(slice, pos, count).min(end_char_idx)
nth_next_grapheme_boundary(slice, pos, count).min(end)
} }
}; };
let anchor = match behaviour { let anchor = match behaviour {
@ -68,8 +71,11 @@ pub fn move_vertically(
), ),
}; };
// convert to 0-indexed, subtract another 1 because len_chars() counts \n // Length of the line sans line-ending.
let new_line_len = slice.line(new_line).len_chars().saturating_sub(2); let new_line_len = {
let line = slice.line(new_line);
line.len_chars() - get_line_ending(&line).map(|le| le.len_chars()).unwrap_or(0)
};
let new_col = std::cmp::min(horiz as usize, new_line_len); let new_col = std::cmp::min(horiz as usize, new_line_len);
@ -104,64 +110,6 @@ fn word_move(slice: RopeSlice, mut range: Range, count: usize, target: WordMotio
} }
// ---- util ------------ // ---- util ------------
#[inline]
pub(crate) fn is_word(ch: char) -> bool {
ch.is_alphanumeric() || ch == '_'
}
#[inline]
pub(crate) fn is_end_of_line(ch: char) -> bool {
ch == '\n'
}
#[inline]
// Whitespace, but not end of line
pub(crate) fn is_strict_whitespace(ch: char) -> bool {
ch.is_whitespace() && !is_end_of_line(ch)
}
#[inline]
pub(crate) fn is_punctuation(ch: char) -> bool {
use unicode_general_category::{get_general_category, GeneralCategory};
matches!(
get_general_category(ch),
GeneralCategory::OtherPunctuation
| GeneralCategory::OpenPunctuation
| GeneralCategory::ClosePunctuation
| GeneralCategory::InitialPunctuation
| GeneralCategory::FinalPunctuation
| GeneralCategory::ConnectorPunctuation
| GeneralCategory::DashPunctuation
| GeneralCategory::MathSymbol
| GeneralCategory::CurrencySymbol
| GeneralCategory::ModifierSymbol
)
}
#[derive(Debug, Eq, PartialEq)]
pub enum Category {
Whitespace,
Eol,
Word,
Punctuation,
Unknown,
}
#[inline]
pub(crate) fn categorize(ch: char) -> Category {
if is_end_of_line(ch) {
Category::Eol
} else if ch.is_whitespace() {
Category::Whitespace
} else if is_word(ch) {
Category::Word
} else if is_punctuation(ch) {
Category::Punctuation
} else {
Category::Unknown
}
}
#[inline] #[inline]
/// Returns first index that doesn't satisfy a given predicate when /// Returns first index that doesn't satisfy a given predicate when
@ -235,7 +183,8 @@ fn range_to_target(&mut self, target: WordMotionTarget, origin: Range) -> Range
let mut phase = WordMotionPhase::Start; let mut phase = WordMotionPhase::Start;
let mut head = origin.head; let mut head = origin.head;
let mut anchor: Option<usize> = None; let mut anchor: Option<usize> = None;
let is_boundary = |a: char, b: Option<char>| categorize(a) != categorize(b.unwrap_or(a)); let is_boundary =
|a: char, b: Option<char>| categorize_char(a) != categorize_char(b.unwrap_or(a));
while let Some(peek) = characters.peek().copied() { while let Some(peek) = characters.peek().copied() {
phase = match phase { phase = match phase {
WordMotionPhase::Start => { WordMotionPhase::Start => {
@ -244,7 +193,8 @@ fn range_to_target(&mut self, target: WordMotionTarget, origin: Range) -> Range
break; // We're at the end, so there's nothing to do. break; // We're at the end, so there's nothing to do.
} }
// Anchor may remain here if the head wasn't at a boundary // Anchor may remain here if the head wasn't at a boundary
if !is_boundary(peek, characters.peek().copied()) && !is_end_of_line(peek) { if !is_boundary(peek, characters.peek().copied()) && !char_is_line_ending(peek)
{
anchor = Some(head); anchor = Some(head);
} }
// First character is always skipped by the head // First character is always skipped by the head
@ -252,7 +202,7 @@ fn range_to_target(&mut self, target: WordMotionTarget, origin: Range) -> Range
WordMotionPhase::SkipNewlines WordMotionPhase::SkipNewlines
} }
WordMotionPhase::SkipNewlines => { WordMotionPhase::SkipNewlines => {
if is_end_of_line(peek) { if char_is_line_ending(peek) {
characters.next(); characters.next();
if characters.peek().is_some() { if characters.peek().is_some() {
advance(&mut head); advance(&mut head);
@ -286,12 +236,12 @@ fn reached_target(target: WordMotionTarget, peek: char, next_peek: Option<&char>
match target { match target {
WordMotionTarget::NextWordStart => { WordMotionTarget::NextWordStart => {
((categorize(peek) != categorize(*next_peek)) ((categorize_char(peek) != categorize_char(*next_peek))
&& (is_end_of_line(*next_peek) || !next_peek.is_whitespace())) && (char_is_line_ending(*next_peek) || !next_peek.is_whitespace()))
} }
WordMotionTarget::NextWordEnd | WordMotionTarget::PrevWordStart => { WordMotionTarget::NextWordEnd | WordMotionTarget::PrevWordStart => {
((categorize(peek) != categorize(*next_peek)) ((categorize_char(peek) != categorize_char(*next_peek))
&& (!peek.is_whitespace() || is_end_of_line(*next_peek))) && (!peek.is_whitespace() || char_is_line_ending(*next_peek)))
} }
} }
} }
@ -330,7 +280,7 @@ fn test_vertical_move() {
slice, slice,
move_vertically(slice, range, Direction::Forward, 1, Movement::Move).head move_vertically(slice, range, Direction::Forward, 1, Movement::Move).head
), ),
(1, 2).into() (1, 3).into()
); );
} }
@ -343,12 +293,12 @@ fn horizontal_moves_through_single_line_in_single_line_text() {
let mut range = Range::point(position); let mut range = Range::point(position);
let moves_and_expected_coordinates = [ let moves_and_expected_coordinates = [
((Direction::Forward, 1usize), (0, 1)), ((Direction::Forward, 1usize), (0, 1)), // T|his is a simple alphabetic line
((Direction::Forward, 2usize), (0, 3)), ((Direction::Forward, 2usize), (0, 3)), // Thi|s is a simple alphabetic line
((Direction::Forward, 0usize), (0, 3)), ((Direction::Forward, 0usize), (0, 3)), // Thi|s is a simple alphabetic line
((Direction::Forward, 999usize), (0, 31)), ((Direction::Forward, 999usize), (0, 32)), // This is a simple alphabetic line|
((Direction::Forward, 999usize), (0, 31)), ((Direction::Forward, 999usize), (0, 32)), // This is a simple alphabetic line|
((Direction::Backward, 999usize), (0, 0)), ((Direction::Backward, 999usize), (0, 0)), // |This is a simple alphabetic line
]; ];
for ((direction, amount), coordinates) in IntoIter::new(moves_and_expected_coordinates) { for ((direction, amount), coordinates) in IntoIter::new(moves_and_expected_coordinates) {
@ -366,15 +316,15 @@ fn horizontal_moves_through_single_line_in_multiline_text() {
let mut range = Range::point(position); let mut range = Range::point(position);
let moves_and_expected_coordinates = IntoIter::new([ let moves_and_expected_coordinates = IntoIter::new([
((Direction::Forward, 1usize), (0, 1)), // M_ltiline ((Direction::Forward, 1usize), (0, 1)), // M|ultiline\n
((Direction::Forward, 2usize), (0, 3)), // Mul_iline ((Direction::Forward, 2usize), (0, 3)), // Mul|tiline\n
((Direction::Backward, 6usize), (0, 0)), // _ultiline ((Direction::Backward, 6usize), (0, 0)), // |Multiline\n
((Direction::Backward, 999usize), (0, 0)), // _ultiline ((Direction::Backward, 999usize), (0, 0)), // |Multiline\n
((Direction::Forward, 3usize), (0, 3)), // Mul_iline ((Direction::Forward, 3usize), (0, 3)), // Mul|tiline\n
((Direction::Forward, 0usize), (0, 3)), // Mul_iline ((Direction::Forward, 0usize), (0, 3)), // Mul|tiline\n
((Direction::Backward, 0usize), (0, 3)), // Mul_iline ((Direction::Backward, 0usize), (0, 3)), // Mul|tiline\n
((Direction::Forward, 999usize), (0, 9)), // Multilin_ ((Direction::Forward, 999usize), (0, 9)), // Multiline|\n
((Direction::Forward, 999usize), (0, 9)), // Multilin_ ((Direction::Forward, 999usize), (0, 9)), // Multiline|\n
]); ]);
for ((direction, amount), coordinates) in moves_and_expected_coordinates { for ((direction, amount), coordinates) in moves_and_expected_coordinates {
@ -446,7 +396,7 @@ enum Axis {
// First descent preserves column as the target line is wider // First descent preserves column as the target line is wider
((Axis::V, Direction::Forward, 1usize), (1, 8)), ((Axis::V, Direction::Forward, 1usize), (1, 8)),
// Second descent clamps column as the target line is shorter // Second descent clamps column as the target line is shorter
((Axis::V, Direction::Forward, 1usize), (2, 4)), ((Axis::V, Direction::Forward, 1usize), (2, 5)),
// Third descent restores the original column // Third descent restores the original column
((Axis::V, Direction::Forward, 1usize), (3, 8)), ((Axis::V, Direction::Forward, 1usize), (3, 8)),
// Behaviour is preserved even through long jumps // Behaviour is preserved even through long jumps
@ -760,45 +710,4 @@ fn test_behaviour_when_moving_to_end_of_next_words() {
} }
} }
} }
#[test]
fn test_categorize() {
const WORD_TEST_CASE: &'static str =
"_hello_world_あいうえおー1234567890";
const PUNCTUATION_TEST_CASE: &'static str =
"!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~!”#$%&’()*+、。:;<=>?@「」^`{|}~";
const WHITESPACE_TEST_CASE: &'static str = "  ";
assert_eq!(Category::Eol, categorize('\n'));
for ch in WHITESPACE_TEST_CASE.chars() {
assert_eq!(
Category::Whitespace,
categorize(ch),
"Testing '{}', but got `{:?}` instead of `Category::Whitespace`",
ch,
categorize(ch)
);
}
for ch in WORD_TEST_CASE.chars() {
assert_eq!(
Category::Word,
categorize(ch),
"Testing '{}', but got `{:?}` instead of `Category::Word`",
ch,
categorize(ch)
);
}
for ch in PUNCTUATION_TEST_CASE.chars() {
assert_eq!(
Category::Punctuation,
categorize(ch),
"Testing '{}', but got `{:?}` instead of `Category::Punctuation`",
ch,
categorize(ch)
);
}
}
} }

View file

@ -1,4 +1,5 @@
use crate::{ use crate::{
chars::char_is_line_ending,
graphemes::{nth_next_grapheme_boundary, RopeGraphemes}, graphemes::{nth_next_grapheme_boundary, RopeGraphemes},
Rope, RopeSlice, Rope, RopeSlice,
}; };
@ -23,8 +24,9 @@ pub const fn is_zero(self) -> bool {
pub fn traverse(self, text: &crate::Tendril) -> Self { pub fn traverse(self, text: &crate::Tendril) -> Self {
let Self { mut row, mut col } = self; let Self { mut row, mut col } = self;
// TODO: there should be a better way here // TODO: there should be a better way here
for ch in text.chars() { let mut chars = text.chars().peekable();
if ch == '\n' { while let Some(ch) = chars.next() {
if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
row += 1; row += 1;
col = 0; col = 0;
} else { } else {

View file

@ -1,4 +1,4 @@
use crate::{regex::Regex, Change, Rope, RopeSlice, Transaction}; use crate::{chars::char_is_line_ending, regex::Regex, Change, Rope, RopeSlice, Transaction};
pub use helix_syntax::{get_language, get_language_name, Lang}; pub use helix_syntax::{get_language, get_language_name, Lang};
use std::{ use std::{
@ -579,9 +579,10 @@ fn traverse(point: Point, text: &Tendril) -> Point {
mut column, mut column,
} = point; } = point;
// TODO: there should be a better way here // TODO: there should be a better way here.
for ch in text.bytes() { let mut chars = text.chars().peekable();
if ch == b'\n' { while let Some(ch) = chars.next() {
if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
row += 1; row += 1;
column = 0; column = 0;
} else { } else {

View file

@ -3,7 +3,7 @@
Call, Error, OffsetEncoding, Result, Call, Error, OffsetEncoding, Result,
}; };
use helix_core::{find_root, ChangeSet, Rope}; use helix_core::{chars::char_is_line_ending, find_root, ChangeSet, Rope};
use jsonrpc_core as jsonrpc; use jsonrpc_core as jsonrpc;
use lsp_types as lsp; use lsp_types as lsp;
use serde_json::Value; use serde_json::Value;
@ -337,8 +337,9 @@ fn traverse(pos: lsp::Position, text: RopeSlice) -> lsp::Position {
mut character, mut character,
} = pos; } = pos;
for ch in text.chars() { let mut chars = text.chars().peekable();
if ch == '\n' { while let Some(ch) = chars.next() {
if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
line += 1; line += 1;
character = 0; character = 0;
} else { } else {

View file

@ -1,6 +1,6 @@
use helix_core::{ use helix_core::{
comment, coords_at_pos, find_first_non_whitespace_char, find_root, get_line_ending, graphemes, comment, coords_at_pos, find_first_non_whitespace_char, find_root, get_line_ending, graphemes,
indent, line_end, match_brackets, indent, line_end_char_index, match_brackets,
movement::{self, Direction}, movement::{self, Direction},
object, pos_at_coords, object, pos_at_coords,
regex::{self, Regex}, regex::{self, Regex},
@ -342,7 +342,7 @@ fn move_line_end(cx: &mut Context) {
let text = doc.text(); let text = doc.text();
let line = text.char_to_line(range.head); let line = text.char_to_line(range.head);
let pos = line_end(&text.slice(..), line); let pos = line_end_char_index(&text.slice(..), line);
Range::new(pos, pos) Range::new(pos, pos)
}); });
@ -490,6 +490,8 @@ fn find_char_impl<F>(cx: &mut Context, search_fn: F, inclusive: bool, extend: bo
let count = cx.count(); let count = cx.count();
// need to wait for next key // need to wait for next key
// TODO: should this be done by grapheme rather than char? For example,
// we can't properly handle the line-ending case here in terms of char.
cx.on_next_key(move |cx, event| { cx.on_next_key(move |cx, event| {
let ch = match event { let ch = match event {
KeyEvent { KeyEvent {
@ -623,7 +625,7 @@ fn replace(cx: &mut Context) {
KeyEvent { KeyEvent {
code: KeyCode::Enter, code: KeyCode::Enter,
.. ..
} => Some('\n'), // TODO: replace this with DEFAULT_LINE_ENDING } => Some('\n'), // TODO: use the document's default line ending.
_ => None, _ => None,
}; };
@ -763,7 +765,7 @@ fn extend_line_end(cx: &mut Context) {
let text = doc.text(); let text = doc.text();
let line = text.char_to_line(range.head); let line = text.char_to_line(range.head);
let pos = line_end(&text.slice(..), line); let pos = line_end_char_index(&text.slice(..), line);
Range::new(range.anchor, pos) Range::new(range.anchor, pos)
}); });
@ -1642,7 +1644,7 @@ fn append_to_line(cx: &mut Context) {
let selection = doc.selection(view.id).transform(|range| { let selection = doc.selection(view.id).transform(|range| {
let text = doc.text(); let text = doc.text();
let line = text.char_to_line(range.head); let line = text.char_to_line(range.head);
let pos = line_end(&text.slice(..), line); let pos = line_end_char_index(&text.slice(..), line);
Range::new(pos, pos) Range::new(pos, pos)
}); });
doc.set_selection(view.id, selection); doc.set_selection(view.id, selection);

View file

@ -110,6 +110,8 @@ fn to_span(text: pulldown_cmark::CowStr) -> Span {
// TODO: replace tabs with indentation // TODO: replace tabs with indentation
let mut slice = &text[start..end]; let mut slice = &text[start..end];
// TODO: do we need to handle all unicode line endings
// here, or is just '\n' okay?
while let Some(end) = slice.find('\n') { while let Some(end) = slice.find('\n') {
// emit span up to newline // emit span up to newline
let text = &slice[..end]; let text = &slice[..end];

View file

@ -22,3 +22,4 @@ unicode-segmentation = "1.2"
unicode-width = "0.1" unicode-width = "0.1"
crossterm = { version = "0.20", optional = true } crossterm = { version = "0.20", optional = true }
serde = { version = "1", "optional" = true, features = ["derive"]} serde = { version = "1", "optional" = true, features = ["derive"]}
helix-core = { version = "0.2", path = "../helix-core" }

View file

@ -47,6 +47,7 @@
//! ]); //! ]);
//! ``` //! ```
use crate::style::Style; use crate::style::Style;
use helix_core::line_ending::str_is_line_ending;
use std::borrow::Cow; use std::borrow::Cow;
use unicode_segmentation::UnicodeSegmentation; use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr; use unicode_width::UnicodeWidthStr;
@ -177,7 +178,7 @@ pub fn styled_graphemes(
symbol: g, symbol: g,
style: base_style.patch(self.style), style: base_style.patch(self.style),
}) })
.filter(|s| s.symbol != "\n") .filter(|s| !str_is_line_ending(s.symbol))
} }
} }

View file

@ -1,4 +1,5 @@
use crate::text::StyledGrapheme; use crate::text::StyledGrapheme;
use helix_core::line_ending::str_is_line_ending;
use unicode_segmentation::UnicodeSegmentation; use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr; use unicode_width::UnicodeWidthStr;
@ -62,13 +63,13 @@ fn next_line(&mut self) -> Option<(&[StyledGrapheme<'a>], u16)> {
// Ignore characters wider that the total max width. // Ignore characters wider that the total max width.
if symbol.width() as u16 > self.max_line_width if symbol.width() as u16 > self.max_line_width
// Skip leading whitespace when trim is enabled. // Skip leading whitespace when trim is enabled.
|| self.trim && symbol_whitespace && symbol != "\n" && current_line_width == 0 || self.trim && symbol_whitespace && !str_is_line_ending(symbol) && current_line_width == 0
{ {
continue; continue;
} }
// Break on newline and discard it. // Break on newline and discard it.
if symbol == "\n" { if str_is_line_ending(symbol) {
if prev_whitespace { if prev_whitespace {
current_line_width = width_to_last_word_end; current_line_width = width_to_last_word_end;
self.current_line.truncate(symbols_to_last_word_end); self.current_line.truncate(symbols_to_last_word_end);
@ -170,7 +171,7 @@ fn next_line(&mut self) -> Option<(&[StyledGrapheme<'a>], u16)> {
} }
// Break on newline and discard it. // Break on newline and discard it.
if symbol == "\n" { if str_is_line_ending(symbol) {
break; break;
} }
@ -199,7 +200,7 @@ fn next_line(&mut self) -> Option<(&[StyledGrapheme<'a>], u16)> {
if skip_rest { if skip_rest {
for StyledGrapheme { symbol, .. } in &mut self.symbols { for StyledGrapheme { symbol, .. } in &mut self.symbols {
if symbol == "\n" { if str_is_line_ending(symbol) {
break; break;
} }
} }

View file

@ -10,7 +10,7 @@
use helix_core::{ use helix_core::{
auto_detect_line_ending, auto_detect_line_ending,
chars::{char_is_linebreak, char_is_whitespace}, chars::{char_is_line_ending, char_is_whitespace},
history::History, history::History,
syntax::{LanguageConfiguration, LOADER}, syntax::{LanguageConfiguration, LOADER},
ChangeSet, Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction, ChangeSet, Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction,
@ -81,6 +81,9 @@ pub struct Document {
/// Current indent style. /// Current indent style.
pub indent_style: IndentStyle, pub indent_style: IndentStyle,
/// The document's default line ending.
pub line_ending: LineEnding,
syntax: Option<Syntax>, syntax: Option<Syntax>,
// /// Corresponding language scope name. Usually `source.<lang>`. // /// Corresponding language scope name. Usually `source.<lang>`.
pub(crate) language: Option<Arc<LanguageConfiguration>>, pub(crate) language: Option<Arc<LanguageConfiguration>>,
@ -99,7 +102,6 @@ pub struct Document {
diagnostics: Vec<Diagnostic>, diagnostics: Vec<Diagnostic>,
language_server: Option<Arc<helix_lsp::Client>>, language_server: Option<Arc<helix_lsp::Client>>,
line_ending: LineEnding,
} }
use std::fmt; use std::fmt;
@ -254,21 +256,21 @@ pub fn new(text: Rope) -> Self {
pub fn load(path: PathBuf) -> Result<Self, Error> { pub fn load(path: PathBuf) -> Result<Self, Error> {
use std::{fs::File, io::BufReader}; use std::{fs::File, io::BufReader};
let doc = if !path.exists() { let mut doc = if !path.exists() {
Rope::from(DEFAULT_LINE_ENDING.as_str()) Rope::from(DEFAULT_LINE_ENDING.as_str())
} else { } else {
let file = File::open(&path).context(format!("unable to open {:?}", path))?; let file = File::open(&path).context(format!("unable to open {:?}", path))?;
let mut doc = Rope::from_reader(BufReader::new(file))?; Rope::from_reader(BufReader::new(file))?
// add missing newline at the end of file
if doc.len_bytes() == 0 || doc.byte(doc.len_bytes() - 1) != b'\n' {
doc.insert_char(doc.len_chars(), '\n');
}
doc
}; };
// search for line endings // search for line endings
let line_ending = auto_detect_line_ending(&doc).unwrap_or(DEFAULT_LINE_ENDING); let line_ending = auto_detect_line_ending(&doc).unwrap_or(DEFAULT_LINE_ENDING);
// Add a missing line ending at the end of the file. An empty document always
// gets one; otherwise one is appended only when the final char is NOT already
// a line ending. The `len_bytes() == 0` check must come first so that
// `len_chars() - 1` is never evaluated on an empty rope.
if doc.len_bytes() == 0 || !char_is_line_ending(doc.char(doc.len_chars() - 1)) {
    doc.insert(doc.len_chars(), line_ending.as_str());
}
let mut doc = Self::new(doc); let mut doc = Self::new(doc);
// set the path and try detecting the language // set the path and try detecting the language
doc.set_path(&path)?; doc.set_path(&path)?;
@ -379,7 +381,7 @@ fn detect_indent_style(&mut self) {
Some(' ') => false, Some(' ') => false,
// Ignore blank lines. // Ignore blank lines.
Some(c) if char_is_linebreak(c) => continue, Some(c) if char_is_line_ending(c) => continue,
_ => { _ => {
prev_line_is_tabs = false; prev_line_is_tabs = false;
@ -403,7 +405,7 @@ fn detect_indent_style(&mut self) {
c if char_is_whitespace(c) => count_is_done = true, c if char_is_whitespace(c) => count_is_done = true,
// Ignore blank lines. // Ignore blank lines.
c if char_is_linebreak(c) => continue 'outer, c if char_is_line_ending(c) => continue 'outer,
_ => break, _ => break,
} }

View file

@ -12,7 +12,7 @@
pub use helix_core::diagnostic::Severity; pub use helix_core::diagnostic::Severity;
pub use helix_core::register::Registers; pub use helix_core::register::Registers;
use helix_core::Position; use helix_core::{Position, DEFAULT_LINE_ENDING};
#[derive(Debug)] #[derive(Debug)]
pub struct Editor { pub struct Editor {
@ -150,7 +150,7 @@ pub fn switch(&mut self, id: DocumentId, action: Action) {
pub fn new_file(&mut self, action: Action) -> DocumentId { pub fn new_file(&mut self, action: Action) -> DocumentId {
use helix_core::Rope; use helix_core::Rope;
let doc = Document::new(Rope::from("\n")); let doc = Document::new(Rope::from(DEFAULT_LINE_ENDING.as_str()));
let id = self.documents.insert(doc); let id = self.documents.insert(doc);
self.documents[id].id = id; self.documents[id].id = id;
self.switch(id, action); self.switch(id, action);