2021-07-01 21:24:22 +02:00
use crate ::{ Rope , RopeSlice } ;
2021-06-16 17:05:14 +02:00
2021-06-21 00:09:10 +02:00
/// The line ending used by default on the compilation target: CRLF on Windows.
#[cfg(target_os = "windows")]
pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf;
/// The line ending used by default on the compilation target: LF everywhere
/// except Windows.
#[cfg(not(target_os = "windows"))]
pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF;
2021-06-16 17:05:14 +02:00
/// Represents one of the valid Unicode line endings.
///
/// `Crlf` and `LF` are always available; every other variant only exists when
/// the `unicode-lines` feature is enabled.
#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)]
pub enum LineEnding {
    /// CarriageReturn followed by LineFeed
    Crlf,
    /// U+000A -- LineFeed
    LF,
    /// U+000B -- VerticalTab
    #[cfg(feature = "unicode-lines")]
    VT,
    /// U+000C -- FormFeed
    #[cfg(feature = "unicode-lines")]
    FF,
    /// U+000D -- CarriageReturn
    #[cfg(feature = "unicode-lines")]
    CR,
    /// U+0085 -- NextLine
    #[cfg(feature = "unicode-lines")]
    Nel,
    /// U+2028 -- Line Separator
    #[cfg(feature = "unicode-lines")]
    LS,
    /// U+2029 -- ParagraphSeparator
    #[cfg(feature = "unicode-lines")]
    PS,
}
2021-06-17 13:49:50 +02:00
impl LineEnding {
2021-06-21 00:09:10 +02:00
#[ inline ]
2021-10-24 10:28:29 +02:00
pub const fn len_chars ( & self ) -> usize {
2021-06-17 13:49:50 +02:00
match self {
Self ::Crlf = > 2 ,
_ = > 1 ,
}
}
2021-06-21 00:09:10 +02:00
#[ inline ]
2021-10-24 10:28:29 +02:00
pub const fn as_str ( & self ) -> & 'static str {
2021-06-17 13:49:50 +02:00
match self {
Self ::Crlf = > " \u{000D} \u{000A} " ,
Self ::LF = > " \u{000A} " ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-19 14:03:14 +02:00
Self ::VT = > " \u{000B} " ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-19 14:03:14 +02:00
Self ::FF = > " \u{000C} " ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-20 09:40:41 +02:00
Self ::CR = > " \u{000D} " ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-20 09:40:41 +02:00
Self ::Nel = > " \u{0085} " ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-20 09:40:41 +02:00
Self ::LS = > " \u{2028} " ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-19 14:03:14 +02:00
Self ::PS = > " \u{2029} " ,
2021-06-17 13:49:50 +02:00
}
}
2021-06-21 00:09:10 +02:00
#[ inline ]
2021-10-24 10:28:29 +02:00
pub const fn from_char ( ch : char ) -> Option < LineEnding > {
2021-06-21 00:09:10 +02:00
match ch {
'\u{000A}' = > Some ( LineEnding ::LF ) ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-21 00:09:10 +02:00
'\u{000B}' = > Some ( LineEnding ::VT ) ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-21 00:09:10 +02:00
'\u{000C}' = > Some ( LineEnding ::FF ) ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-21 00:09:10 +02:00
'\u{000D}' = > Some ( LineEnding ::CR ) ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-21 00:09:10 +02:00
'\u{0085}' = > Some ( LineEnding ::Nel ) ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-21 00:09:10 +02:00
'\u{2028}' = > Some ( LineEnding ::LS ) ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-21 00:09:10 +02:00
'\u{2029}' = > Some ( LineEnding ::PS ) ,
// Not a line ending
_ = > None ,
}
}
2021-06-21 01:13:59 +02:00
// Normally we'd want to implement the FromStr trait, but in this case
// that would force us into a different return type than from_char or
// or from_rope_slice, which would be weird.
#[ allow(clippy::should_implement_trait) ]
2021-06-21 00:09:10 +02:00
#[ inline ]
2021-06-20 09:40:41 +02:00
pub fn from_str ( g : & str ) -> Option < LineEnding > {
match g {
" \u{000D} \u{000A} " = > Some ( LineEnding ::Crlf ) ,
" \u{000A} " = > Some ( LineEnding ::LF ) ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-20 09:40:41 +02:00
" \u{000B} " = > Some ( LineEnding ::VT ) ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-20 09:40:41 +02:00
" \u{000C} " = > Some ( LineEnding ::FF ) ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-20 09:40:41 +02:00
" \u{000D} " = > Some ( LineEnding ::CR ) ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-20 09:40:41 +02:00
" \u{0085} " = > Some ( LineEnding ::Nel ) ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-20 09:40:41 +02:00
" \u{2028} " = > Some ( LineEnding ::LS ) ,
2022-03-16 10:02:37 +01:00
#[ cfg(feature = " unicode-lines " ) ]
2021-06-20 09:40:41 +02:00
" \u{2029} " = > Some ( LineEnding ::PS ) ,
// Not a line ending
_ = > None ,
}
2021-06-16 17:05:14 +02:00
}
2021-06-21 00:09:10 +02:00
#[ inline ]
2021-06-20 09:40:41 +02:00
pub fn from_rope_slice ( g : & RopeSlice ) -> Option < LineEnding > {
if let Some ( text ) = g . as_str ( ) {
LineEnding ::from_str ( text )
} else {
// Non-contiguous, so it can't be a line ending.
// Specifically, Ropey guarantees that CRLF is always
// contiguous. And the remaining line endings are all
// single `char`s, and therefore trivially contiguous.
None
}
2021-06-16 17:05:14 +02:00
}
}
2021-06-21 00:09:10 +02:00
/// Reports whether `s` is, in its entirety, exactly one line ending as
/// recognized by `LineEnding::from_str`.
#[inline]
pub fn str_is_line_ending(s: &str) -> bool {
    let parsed = LineEnding::from_str(s);
    parsed.is_some()
}
2022-02-07 16:37:09 +01:00
#[ inline ]
pub fn rope_is_line_ending ( r : RopeSlice ) -> bool {
r . chunks ( ) . all ( str_is_line_ending )
}
2021-06-20 09:40:41 +02:00
/// Attempts to detect what line ending the passed document uses.
2021-06-16 17:05:14 +02:00
pub fn auto_detect_line_ending ( doc : & Rope ) -> Option < LineEnding > {
2021-06-20 09:40:41 +02:00
// Return first matched line ending. Not all possible line endings
// are being matched, as they might be special-use only
2021-06-13 21:38:31 +02:00
for line in doc . lines ( ) . take ( 100 ) {
2021-06-20 09:40:41 +02:00
match get_line_ending ( & line ) {
2022-03-16 10:02:37 +01:00
None = > { }
#[ cfg(feature = " unicode-lines " ) ]
Some ( LineEnding ::VT ) | Some ( LineEnding ::FF ) | Some ( LineEnding ::PS ) = > { }
2021-06-20 09:40:41 +02:00
ending = > return ending ,
2021-06-16 17:05:14 +02:00
}
}
2021-06-20 09:40:41 +02:00
None
2021-06-16 17:05:14 +02:00
}
2021-06-19 14:03:14 +02:00
/// Returns the passed line's line ending, if any.
pub fn get_line_ending ( line : & RopeSlice ) -> Option < LineEnding > {
// Last character as str.
2021-06-19 14:05:11 +02:00
let g1 = line
. slice ( line . len_chars ( ) . saturating_sub ( 1 ) .. )
. as_str ( )
. unwrap ( ) ;
2021-06-19 14:03:14 +02:00
// Last two characters as str, or empty str if they're not contiguous.
// It's fine to punt on the non-contiguous case, because Ropey guarantees
// that CRLF is always contiguous.
2021-06-19 14:05:11 +02:00
let g2 = line
. slice ( line . len_chars ( ) . saturating_sub ( 2 ) .. )
. as_str ( )
. unwrap_or ( " " ) ;
2021-06-19 14:03:14 +02:00
// First check the two-character case for CRLF, then check the single-character case.
2021-06-20 09:40:41 +02:00
LineEnding ::from_str ( g2 ) . or_else ( | | LineEnding ::from_str ( g1 ) )
2021-06-19 14:03:14 +02:00
}
2022-03-16 10:02:37 +01:00
#[ cfg(not(feature = " unicode-lines " )) ]
/// Returns the passed line's line ending, if any.
pub fn get_line_ending_of_str ( line : & str ) -> Option < LineEnding > {
if line . ends_with ( " \u{000D} \u{000A} " ) {
Some ( LineEnding ::Crlf )
} else if line . ends_with ( '\u{000A}' ) {
Some ( LineEnding ::LF )
} else {
None
}
}
#[ cfg(feature = " unicode-lines " ) ]
2021-06-21 19:29:29 +02:00
/// Returns the passed line's line ending, if any.
pub fn get_line_ending_of_str ( line : & str ) -> Option < LineEnding > {
if line . ends_with ( " \u{000D} \u{000A} " ) {
Some ( LineEnding ::Crlf )
2021-06-21 21:02:44 +02:00
} else if line . ends_with ( '\u{000A}' ) {
2021-06-21 19:29:29 +02:00
Some ( LineEnding ::LF )
2021-06-21 21:02:44 +02:00
} else if line . ends_with ( '\u{000B}' ) {
2021-06-21 19:29:29 +02:00
Some ( LineEnding ::VT )
2021-06-21 21:02:44 +02:00
} else if line . ends_with ( '\u{000C}' ) {
2021-06-21 19:29:29 +02:00
Some ( LineEnding ::FF )
2021-06-21 21:02:44 +02:00
} else if line . ends_with ( '\u{000D}' ) {
2021-06-21 19:29:29 +02:00
Some ( LineEnding ::CR )
2021-06-21 21:02:44 +02:00
} else if line . ends_with ( '\u{0085}' ) {
2021-06-21 19:29:29 +02:00
Some ( LineEnding ::Nel )
2021-06-21 21:02:44 +02:00
} else if line . ends_with ( '\u{2028}' ) {
2021-06-21 19:29:29 +02:00
Some ( LineEnding ::LS )
2021-06-21 21:02:44 +02:00
} else if line . ends_with ( '\u{2029}' ) {
2021-06-21 19:29:29 +02:00
Some ( LineEnding ::PS )
} else {
None
}
}
2021-06-20 09:40:41 +02:00
/// Returns the char index of the end of the given line, not including its line ending.
2021-06-21 00:09:10 +02:00
pub fn line_end_char_index ( slice : & RopeSlice , line : usize ) -> usize {
slice . line_to_char ( line + 1 )
- get_line_ending ( & slice . line ( line ) )
2021-06-20 09:40:41 +02:00
. map ( | le | le . len_chars ( ) )
2021-06-21 00:09:10 +02:00
. unwrap_or ( 0 )
2021-06-20 02:22:10 +02:00
}
2021-07-06 03:58:33 +02:00
/// Fetches line `line_idx` from the passed rope slice, sans any line ending.
pub fn line_without_line_ending < ' a > ( slice : & ' a RopeSlice , line_idx : usize ) -> RopeSlice < ' a > {
let start = slice . line_to_char ( line_idx ) ;
let end = line_end_char_index ( slice , line_idx ) ;
slice . slice ( start .. end )
}
2021-06-22 19:15:30 +02:00
/// Returns the char index at which the given RopeSlice ends once a final
/// line ending, if present, is left out.
pub fn rope_end_without_line_ending(slice: &RopeSlice) -> usize {
    let total_chars = slice.len_chars();
    match get_line_ending(slice) {
        Some(ending) => total_chars - ending.len_chars(),
        None => total_chars,
    }
}
2021-06-13 21:38:31 +02:00
/// Unit tests for line-ending detection and the char-index helpers.
///
/// Assertions that involve `LineEnding::CR`, or whose expected values depend
/// on a lone `\r` counting as a line break, are gated on the `unicode-lines`
/// feature.
#[cfg(test)]
mod line_ending_tests {
    use super::*;

    #[test]
    fn line_ending_autodetect() {
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str("\n")),
            Some(LineEnding::LF)
        );
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str("\r\n")),
            Some(LineEnding::Crlf)
        );
        assert_eq!(auto_detect_line_ending(&Rope::from_str("hello")), None);
        assert_eq!(auto_detect_line_ending(&Rope::from_str("")), None);
        // The first line ending encountered wins.
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str("hello\nhelix\r\n")),
            Some(LineEnding::LF)
        );
        // A form feed is never reported, with or without `unicode-lines`.
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str("a formfeed\u{000C}")),
            None
        );
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str("\n\u{000A}\n \u{000A}")),
            Some(LineEnding::LF)
        );
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str(
                "a formfeed\u{000C} with a\u{000C} linefeed\u{000A}"
            )),
            Some(LineEnding::LF)
        );
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str(
                "a formfeed\u{000C} with a\u{000C} carriage return linefeed\u{000D}\u{000A} and a linefeed\u{000A}"
            )),
            Some(LineEnding::Crlf)
        );
    }

    #[test]
    fn str_to_line_ending() {
        #[cfg(feature = "unicode-lines")]
        assert_eq!(LineEnding::from_str("\r"), Some(LineEnding::CR));
        assert_eq!(LineEnding::from_str("\n"), Some(LineEnding::LF));
        assert_eq!(LineEnding::from_str("\r\n"), Some(LineEnding::Crlf));
        // Text that merely ends with a line ending is not itself one.
        assert_eq!(LineEnding::from_str("hello\n"), None);
    }

    #[test]
    fn rope_slice_to_line_ending() {
        let r = Rope::from_str("hello\r\n");
        #[cfg(feature = "unicode-lines")]
        assert_eq!(
            LineEnding::from_rope_slice(&r.slice(5..6)),
            Some(LineEnding::CR)
        );
        assert_eq!(
            LineEnding::from_rope_slice(&r.slice(6..7)),
            Some(LineEnding::LF)
        );
        assert_eq!(
            LineEnding::from_rope_slice(&r.slice(5..7)),
            Some(LineEnding::Crlf)
        );
        assert_eq!(LineEnding::from_rope_slice(&r.slice(..)), None);
    }

    #[test]
    fn get_line_ending_rope_slice() {
        let r = Rope::from_str("Hello\rworld\nhow\r\nare you?");
        #[cfg(feature = "unicode-lines")]
        assert_eq!(get_line_ending(&r.slice(..6)), Some(LineEnding::CR));
        assert_eq!(get_line_ending(&r.slice(..12)), Some(LineEnding::LF));
        assert_eq!(get_line_ending(&r.slice(..17)), Some(LineEnding::Crlf));
        assert_eq!(get_line_ending(&r.slice(..)), None);
    }

    #[test]
    fn get_line_ending_str() {
        let text = "Hello\rworld\nhow\r\nare you?";
        #[cfg(feature = "unicode-lines")]
        assert_eq!(get_line_ending_of_str(&text[..6]), Some(LineEnding::CR));
        assert_eq!(get_line_ending_of_str(&text[..12]), Some(LineEnding::LF));
        assert_eq!(get_line_ending_of_str(&text[..17]), Some(LineEnding::Crlf));
        assert_eq!(get_line_ending_of_str(text), None);
    }

    #[test]
    fn line_end_char_index_rope_slice() {
        let r = Rope::from_str("Hello\rworld\nhow\r\nare you?");
        let s = &r.slice(..);
        // Without `unicode-lines` the lone `\r` is ordinary text, so the
        // first line runs through "world".
        #[cfg(not(feature = "unicode-lines"))]
        {
            assert_eq!(line_end_char_index(s, 0), 11);
            assert_eq!(line_end_char_index(s, 1), 15);
            assert_eq!(line_end_char_index(s, 2), 25);
        }
        // With `unicode-lines` the lone `\r` ends the first line.
        #[cfg(feature = "unicode-lines")]
        {
            assert_eq!(line_end_char_index(s, 0), 5);
            assert_eq!(line_end_char_index(s, 1), 11);
            assert_eq!(line_end_char_index(s, 2), 15);
        }
    }
}