2021-06-16 17:05:14 +02:00
use crate ::{ Rope , RopeGraphemes , RopeSlice } ;
2021-06-21 00:09:10 +02:00
/// The line ending assumed when none can be detected: CRLF when compiling
/// for Windows.
#[cfg(target_os = "windows")]
pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf;
/// The line ending assumed when none can be detected: LF on every
/// non-Windows target.
#[cfg(not(target_os = "windows"))]
pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF;
2021-06-16 17:05:14 +02:00
/// Represents one of the valid Unicode line endings.
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub enum LineEnding {
    /// CarriageReturn followed by LineFeed
    Crlf,
    /// U+000A -- LineFeed
    LF,
    /// U+000B -- VerticalTab
    VT,
    /// U+000C -- FormFeed
    FF,
    /// U+000D -- CarriageReturn
    CR,
    /// U+0085 -- NextLine
    Nel,
    /// U+2028 -- Line Separator
    LS,
    /// U+2029 -- ParagraphSeparator
    PS,
}
2021-06-17 13:49:50 +02:00
impl LineEnding {
2021-06-21 00:09:10 +02:00
#[ inline ]
2021-06-19 14:03:14 +02:00
pub fn len_chars ( & self ) -> usize {
2021-06-17 13:49:50 +02:00
match self {
Self ::Crlf = > 2 ,
_ = > 1 ,
}
}
2021-06-21 00:09:10 +02:00
#[ inline ]
2021-06-20 09:40:41 +02:00
pub fn as_str ( & self ) -> & 'static str {
2021-06-17 13:49:50 +02:00
match self {
Self ::Crlf = > " \u{000D} \u{000A} " ,
Self ::LF = > " \u{000A} " ,
2021-06-19 14:03:14 +02:00
Self ::VT = > " \u{000B} " ,
Self ::FF = > " \u{000C} " ,
2021-06-20 09:40:41 +02:00
Self ::CR = > " \u{000D} " ,
Self ::Nel = > " \u{0085} " ,
Self ::LS = > " \u{2028} " ,
2021-06-19 14:03:14 +02:00
Self ::PS = > " \u{2029} " ,
2021-06-17 13:49:50 +02:00
}
}
2021-06-21 00:09:10 +02:00
#[ inline ]
pub fn from_char ( ch : char ) -> Option < LineEnding > {
match ch {
'\u{000A}' = > Some ( LineEnding ::LF ) ,
'\u{000B}' = > Some ( LineEnding ::VT ) ,
'\u{000C}' = > Some ( LineEnding ::FF ) ,
'\u{000D}' = > Some ( LineEnding ::CR ) ,
'\u{0085}' = > Some ( LineEnding ::Nel ) ,
'\u{2028}' = > Some ( LineEnding ::LS ) ,
'\u{2029}' = > Some ( LineEnding ::PS ) ,
// Not a line ending
_ = > None ,
}
}
2021-06-21 01:13:59 +02:00
// Normally we'd want to implement the FromStr trait, but in this case
// that would force us into a different return type than from_char or
// or from_rope_slice, which would be weird.
#[ allow(clippy::should_implement_trait) ]
2021-06-21 00:09:10 +02:00
#[ inline ]
2021-06-20 09:40:41 +02:00
pub fn from_str ( g : & str ) -> Option < LineEnding > {
match g {
" \u{000D} \u{000A} " = > Some ( LineEnding ::Crlf ) ,
" \u{000A} " = > Some ( LineEnding ::LF ) ,
" \u{000B} " = > Some ( LineEnding ::VT ) ,
" \u{000C} " = > Some ( LineEnding ::FF ) ,
" \u{000D} " = > Some ( LineEnding ::CR ) ,
" \u{0085} " = > Some ( LineEnding ::Nel ) ,
" \u{2028} " = > Some ( LineEnding ::LS ) ,
" \u{2029} " = > Some ( LineEnding ::PS ) ,
// Not a line ending
_ = > None ,
}
2021-06-16 17:05:14 +02:00
}
2021-06-21 00:09:10 +02:00
#[ inline ]
2021-06-20 09:40:41 +02:00
pub fn from_rope_slice ( g : & RopeSlice ) -> Option < LineEnding > {
if let Some ( text ) = g . as_str ( ) {
LineEnding ::from_str ( text )
} else {
// Non-contiguous, so it can't be a line ending.
// Specifically, Ropey guarantees that CRLF is always
// contiguous. And the remaining line endings are all
// single `char`s, and therefore trivially contiguous.
None
}
2021-06-16 17:05:14 +02:00
}
}
2021-06-21 00:09:10 +02:00
/// Reports whether `s`, taken as a whole, is one of the line endings
/// recognized by `LineEnding::from_str`.
#[inline]
pub fn str_is_line_ending(s: &str) -> bool {
    let parsed = LineEnding::from_str(s);
    parsed.is_some()
}
2021-06-20 09:40:41 +02:00
/// Attempts to detect what line ending the passed document uses.
2021-06-16 17:05:14 +02:00
pub fn auto_detect_line_ending ( doc : & Rope ) -> Option < LineEnding > {
2021-06-20 09:40:41 +02:00
// Return first matched line ending. Not all possible line endings
// are being matched, as they might be special-use only
2021-06-13 21:38:31 +02:00
for line in doc . lines ( ) . take ( 100 ) {
2021-06-20 09:40:41 +02:00
match get_line_ending ( & line ) {
None | Some ( LineEnding ::VT ) | Some ( LineEnding ::FF ) | Some ( LineEnding ::PS ) = > { }
ending = > return ending ,
2021-06-16 17:05:14 +02:00
}
}
2021-06-20 09:40:41 +02:00
None
2021-06-16 17:05:14 +02:00
}
2021-06-19 14:03:14 +02:00
/// Returns the passed line's line ending, if any.
pub fn get_line_ending ( line : & RopeSlice ) -> Option < LineEnding > {
// Last character as str.
2021-06-19 14:05:11 +02:00
let g1 = line
. slice ( line . len_chars ( ) . saturating_sub ( 1 ) .. )
. as_str ( )
. unwrap ( ) ;
2021-06-19 14:03:14 +02:00
// Last two characters as str, or empty str if they're not contiguous.
// It's fine to punt on the non-contiguous case, because Ropey guarantees
// that CRLF is always contiguous.
2021-06-19 14:05:11 +02:00
let g2 = line
. slice ( line . len_chars ( ) . saturating_sub ( 2 ) .. )
. as_str ( )
. unwrap_or ( " " ) ;
2021-06-19 14:03:14 +02:00
// First check the two-character case for CRLF, then check the single-character case.
2021-06-20 09:40:41 +02:00
LineEnding ::from_str ( g2 ) . or_else ( | | LineEnding ::from_str ( g1 ) )
2021-06-19 14:03:14 +02:00
}
2021-06-21 19:29:29 +02:00
/// Returns the passed line's line ending, if any.
///
/// Looks only at the end of `line`: a trailing CRLF is reported as
/// `LineEnding::Crlf`. Otherwise the final `char` is classified with
/// `LineEnding::from_char`. An empty string yields `None`.
pub fn get_line_ending_of_str(line: &str) -> Option<LineEnding> {
    if line.ends_with("\u{000D}\u{000A}") {
        // CRLF has to be tested before the single-character endings,
        // otherwise it would be reported as a bare LF.
        Some(LineEnding::Crlf)
    } else {
        line.chars().next_back().and_then(LineEnding::from_char)
    }
}
2021-06-20 09:40:41 +02:00
/// Returns the char index of the end of the given line, not including its line ending.
2021-06-21 00:09:10 +02:00
pub fn line_end_char_index ( slice : & RopeSlice , line : usize ) -> usize {
slice . line_to_char ( line + 1 )
- get_line_ending ( & slice . line ( line ) )
2021-06-20 09:40:41 +02:00
. map ( | le | le . len_chars ( ) )
2021-06-21 00:09:10 +02:00
. unwrap_or ( 0 )
2021-06-20 02:22:10 +02:00
}
2021-06-13 21:38:31 +02:00
#[cfg(test)]
mod line_ending_tests {
    use super::*;

    /// Exercises `auto_detect_line_ending` on small documents.
    #[test]
    fn test_autodetect() {
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str("\n")),
            Some(LineEnding::LF)
        );
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str("\r\n")),
            Some(LineEnding::Crlf)
        );
        // No line ending at all.
        assert_eq!(auto_detect_line_ending(&Rope::from_str("hello")), None);
        assert_eq!(auto_detect_line_ending(&Rope::from_str("")), None);
        // Mixed endings: the first one encountered wins.
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str("hello\nhelix\r\n")),
            Some(LineEnding::LF)
        );
        // A form feed on its own is not reported.
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str("a formfeed\u{000C}")),
            None
        );
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str("\n\u{000A}\n\u{000A}")),
            Some(LineEnding::LF)
        );
        // Form feeds are skipped in favour of the first later ending.
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str(
                "a formfeed\u{000C} with a\u{000C} linefeed\u{000A}"
            )),
            Some(LineEnding::LF)
        );
        assert_eq!(
            auto_detect_line_ending(&Rope::from_str(
                "a formfeed\u{000C} with a\u{000C} carriage return linefeed\u{000D}\u{000A} and a linefeed\u{000A}"
            )),
            Some(LineEnding::Crlf)
        );
    }

    /// Exercises `LineEnding::from_rope_slice` on sub-slices of a CRLF rope.
    #[test]
    fn test_rope_slice_to_line_ending() {
        let r = Rope::from_str("\r\n");
        // Second char only: a bare LF.
        assert_eq!(
            LineEnding::from_rope_slice(&r.slice(1..2)),
            Some(LineEnding::LF)
        );
        // Both chars: CRLF.
        assert_eq!(
            LineEnding::from_rope_slice(&r.slice(0..2)),
            Some(LineEnding::Crlf)
        );
    }
}