From fd98e743e888e8fbddf4987213afbb19dd148fd7 Mon Sep 17 00:00:00 2001 From: Shafkath Shuhan Date: Wed, 23 Jun 2021 02:03:34 -0400 Subject: [PATCH] Handle non-UTF8 files --- .gitattributes | 1 + Cargo.lock | 42 ++++- helix-core/src/lib.rs | 2 +- helix-term/src/commands.rs | 14 +- helix-view/Cargo.toml | 4 +- helix-view/src/document.rs | 351 ++++++++++++++++++++++++++++++++----- helix-view/src/editor.rs | 7 +- 7 files changed, 357 insertions(+), 64 deletions(-) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..0f1149f4 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +helix-view/tests/* linguist-generated diff --git a/Cargo.lock b/Cargo.lock index 66083bff..2a90b539 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -65,12 +65,29 @@ dependencies = [ "jobserver", ] +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chardetng" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81a81b0d8f8ee23417182818b4f06312c5f535c2b04eef1773f7c24bbdf8c500" +dependencies = [ + "cfg-if 0.1.10", + "encoding_rs", + "memchr", +] + [[package]] name = "chrono" version = "0.4.19" @@ -89,7 +106,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "lazy_static", ] @@ -125,7 +142,7 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "dirs-sys-next", ] @@ -146,13 +163,22 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +[[package]] +name = "encoding_rs" +version = "0.8.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80df024fbc5ac80f87dfef0d9f5209a252f2a497f7f42944cff24d8253cac065" +dependencies = [ + "cfg-if 1.0.0", +] + [[package]] name = "etcetera" version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "016b04fd1e94fb833d432634245c9bb61cf1c7409668a0e7d4c3ab00c5172dec" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "dirs-next", "thiserror", ] @@ -244,7 +270,7 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "libc", "wasi", ] @@ -355,7 +381,9 @@ name = "helix-view" version = "0.2.0" dependencies = [ "anyhow", + "chardetng", "crossterm", + "encoding_rs", "futures-util", "helix-core", "helix-lsp", @@ -414,7 +442,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", ] [[package]] @@ -472,7 +500,7 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", ] [[package]] @@ -595,7 +623,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "instant", "libc", "redox_syscall", diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 7d51f88c..ae135b00 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -101,7 +101,7 @@ pub use etcetera::home_dir; use etcetera::base_strategy::{choose_base_strategy, BaseStrategy}; -pub use ropey::{Rope, RopeSlice}; +pub use ropey::{Rope, RopeBuilder, RopeSlice}; pub use tendril::StrTendril as Tendril; diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 8c76e869..cf583953 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -239,7 +239,6 @@ impl Command { join_selections, keep_selections, keep_primary_selection, - save, completion, hover, toggle_comments, @@ -1080,7 +1079,7 @@ mod cmd { if autofmt { doc.format(view.id); // TODO: merge into save } - helix_lsp::block_on(doc.save()); + helix_lsp::block_on(tokio::spawn(doc.save())); Ok(()) } @@ -1256,7 +1255,7 @@ mod cmd { errors.push_str("cannot write a buffer without a filename\n"); continue; } - helix_lsp::block_on(doc.save()); + helix_lsp::block_on(tokio::spawn(doc.save())); } editor.set_error(errors); @@ -2950,15 +2949,6 @@ fn keep_primary_selection(cx: &mut Context) { doc.set_selection(view.id, selection); } -// - -fn save(cx: &mut Context) { - // Spawns an async task to actually do the saving. This way we prevent blocking. - - // TODO: handle save errors somehow? - tokio::spawn(doc_mut!(cx.editor).save()); -} - fn completion(cx: &mut Context) { // trigger on trigger char, or if user calls it // (or on word char typing??) diff --git a/helix-view/Cargo.toml b/helix-view/Cargo.toml index 8d93d2d9..cadbbdbd 100644 --- a/helix-view/Cargo.toml +++ b/helix-view/Cargo.toml @@ -31,9 +31,11 @@ futures-util = { version = "0.3", features = ["std", "async-await"], default-fea slotmap = "1" +encoding_rs = "0.8" +chardetng = "0.1" + serde = { version = "1.0", features = ["derive"] } toml = "0.5" log = "~0.4" which = "4.1" - diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index d3c6cf9e..92778ad7 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -11,7 +11,7 @@ use helix_core::{ history::History, line_ending::auto_detect_line_ending, syntax::{self, LanguageConfiguration}, - ChangeSet, Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction, + ChangeSet, Diagnostic, LineEnding, Rope, RopeBuilder, Selection, State, Syntax, Transaction, DEFAULT_LINE_ENDING, }; @@ -19,6 +19,8 @@ use crate::{DocumentId, Theme, ViewId}; use std::collections::HashMap; +const BUF_SIZE: usize = 8192; + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum Mode { Normal, @@ -39,6 +41,7 @@ pub struct Document { pub(crate) selections: HashMap, path: Option, + encoding: &'static encoding_rs::Encoding, /// Current editing mode. pub mode: Mode, @@ -78,6 +81,7 @@ impl fmt::Debug for Document { .field("text", &self.text) .field("selections", &self.selections) .field("path", &self.path) + .field("encoding", &self.encoding) .field("mode", &self.mode) .field("restore_cursor", &self.restore_cursor) .field("syntax", &self.syntax) @@ -116,6 +120,173 @@ impl FromStr for Mode { } } +// The documentation and implementation of this function should be up-to-date with +// its sibling function, `to_writer()`. +// +/// Decodes a stream of bytes into UTF-8, returning a `Rope` and the +/// encoding it was decoded as. The optional `encoding` parameter can +/// be used to override encoding auto-detection. +pub fn from_reader( + reader: &mut R, + encoding: Option<&'static encoding_rs::Encoding>, +) -> Result<(Rope, &'static encoding_rs::Encoding), Error> { + // These two buffers are 8192 bytes in size each and are used as + // intermediaries during the decoding process. Text read into `buf` + // from `reader` is decoded into `buf_out` as UTF-8. Once either + // `buf_out` is full or the end of the reader was reached, the + // contents are appended to `builder`. + let mut buf = [0u8; BUF_SIZE]; + let mut buf_out = [0u8; BUF_SIZE]; + let mut builder = RopeBuilder::new(); + + // By default, the encoding of the text is auto-detected via the + // `chardetng` crate which requires sample data from the reader. + // As a manual override to this auto-detection is possible, the + // same data is read into `buf` to ensure symmetry in the upcoming + // loop. + let (encoding, mut decoder, mut slice, mut is_empty) = { + let read = reader.read(&mut buf)?; + let is_empty = read == 0; + let encoding = encoding.unwrap_or_else(|| { + let mut encoding_detector = chardetng::EncodingDetector::new(); + encoding_detector.feed(&buf, is_empty); + encoding_detector.guess(None, true) + }); + let decoder = encoding.new_decoder(); + + // If the amount of bytes read from the reader is less than + // `buf.len()`, it is undesirable to read the bytes afterwards. + let slice = &buf[..read]; + (encoding, decoder, slice, is_empty) + }; + + // `RopeBuilder::append()` expects a `&str`, so this is the "real" + // output buffer. When decoding, the number of bytes in the output + // buffer will often exceed the number of bytes in the input buffer. + // The `result` returned by `decode_to_str()` will state whether or + // not that happened. The contents of `buf_str` is appended to + // `builder` and it is reused for the next iteration of the decoding + // loop. + // + // As it is possible to read less than the buffer's maximum from `read()` + // even when the end of the reader has yet to be reached, the end of + // the reader is determined only when a `read()` call returns `0`. + // + // SAFETY: `buf_out` is a zero-initialized array, thus it will always + // contain valid UTF-8. + let buf_str = unsafe { std::str::from_utf8_unchecked_mut(&mut buf_out[..]) }; + let mut total_written = 0usize; + loop { + let mut total_read = 0usize; + + loop { + let (result, read, written, ..) = decoder.decode_to_str( + &slice[total_read..], + &mut buf_str[total_written..], + is_empty, + ); + + // These variables act as the read and write cursors of `buf` and `buf_str` respectively. + // They are necessary in case the output buffer fills before decoding of the entire input + // loop is complete. Otherwise, the loop would endlessly iterate over the same `buf` and + // the data inside the output buffer would be overwritten. + total_read += read; + total_written += written; + match result { + encoding_rs::CoderResult::InputEmpty => { + debug_assert_eq!(slice.len(), total_read); + break; + } + encoding_rs::CoderResult::OutputFull => { + debug_assert!(slice.len() > total_read); + builder.append(&buf_str[..total_written]); + total_written = 0; + } + } + } + // Once the end of the stream is reached, the output buffer is + // flushed and the loop terminates. + if is_empty { + debug_assert_eq!(reader.read(&mut buf)?, 0); + builder.append(&buf_str[..total_written]); + break; + } + + // Once the previous input has been processed and decoded, the next set of + // data is fetched from the reader. The end of the reader is determined to + // be when exactly `0` bytes were read from the reader, as per the invariants + // of the `Read` trait. + let read = reader.read(&mut buf)?; + slice = &buf[..read]; + is_empty = read == 0; + } + let rope = builder.finish(); + Ok((rope, encoding)) +} + +// The documentation and implementation of this function should be up-to-date with +// its sibling function, `from_reader()`. +// +/// Encodes the text inside `rope` into the given `encoding` and writes the +/// encoded output into `writer.` As a `Rope` can only contain valid UTF-8, +/// replacement characters may appear in the encoded text. +pub async fn to_writer<'a, W: tokio::io::AsyncWriteExt + Unpin + ?Sized>( + writer: &'a mut W, + encoding: &'static encoding_rs::Encoding, + rope: &'a Rope, +) -> Result<(), Error> { + // Text inside a `Rope` is stored as non-contiguous blocks of data called + // chunks. The absolute size of each chunk is unknown, thus it is impossible + // to predict the end of the chunk iterator ahead of time. Instead, it is + // determined by filtering the iterator to remove all empty chunks and then + // appending an empty chunk to it. This is valuable for detecting when all + // chunks in the `Rope` have been iterated over in the subsequent loop. + let iter = rope + .chunks() + .filter(|c| !c.is_empty()) + .chain(std::iter::once("")); + let mut buf = [0u8; BUF_SIZE]; + let mut encoder = encoding.new_encoder(); + let mut total_written = 0usize; + for chunk in iter { + let is_empty = chunk.is_empty(); + let mut total_read = 0usize; + + loop { + let (result, read, written, ..) = + encoder.encode_from_utf8(&chunk[total_read..], &mut buf[total_written..], is_empty); + + // These variables act as the read and write cursors of `chunk` and `buf` respectively. + // They are necessary in case the output buffer fills before encoding of the entire input + // loop is complete. Otherwise, the loop would endlessly iterate over the same `chunk` and + // the data inside the output buffer would be overwritten. + total_read += read; + total_written += written; + match result { + encoding_rs::CoderResult::InputEmpty => { + debug_assert_eq!(chunk.len(), total_read); + debug_assert!(buf.len() >= total_written); + break; + } + encoding_rs::CoderResult::OutputFull => { + debug_assert!(chunk.len() > total_read); + writer.write_all(&buf[..total_written]).await?; + total_written = 0; + } + } + } + + // Once the end of the iterator is reached, the output buffer is + // flushed and the outer loop terminates. + if is_empty { + writer.write_all(&buf[..total_written]).await?; + writer.flush().await?; + break; + } + } + Ok(()) +} + /// Like std::mem::replace() except it allows the replacement value to be mapped from the /// original value. fn take_with(mut_ref: &mut T, closure: F) @@ -216,13 +387,15 @@ use helix_lsp::lsp; use url::Url; impl Document { - pub fn new(text: Rope) -> Self { + pub fn from(text: Rope, encoding: Option<&'static encoding_rs::Encoding>) -> Self { + let encoding = encoding.unwrap_or(encoding_rs::UTF_8); let changes = ChangeSet::new(&text); let old_state = None; Self { id: DocumentId::default(), path: None, + encoding, text, selections: HashMap::default(), indent_style: IndentStyle::Spaces(4), @@ -242,29 +415,31 @@ impl Document { } // TODO: async fn? - pub fn load( + /// Create a new document from `path`. Encoding is auto-detected, but it can be manually + /// overwritten with the `encoding` parameter. + pub fn open( path: PathBuf, + encoding: Option<&'static encoding_rs::Encoding>, theme: Option<&Theme>, config_loader: Option<&syntax::Loader>, ) -> Result { - use std::{fs::File, io::BufReader}; - - let mut doc = if !path.exists() { - Rope::from(DEFAULT_LINE_ENDING.as_str()) - } else { - let file = File::open(&path).context(format!("unable to open {:?}", path))?; - Rope::from_reader(BufReader::new(file))? - }; - - // search for line endings - let line_ending = auto_detect_line_ending(&doc).unwrap_or(DEFAULT_LINE_ENDING); - - // add missing newline at the end of file - if doc.len_bytes() == 0 || !char_is_line_ending(doc.char(doc.len_chars() - 1)) { - doc.insert(doc.len_chars(), line_ending.as_str()); + if !path.exists() { + return Ok(Self::default()); } - let mut doc = Self::new(doc); + let mut file = std::fs::File::open(&path).context(format!("unable to open {:?}", path))?; + let (mut rope, encoding) = from_reader(&mut file, encoding)?; + + // search for line endings + let line_ending = auto_detect_line_ending(&rope).unwrap_or(DEFAULT_LINE_ENDING); + + // add missing newline at the end of file + if rope.len_bytes() == 0 || !char_is_line_ending(rope.char(rope.len_chars() - 1)) { + rope.insert(rope.len_chars(), line_ending.as_str()); + } + + let mut doc = Self::from(rope, Some(encoding)); + // set the path and try detecting the language doc.set_path(&path)?; doc.detect_indent_style(); @@ -303,6 +478,8 @@ impl Document { // TODO: do we need some way of ensuring two save operations on the same doc can't run at once? // or is that handled by the OS/async layer + /// The `Document`'s text is encoded according to its encoding and written to the file located + /// at its `path()`. pub fn save(&mut self) -> impl Future> { // we clone and move text + path into the future so that we asynchronously save the current // state without blocking any further edits. @@ -320,8 +497,11 @@ impl Document { self.last_saved_revision = history.current_revision(); self.history.set(history); + let encoding = self.encoding; + + // We encode the file according to the `Document`'s encoding. async move { - use tokio::{fs::File, io::AsyncWriteExt}; + use tokio::fs::File; if let Some(parent) = path.parent() { // TODO: display a prompt asking the user if the directories should be created if !parent.exists() { @@ -330,13 +510,9 @@ impl Document { )); } } - let mut file = File::create(path).await?; - // write all the rope chunks to file - for chunk in text.chunks() { - file.write_all(chunk.as_bytes()).await?; - } - // TODO: flush? + let mut file = File::create(path).await?; + to_writer(&mut file, encoding, &text).await?; if let Some(language_server) = language_server { language_server @@ -531,7 +707,7 @@ impl Document { self.selections.insert(view_id, selection); } - fn _apply(&mut self, transaction: &Transaction, view_id: ViewId) -> bool { + fn apply_impl(&mut self, transaction: &Transaction, view_id: ViewId) -> bool { let old_doc = self.text().clone(); let success = transaction.changes().apply(&mut self.text); @@ -594,7 +770,7 @@ impl Document { }); } - let success = self._apply(transaction, view_id); + let success = self.apply_impl(transaction, view_id); if !transaction.changes().is_empty() { // Compose this transaction with the previous one @@ -608,7 +784,7 @@ impl Document { pub fn undo(&mut self, view_id: ViewId) { let mut history = self.history.take(); let success = if let Some(transaction) = history.undo() { - self._apply(&transaction, view_id) + self.apply_impl(transaction, view_id) } else { false }; @@ -623,7 +799,7 @@ impl Document { pub fn redo(&mut self, view_id: ViewId) { let mut history = self.history.take(); let success = if let Some(transaction) = history.redo() { - self._apply(&transaction, view_id) + self.apply_impl(transaction, view_id) } else { false }; @@ -638,14 +814,14 @@ impl Document { pub fn earlier(&mut self, view_id: ViewId, uk: helix_core::history::UndoKind) { let txns = self.history.get_mut().earlier(uk); for txn in txns { - self._apply(&txn, view_id); + self.apply_impl(&txn, view_id); } } pub fn later(&mut self, view_id: ViewId, uk: helix_core::history::UndoKind) { let txns = self.history.get_mut().later(uk); for txn in txns { - self._apply(&txn, view_id); + self.apply_impl(&txn, view_id); } } @@ -670,12 +846,10 @@ impl Document { self.history.set(history); } - #[inline] pub fn id(&self) -> DocumentId { self.id } - #[inline] pub fn is_modified(&self) -> bool { let history = self.history.take(); let current_revision = history.current_revision(); @@ -683,12 +857,10 @@ impl Document { current_revision != self.last_saved_revision || !self.changes.is_empty() } - #[inline] pub fn mode(&self) -> Mode { self.mode } - #[inline] /// Corresponding language scope name. Usually `source.`. pub fn language(&self) -> Option<&str> { self.language @@ -696,21 +868,21 @@ impl Document { .map(|language| language.scope.as_str()) } - #[inline] pub fn language_config(&self) -> Option<&LanguageConfiguration> { self.language.as_deref() } - #[inline] /// Current document version, incremented at each change. pub fn version(&self) -> i32 { self.version } + #[inline] pub fn language_server(&self) -> Option<&helix_lsp::Client> { self.language_server.as_deref() } + #[inline] /// Tree-sitter AST tree pub fn syntax(&self) -> Option<&Syntax> { self.syntax.as_ref() @@ -756,10 +928,12 @@ impl Document { self.path().map(|path| Url::from_file_path(path).unwrap()) } + #[inline] pub fn text(&self) -> &Rope { &self.text } + #[inline] pub fn selection(&self, view_id: ViewId) -> &Selection { &self.selections[&view_id] } @@ -787,6 +961,7 @@ impl Document { // -- LSP methods + #[inline] pub fn identifier(&self) -> lsp::TextDocumentIdentifier { lsp::TextDocumentIdentifier::new(self.url().unwrap()) } @@ -795,6 +970,7 @@ impl Document { lsp::VersionedTextDocumentIdentifier::new(self.url().unwrap(), self.version) } + #[inline] pub fn diagnostics(&self) -> &[Diagnostic] { &self.diagnostics } @@ -804,6 +980,13 @@ impl Document { } } +impl Default for Document { + fn default() -> Self { + let text = Rope::from(DEFAULT_LINE_ENDING.as_str()); + Self::from(text, None) + } +} + #[cfg(test)] mod test { use super::*; @@ -812,7 +995,7 @@ mod test { fn changeset_to_changes() { use helix_lsp::{lsp, Client, OffsetEncoding}; let text = Rope::from("hello"); - let mut doc = Document::new(text); + let mut doc = Document::from(text, None); let view = ViewId::default(); doc.set_selection(view, Selection::single(5, 5)); @@ -921,4 +1104,94 @@ mod test { ] ); } + + #[test] + fn test_line_ending() { + if cfg!(windows) { + assert_eq!(Document::default().text().to_string(), "\r\n"); + } else { + assert_eq!(Document::default().text().to_string(), "\n"); + } + } + + macro_rules! test_decode { + ($label:expr, $label_override:expr) => { + let encoding = encoding_rs::Encoding::for_label($label_override.as_bytes()).unwrap(); + let base_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests"); + let path = base_path.join(format!("{}_in.txt", $label)); + let ref_path = base_path.join(format!("{}_in_ref.txt", $label)); + assert!(path.exists()); + assert!(ref_path.exists()); + + let mut file = std::fs::File::open(path).unwrap(); + let text = from_reader(&mut file, Some(encoding)) + .unwrap() + .0 + .to_string(); + let expectation = std::fs::read_to_string(ref_path).unwrap(); + assert_eq!(text[..], expectation[..]); + }; + } + + macro_rules! test_encode { + ($label:expr, $label_override:expr) => { + let encoding = encoding_rs::Encoding::for_label($label_override.as_bytes()).unwrap(); + let base_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests"); + let path = base_path.join(format!("{}_out.txt", $label)); + let ref_path = base_path.join(format!("{}_out_ref.txt", $label)); + assert!(path.exists()); + assert!(ref_path.exists()); + + let text = Rope::from_str(&std::fs::read_to_string(path).unwrap()); + let mut buf: Vec = Vec::new(); + helix_lsp::block_on(to_writer(&mut buf, encoding, &text)).unwrap(); + + let expectation = std::fs::read(ref_path).unwrap(); + assert_eq!(buf, expectation); + }; + } + + macro_rules! test_decode_fn { + ($name:ident, $label:expr, $label_override:expr) => { + #[test] + fn $name() { + test_decode!($label, $label_override); + } + }; + ($name:ident, $label:expr) => { + #[test] + fn $name() { + test_decode!($label, $label); + } + }; + } + + macro_rules! test_encode_fn { + ($name:ident, $label:expr, $label_override:expr) => { + #[test] + fn $name() { + test_encode!($label, $label_override); + } + }; + ($name:ident, $label:expr) => { + #[test] + fn $name() { + test_encode!($label, $label); + } + }; + } + + test_decode_fn!(test_big5_decode, "big5"); + test_encode_fn!(test_big5_encode, "big5"); + test_decode_fn!(test_euc_kr_decode, "euc_kr", "EUC-KR"); + test_encode_fn!(test_euc_kr_encode, "euc_kr", "EUC-KR"); + test_decode_fn!(test_gb18030_decode, "gb18030"); + test_encode_fn!(test_gb18030_encode, "gb18030"); + test_decode_fn!(test_iso_2022_jp_decode, "iso_2022_jp", "ISO-2022-JP"); + test_encode_fn!(test_iso_2022_jp_encode, "iso_2022_jp", "ISO-2022-JP"); + test_decode_fn!(test_jis0208_decode, "jis0208", "EUC-JP"); + test_encode_fn!(test_jis0208_encode, "jis0208", "EUC-JP"); + test_decode_fn!(test_jis0212_decode, "jis0212", "EUC-JP"); + test_decode_fn!(test_shift_jis_decode, "shift_jis"); + test_encode_fn!(test_shift_jis_encode, "shift_jis"); } diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index 839bcdcd..7f910b80 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -17,7 +17,7 @@ use anyhow::Error; pub use helix_core::diagnostic::Severity; pub use helix_core::register::Registers; -use helix_core::{Position, DEFAULT_LINE_ENDING}; +use helix_core::Position; #[derive(Debug)] pub struct Editor { @@ -171,8 +171,7 @@ impl Editor { } pub fn new_file(&mut self, action: Action) -> DocumentId { - use helix_core::Rope; - let doc = Document::new(Rope::from(DEFAULT_LINE_ENDING.as_str())); + let doc = Document::default(); let id = self.documents.insert(doc); self.documents[id].id = id; self.switch(id, action); @@ -190,7 +189,7 @@ impl Editor { let id = if let Some(id) = id { id } else { - let mut doc = Document::load(path, Some(&self.theme), Some(&self.syn_loader))?; + let mut doc = Document::open(path, None, Some(&self.theme), Some(&self.syn_loader))?; // try to find a language server based on the language name let language_server = doc