Handle non-UTF8 files
This commit is contained in:
parent
9706f1121d
commit
fd98e743e8
7 changed files with 357 additions and 64 deletions
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
helix-view/tests/* linguist-generated
|
42
Cargo.lock
generated
42
Cargo.lock
generated
|
@ -65,12 +65,29 @@ dependencies = [
|
||||||
"jobserver",
|
"jobserver",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cfg-if"
|
||||||
|
version = "0.1.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cfg-if"
|
name = "cfg-if"
|
||||||
version = "1.0.0"
|
version = "1.0.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "chardetng"
|
||||||
|
version = "0.1.13"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "81a81b0d8f8ee23417182818b4f06312c5f535c2b04eef1773f7c24bbdf8c500"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if 0.1.10",
|
||||||
|
"encoding_rs",
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "chrono"
|
name = "chrono"
|
||||||
version = "0.4.19"
|
version = "0.4.19"
|
||||||
|
@ -89,7 +106,7 @@ version = "0.8.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
|
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if 1.0.0",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -125,7 +142,7 @@ version = "2.0.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1"
|
checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if 1.0.0",
|
||||||
"dirs-sys-next",
|
"dirs-sys-next",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -146,13 +163,22 @@ version = "1.6.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
|
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "encoding_rs"
|
||||||
|
version = "0.8.28"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "80df024fbc5ac80f87dfef0d9f5209a252f2a497f7f42944cff24d8253cac065"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if 1.0.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "etcetera"
|
name = "etcetera"
|
||||||
version = "0.3.2"
|
version = "0.3.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "016b04fd1e94fb833d432634245c9bb61cf1c7409668a0e7d4c3ab00c5172dec"
|
checksum = "016b04fd1e94fb833d432634245c9bb61cf1c7409668a0e7d4c3ab00c5172dec"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if 1.0.0",
|
||||||
"dirs-next",
|
"dirs-next",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
]
|
]
|
||||||
|
@ -244,7 +270,7 @@ version = "0.2.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753"
|
checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if 1.0.0",
|
||||||
"libc",
|
"libc",
|
||||||
"wasi",
|
"wasi",
|
||||||
]
|
]
|
||||||
|
@ -355,7 +381,9 @@ name = "helix-view"
|
||||||
version = "0.2.0"
|
version = "0.2.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
|
"chardetng",
|
||||||
"crossterm",
|
"crossterm",
|
||||||
|
"encoding_rs",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"helix-core",
|
"helix-core",
|
||||||
"helix-lsp",
|
"helix-lsp",
|
||||||
|
@ -414,7 +442,7 @@ version = "0.1.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec"
|
checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if 1.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -472,7 +500,7 @@ version = "0.4.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
|
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if 1.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -595,7 +623,7 @@ version = "0.8.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018"
|
checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if 1.0.0",
|
||||||
"instant",
|
"instant",
|
||||||
"libc",
|
"libc",
|
||||||
"redox_syscall",
|
"redox_syscall",
|
||||||
|
|
|
@ -101,7 +101,7 @@ pub use etcetera::home_dir;
|
||||||
|
|
||||||
use etcetera::base_strategy::{choose_base_strategy, BaseStrategy};
|
use etcetera::base_strategy::{choose_base_strategy, BaseStrategy};
|
||||||
|
|
||||||
pub use ropey::{Rope, RopeSlice};
|
pub use ropey::{Rope, RopeBuilder, RopeSlice};
|
||||||
|
|
||||||
pub use tendril::StrTendril as Tendril;
|
pub use tendril::StrTendril as Tendril;
|
||||||
|
|
||||||
|
|
|
@ -239,7 +239,6 @@ impl Command {
|
||||||
join_selections,
|
join_selections,
|
||||||
keep_selections,
|
keep_selections,
|
||||||
keep_primary_selection,
|
keep_primary_selection,
|
||||||
save,
|
|
||||||
completion,
|
completion,
|
||||||
hover,
|
hover,
|
||||||
toggle_comments,
|
toggle_comments,
|
||||||
|
@ -1080,7 +1079,7 @@ mod cmd {
|
||||||
if autofmt {
|
if autofmt {
|
||||||
doc.format(view.id); // TODO: merge into save
|
doc.format(view.id); // TODO: merge into save
|
||||||
}
|
}
|
||||||
helix_lsp::block_on(doc.save());
|
helix_lsp::block_on(tokio::spawn(doc.save()));
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1256,7 +1255,7 @@ mod cmd {
|
||||||
errors.push_str("cannot write a buffer without a filename\n");
|
errors.push_str("cannot write a buffer without a filename\n");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
helix_lsp::block_on(doc.save());
|
helix_lsp::block_on(tokio::spawn(doc.save()));
|
||||||
}
|
}
|
||||||
editor.set_error(errors);
|
editor.set_error(errors);
|
||||||
|
|
||||||
|
@ -2950,15 +2949,6 @@ fn keep_primary_selection(cx: &mut Context) {
|
||||||
doc.set_selection(view.id, selection);
|
doc.set_selection(view.id, selection);
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
|
||||||
|
|
||||||
fn save(cx: &mut Context) {
|
|
||||||
// Spawns an async task to actually do the saving. This way we prevent blocking.
|
|
||||||
|
|
||||||
// TODO: handle save errors somehow?
|
|
||||||
tokio::spawn(doc_mut!(cx.editor).save());
|
|
||||||
}
|
|
||||||
|
|
||||||
fn completion(cx: &mut Context) {
|
fn completion(cx: &mut Context) {
|
||||||
// trigger on trigger char, or if user calls it
|
// trigger on trigger char, or if user calls it
|
||||||
// (or on word char typing??)
|
// (or on word char typing??)
|
||||||
|
|
|
@ -31,9 +31,11 @@ futures-util = { version = "0.3", features = ["std", "async-await"], default-fea
|
||||||
|
|
||||||
slotmap = "1"
|
slotmap = "1"
|
||||||
|
|
||||||
|
encoding_rs = "0.8"
|
||||||
|
chardetng = "0.1"
|
||||||
|
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
toml = "0.5"
|
toml = "0.5"
|
||||||
log = "~0.4"
|
log = "~0.4"
|
||||||
|
|
||||||
which = "4.1"
|
which = "4.1"
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@ use helix_core::{
|
||||||
history::History,
|
history::History,
|
||||||
line_ending::auto_detect_line_ending,
|
line_ending::auto_detect_line_ending,
|
||||||
syntax::{self, LanguageConfiguration},
|
syntax::{self, LanguageConfiguration},
|
||||||
ChangeSet, Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction,
|
ChangeSet, Diagnostic, LineEnding, Rope, RopeBuilder, Selection, State, Syntax, Transaction,
|
||||||
DEFAULT_LINE_ENDING,
|
DEFAULT_LINE_ENDING,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -19,6 +19,8 @@ use crate::{DocumentId, Theme, ViewId};
|
||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
const BUF_SIZE: usize = 8192;
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||||
pub enum Mode {
|
pub enum Mode {
|
||||||
Normal,
|
Normal,
|
||||||
|
@ -39,6 +41,7 @@ pub struct Document {
|
||||||
pub(crate) selections: HashMap<ViewId, Selection>,
|
pub(crate) selections: HashMap<ViewId, Selection>,
|
||||||
|
|
||||||
path: Option<PathBuf>,
|
path: Option<PathBuf>,
|
||||||
|
encoding: &'static encoding_rs::Encoding,
|
||||||
|
|
||||||
/// Current editing mode.
|
/// Current editing mode.
|
||||||
pub mode: Mode,
|
pub mode: Mode,
|
||||||
|
@ -78,6 +81,7 @@ impl fmt::Debug for Document {
|
||||||
.field("text", &self.text)
|
.field("text", &self.text)
|
||||||
.field("selections", &self.selections)
|
.field("selections", &self.selections)
|
||||||
.field("path", &self.path)
|
.field("path", &self.path)
|
||||||
|
.field("encoding", &self.encoding)
|
||||||
.field("mode", &self.mode)
|
.field("mode", &self.mode)
|
||||||
.field("restore_cursor", &self.restore_cursor)
|
.field("restore_cursor", &self.restore_cursor)
|
||||||
.field("syntax", &self.syntax)
|
.field("syntax", &self.syntax)
|
||||||
|
@ -116,6 +120,173 @@ impl FromStr for Mode {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The documentation and implementation of this function should be up-to-date with
|
||||||
|
// its sibling function, `to_writer()`.
|
||||||
|
//
|
||||||
|
/// Decodes a stream of bytes into UTF-8, returning a `Rope` and the
|
||||||
|
/// encoding it was decoded as. The optional `encoding` parameter can
|
||||||
|
/// be used to override encoding auto-detection.
|
||||||
|
pub fn from_reader<R: std::io::Read + ?Sized>(
|
||||||
|
reader: &mut R,
|
||||||
|
encoding: Option<&'static encoding_rs::Encoding>,
|
||||||
|
) -> Result<(Rope, &'static encoding_rs::Encoding), Error> {
|
||||||
|
// These two buffers are 8192 bytes in size each and are used as
|
||||||
|
// intermediaries during the decoding process. Text read into `buf`
|
||||||
|
// from `reader` is decoded into `buf_out` as UTF-8. Once either
|
||||||
|
// `buf_out` is full or the end of the reader was reached, the
|
||||||
|
// contents are appended to `builder`.
|
||||||
|
let mut buf = [0u8; BUF_SIZE];
|
||||||
|
let mut buf_out = [0u8; BUF_SIZE];
|
||||||
|
let mut builder = RopeBuilder::new();
|
||||||
|
|
||||||
|
// By default, the encoding of the text is auto-detected via the
|
||||||
|
// `chardetng` crate which requires sample data from the reader.
|
||||||
|
// As a manual override to this auto-detection is possible, the
|
||||||
|
// same data is read into `buf` to ensure symmetry in the upcoming
|
||||||
|
// loop.
|
||||||
|
let (encoding, mut decoder, mut slice, mut is_empty) = {
|
||||||
|
let read = reader.read(&mut buf)?;
|
||||||
|
let is_empty = read == 0;
|
||||||
|
let encoding = encoding.unwrap_or_else(|| {
|
||||||
|
let mut encoding_detector = chardetng::EncodingDetector::new();
|
||||||
|
encoding_detector.feed(&buf, is_empty);
|
||||||
|
encoding_detector.guess(None, true)
|
||||||
|
});
|
||||||
|
let decoder = encoding.new_decoder();
|
||||||
|
|
||||||
|
// If the amount of bytes read from the reader is less than
|
||||||
|
// `buf.len()`, it is undesirable to read the bytes afterwards.
|
||||||
|
let slice = &buf[..read];
|
||||||
|
(encoding, decoder, slice, is_empty)
|
||||||
|
};
|
||||||
|
|
||||||
|
// `RopeBuilder::append()` expects a `&str`, so this is the "real"
|
||||||
|
// output buffer. When decoding, the number of bytes in the output
|
||||||
|
// buffer will often exceed the number of bytes in the input buffer.
|
||||||
|
// The `result` returned by `decode_to_str()` will state whether or
|
||||||
|
// not that happened. The contents of `buf_str` is appended to
|
||||||
|
// `builder` and it is reused for the next iteration of the decoding
|
||||||
|
// loop.
|
||||||
|
//
|
||||||
|
// As it is possible to read less than the buffer's maximum from `read()`
|
||||||
|
// even when the end of the reader has yet to be reached, the end of
|
||||||
|
// the reader is determined only when a `read()` call returns `0`.
|
||||||
|
//
|
||||||
|
// SAFETY: `buf_out` is a zero-initialized array, thus it will always
|
||||||
|
// contain valid UTF-8.
|
||||||
|
let buf_str = unsafe { std::str::from_utf8_unchecked_mut(&mut buf_out[..]) };
|
||||||
|
let mut total_written = 0usize;
|
||||||
|
loop {
|
||||||
|
let mut total_read = 0usize;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let (result, read, written, ..) = decoder.decode_to_str(
|
||||||
|
&slice[total_read..],
|
||||||
|
&mut buf_str[total_written..],
|
||||||
|
is_empty,
|
||||||
|
);
|
||||||
|
|
||||||
|
// These variables act as the read and write cursors of `buf` and `buf_str` respectively.
|
||||||
|
// They are necessary in case the output buffer fills before decoding of the entire input
|
||||||
|
// loop is complete. Otherwise, the loop would endlessly iterate over the same `buf` and
|
||||||
|
// the data inside the output buffer would be overwritten.
|
||||||
|
total_read += read;
|
||||||
|
total_written += written;
|
||||||
|
match result {
|
||||||
|
encoding_rs::CoderResult::InputEmpty => {
|
||||||
|
debug_assert_eq!(slice.len(), total_read);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
encoding_rs::CoderResult::OutputFull => {
|
||||||
|
debug_assert!(slice.len() > total_read);
|
||||||
|
builder.append(&buf_str[..total_written]);
|
||||||
|
total_written = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Once the end of the stream is reached, the output buffer is
|
||||||
|
// flushed and the loop terminates.
|
||||||
|
if is_empty {
|
||||||
|
debug_assert_eq!(reader.read(&mut buf)?, 0);
|
||||||
|
builder.append(&buf_str[..total_written]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Once the previous input has been processed and decoded, the next set of
|
||||||
|
// data is fetched from the reader. The end of the reader is determined to
|
||||||
|
// be when exactly `0` bytes were read from the reader, as per the invariants
|
||||||
|
// of the `Read` trait.
|
||||||
|
let read = reader.read(&mut buf)?;
|
||||||
|
slice = &buf[..read];
|
||||||
|
is_empty = read == 0;
|
||||||
|
}
|
||||||
|
let rope = builder.finish();
|
||||||
|
Ok((rope, encoding))
|
||||||
|
}
|
||||||
|
|
||||||
|
// The documentation and implementation of this function should be up-to-date with
|
||||||
|
// its sibling function, `from_reader()`.
|
||||||
|
//
|
||||||
|
/// Encodes the text inside `rope` into the given `encoding` and writes the
|
||||||
|
/// encoded output into `writer.` As a `Rope` can only contain valid UTF-8,
|
||||||
|
/// replacement characters may appear in the encoded text.
|
||||||
|
pub async fn to_writer<'a, W: tokio::io::AsyncWriteExt + Unpin + ?Sized>(
|
||||||
|
writer: &'a mut W,
|
||||||
|
encoding: &'static encoding_rs::Encoding,
|
||||||
|
rope: &'a Rope,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
// Text inside a `Rope` is stored as non-contiguous blocks of data called
|
||||||
|
// chunks. The absolute size of each chunk is unknown, thus it is impossible
|
||||||
|
// to predict the end of the chunk iterator ahead of time. Instead, it is
|
||||||
|
// determined by filtering the iterator to remove all empty chunks and then
|
||||||
|
// appending an empty chunk to it. This is valuable for detecting when all
|
||||||
|
// chunks in the `Rope` have been iterated over in the subsequent loop.
|
||||||
|
let iter = rope
|
||||||
|
.chunks()
|
||||||
|
.filter(|c| !c.is_empty())
|
||||||
|
.chain(std::iter::once(""));
|
||||||
|
let mut buf = [0u8; BUF_SIZE];
|
||||||
|
let mut encoder = encoding.new_encoder();
|
||||||
|
let mut total_written = 0usize;
|
||||||
|
for chunk in iter {
|
||||||
|
let is_empty = chunk.is_empty();
|
||||||
|
let mut total_read = 0usize;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let (result, read, written, ..) =
|
||||||
|
encoder.encode_from_utf8(&chunk[total_read..], &mut buf[total_written..], is_empty);
|
||||||
|
|
||||||
|
// These variables act as the read and write cursors of `chunk` and `buf` respectively.
|
||||||
|
// They are necessary in case the output buffer fills before encoding of the entire input
|
||||||
|
// loop is complete. Otherwise, the loop would endlessly iterate over the same `chunk` and
|
||||||
|
// the data inside the output buffer would be overwritten.
|
||||||
|
total_read += read;
|
||||||
|
total_written += written;
|
||||||
|
match result {
|
||||||
|
encoding_rs::CoderResult::InputEmpty => {
|
||||||
|
debug_assert_eq!(chunk.len(), total_read);
|
||||||
|
debug_assert!(buf.len() >= total_written);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
encoding_rs::CoderResult::OutputFull => {
|
||||||
|
debug_assert!(chunk.len() > total_read);
|
||||||
|
writer.write_all(&buf[..total_written]).await?;
|
||||||
|
total_written = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Once the end of the iterator is reached, the output buffer is
|
||||||
|
// flushed and the outer loop terminates.
|
||||||
|
if is_empty {
|
||||||
|
writer.write_all(&buf[..total_written]).await?;
|
||||||
|
writer.flush().await?;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Like std::mem::replace() except it allows the replacement value to be mapped from the
|
/// Like std::mem::replace() except it allows the replacement value to be mapped from the
|
||||||
/// original value.
|
/// original value.
|
||||||
fn take_with<T, F>(mut_ref: &mut T, closure: F)
|
fn take_with<T, F>(mut_ref: &mut T, closure: F)
|
||||||
|
@ -216,13 +387,15 @@ use helix_lsp::lsp;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
impl Document {
|
impl Document {
|
||||||
pub fn new(text: Rope) -> Self {
|
pub fn from(text: Rope, encoding: Option<&'static encoding_rs::Encoding>) -> Self {
|
||||||
|
let encoding = encoding.unwrap_or(encoding_rs::UTF_8);
|
||||||
let changes = ChangeSet::new(&text);
|
let changes = ChangeSet::new(&text);
|
||||||
let old_state = None;
|
let old_state = None;
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
id: DocumentId::default(),
|
id: DocumentId::default(),
|
||||||
path: None,
|
path: None,
|
||||||
|
encoding,
|
||||||
text,
|
text,
|
||||||
selections: HashMap::default(),
|
selections: HashMap::default(),
|
||||||
indent_style: IndentStyle::Spaces(4),
|
indent_style: IndentStyle::Spaces(4),
|
||||||
|
@ -242,29 +415,31 @@ impl Document {
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: async fn?
|
// TODO: async fn?
|
||||||
pub fn load(
|
/// Create a new document from `path`. Encoding is auto-detected, but it can be manually
|
||||||
|
/// overwritten with the `encoding` parameter.
|
||||||
|
pub fn open(
|
||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
|
encoding: Option<&'static encoding_rs::Encoding>,
|
||||||
theme: Option<&Theme>,
|
theme: Option<&Theme>,
|
||||||
config_loader: Option<&syntax::Loader>,
|
config_loader: Option<&syntax::Loader>,
|
||||||
) -> Result<Self, Error> {
|
) -> Result<Self, Error> {
|
||||||
use std::{fs::File, io::BufReader};
|
if !path.exists() {
|
||||||
|
return Ok(Self::default());
|
||||||
let mut doc = if !path.exists() {
|
|
||||||
Rope::from(DEFAULT_LINE_ENDING.as_str())
|
|
||||||
} else {
|
|
||||||
let file = File::open(&path).context(format!("unable to open {:?}", path))?;
|
|
||||||
Rope::from_reader(BufReader::new(file))?
|
|
||||||
};
|
|
||||||
|
|
||||||
// search for line endings
|
|
||||||
let line_ending = auto_detect_line_ending(&doc).unwrap_or(DEFAULT_LINE_ENDING);
|
|
||||||
|
|
||||||
// add missing newline at the end of file
|
|
||||||
if doc.len_bytes() == 0 || !char_is_line_ending(doc.char(doc.len_chars() - 1)) {
|
|
||||||
doc.insert(doc.len_chars(), line_ending.as_str());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut doc = Self::new(doc);
|
let mut file = std::fs::File::open(&path).context(format!("unable to open {:?}", path))?;
|
||||||
|
let (mut rope, encoding) = from_reader(&mut file, encoding)?;
|
||||||
|
|
||||||
|
// search for line endings
|
||||||
|
let line_ending = auto_detect_line_ending(&rope).unwrap_or(DEFAULT_LINE_ENDING);
|
||||||
|
|
||||||
|
// add missing newline at the end of file
|
||||||
|
if rope.len_bytes() == 0 || !char_is_line_ending(rope.char(rope.len_chars() - 1)) {
|
||||||
|
rope.insert(rope.len_chars(), line_ending.as_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut doc = Self::from(rope, Some(encoding));
|
||||||
|
|
||||||
// set the path and try detecting the language
|
// set the path and try detecting the language
|
||||||
doc.set_path(&path)?;
|
doc.set_path(&path)?;
|
||||||
doc.detect_indent_style();
|
doc.detect_indent_style();
|
||||||
|
@ -303,6 +478,8 @@ impl Document {
|
||||||
|
|
||||||
// TODO: do we need some way of ensuring two save operations on the same doc can't run at once?
|
// TODO: do we need some way of ensuring two save operations on the same doc can't run at once?
|
||||||
// or is that handled by the OS/async layer
|
// or is that handled by the OS/async layer
|
||||||
|
/// The `Document`'s text is encoded according to its encoding and written to the file located
|
||||||
|
/// at its `path()`.
|
||||||
pub fn save(&mut self) -> impl Future<Output = Result<(), anyhow::Error>> {
|
pub fn save(&mut self) -> impl Future<Output = Result<(), anyhow::Error>> {
|
||||||
// we clone and move text + path into the future so that we asynchronously save the current
|
// we clone and move text + path into the future so that we asynchronously save the current
|
||||||
// state without blocking any further edits.
|
// state without blocking any further edits.
|
||||||
|
@ -320,8 +497,11 @@ impl Document {
|
||||||
self.last_saved_revision = history.current_revision();
|
self.last_saved_revision = history.current_revision();
|
||||||
self.history.set(history);
|
self.history.set(history);
|
||||||
|
|
||||||
|
let encoding = self.encoding;
|
||||||
|
|
||||||
|
// We encode the file according to the `Document`'s encoding.
|
||||||
async move {
|
async move {
|
||||||
use tokio::{fs::File, io::AsyncWriteExt};
|
use tokio::fs::File;
|
||||||
if let Some(parent) = path.parent() {
|
if let Some(parent) = path.parent() {
|
||||||
// TODO: display a prompt asking the user if the directories should be created
|
// TODO: display a prompt asking the user if the directories should be created
|
||||||
if !parent.exists() {
|
if !parent.exists() {
|
||||||
|
@ -330,13 +510,9 @@ impl Document {
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut file = File::create(path).await?;
|
|
||||||
|
|
||||||
// write all the rope chunks to file
|
let mut file = File::create(path).await?;
|
||||||
for chunk in text.chunks() {
|
to_writer(&mut file, encoding, &text).await?;
|
||||||
file.write_all(chunk.as_bytes()).await?;
|
|
||||||
}
|
|
||||||
// TODO: flush?
|
|
||||||
|
|
||||||
if let Some(language_server) = language_server {
|
if let Some(language_server) = language_server {
|
||||||
language_server
|
language_server
|
||||||
|
@ -531,7 +707,7 @@ impl Document {
|
||||||
self.selections.insert(view_id, selection);
|
self.selections.insert(view_id, selection);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn _apply(&mut self, transaction: &Transaction, view_id: ViewId) -> bool {
|
fn apply_impl(&mut self, transaction: &Transaction, view_id: ViewId) -> bool {
|
||||||
let old_doc = self.text().clone();
|
let old_doc = self.text().clone();
|
||||||
|
|
||||||
let success = transaction.changes().apply(&mut self.text);
|
let success = transaction.changes().apply(&mut self.text);
|
||||||
|
@ -594,7 +770,7 @@ impl Document {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
let success = self._apply(transaction, view_id);
|
let success = self.apply_impl(transaction, view_id);
|
||||||
|
|
||||||
if !transaction.changes().is_empty() {
|
if !transaction.changes().is_empty() {
|
||||||
// Compose this transaction with the previous one
|
// Compose this transaction with the previous one
|
||||||
|
@ -608,7 +784,7 @@ impl Document {
|
||||||
pub fn undo(&mut self, view_id: ViewId) {
|
pub fn undo(&mut self, view_id: ViewId) {
|
||||||
let mut history = self.history.take();
|
let mut history = self.history.take();
|
||||||
let success = if let Some(transaction) = history.undo() {
|
let success = if let Some(transaction) = history.undo() {
|
||||||
self._apply(&transaction, view_id)
|
self.apply_impl(transaction, view_id)
|
||||||
} else {
|
} else {
|
||||||
false
|
false
|
||||||
};
|
};
|
||||||
|
@ -623,7 +799,7 @@ impl Document {
|
||||||
pub fn redo(&mut self, view_id: ViewId) {
|
pub fn redo(&mut self, view_id: ViewId) {
|
||||||
let mut history = self.history.take();
|
let mut history = self.history.take();
|
||||||
let success = if let Some(transaction) = history.redo() {
|
let success = if let Some(transaction) = history.redo() {
|
||||||
self._apply(&transaction, view_id)
|
self.apply_impl(transaction, view_id)
|
||||||
} else {
|
} else {
|
||||||
false
|
false
|
||||||
};
|
};
|
||||||
|
@ -638,14 +814,14 @@ impl Document {
|
||||||
pub fn earlier(&mut self, view_id: ViewId, uk: helix_core::history::UndoKind) {
|
pub fn earlier(&mut self, view_id: ViewId, uk: helix_core::history::UndoKind) {
|
||||||
let txns = self.history.get_mut().earlier(uk);
|
let txns = self.history.get_mut().earlier(uk);
|
||||||
for txn in txns {
|
for txn in txns {
|
||||||
self._apply(&txn, view_id);
|
self.apply_impl(&txn, view_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn later(&mut self, view_id: ViewId, uk: helix_core::history::UndoKind) {
|
pub fn later(&mut self, view_id: ViewId, uk: helix_core::history::UndoKind) {
|
||||||
let txns = self.history.get_mut().later(uk);
|
let txns = self.history.get_mut().later(uk);
|
||||||
for txn in txns {
|
for txn in txns {
|
||||||
self._apply(&txn, view_id);
|
self.apply_impl(&txn, view_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -670,12 +846,10 @@ impl Document {
|
||||||
self.history.set(history);
|
self.history.set(history);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
|
||||||
pub fn id(&self) -> DocumentId {
|
pub fn id(&self) -> DocumentId {
|
||||||
self.id
|
self.id
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
|
||||||
pub fn is_modified(&self) -> bool {
|
pub fn is_modified(&self) -> bool {
|
||||||
let history = self.history.take();
|
let history = self.history.take();
|
||||||
let current_revision = history.current_revision();
|
let current_revision = history.current_revision();
|
||||||
|
@ -683,12 +857,10 @@ impl Document {
|
||||||
current_revision != self.last_saved_revision || !self.changes.is_empty()
|
current_revision != self.last_saved_revision || !self.changes.is_empty()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
|
||||||
pub fn mode(&self) -> Mode {
|
pub fn mode(&self) -> Mode {
|
||||||
self.mode
|
self.mode
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
|
||||||
/// Corresponding language scope name. Usually `source.<lang>`.
|
/// Corresponding language scope name. Usually `source.<lang>`.
|
||||||
pub fn language(&self) -> Option<&str> {
|
pub fn language(&self) -> Option<&str> {
|
||||||
self.language
|
self.language
|
||||||
|
@ -696,21 +868,21 @@ impl Document {
|
||||||
.map(|language| language.scope.as_str())
|
.map(|language| language.scope.as_str())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
|
||||||
pub fn language_config(&self) -> Option<&LanguageConfiguration> {
|
pub fn language_config(&self) -> Option<&LanguageConfiguration> {
|
||||||
self.language.as_deref()
|
self.language.as_deref()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
|
||||||
/// Current document version, incremented at each change.
|
/// Current document version, incremented at each change.
|
||||||
pub fn version(&self) -> i32 {
|
pub fn version(&self) -> i32 {
|
||||||
self.version
|
self.version
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
pub fn language_server(&self) -> Option<&helix_lsp::Client> {
|
pub fn language_server(&self) -> Option<&helix_lsp::Client> {
|
||||||
self.language_server.as_deref()
|
self.language_server.as_deref()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
/// Tree-sitter AST tree
|
/// Tree-sitter AST tree
|
||||||
pub fn syntax(&self) -> Option<&Syntax> {
|
pub fn syntax(&self) -> Option<&Syntax> {
|
||||||
self.syntax.as_ref()
|
self.syntax.as_ref()
|
||||||
|
@ -756,10 +928,12 @@ impl Document {
|
||||||
self.path().map(|path| Url::from_file_path(path).unwrap())
|
self.path().map(|path| Url::from_file_path(path).unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
pub fn text(&self) -> &Rope {
|
pub fn text(&self) -> &Rope {
|
||||||
&self.text
|
&self.text
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
pub fn selection(&self, view_id: ViewId) -> &Selection {
|
pub fn selection(&self, view_id: ViewId) -> &Selection {
|
||||||
&self.selections[&view_id]
|
&self.selections[&view_id]
|
||||||
}
|
}
|
||||||
|
@ -787,6 +961,7 @@ impl Document {
|
||||||
|
|
||||||
// -- LSP methods
|
// -- LSP methods
|
||||||
|
|
||||||
|
#[inline]
|
||||||
pub fn identifier(&self) -> lsp::TextDocumentIdentifier {
|
pub fn identifier(&self) -> lsp::TextDocumentIdentifier {
|
||||||
lsp::TextDocumentIdentifier::new(self.url().unwrap())
|
lsp::TextDocumentIdentifier::new(self.url().unwrap())
|
||||||
}
|
}
|
||||||
|
@ -795,6 +970,7 @@ impl Document {
|
||||||
lsp::VersionedTextDocumentIdentifier::new(self.url().unwrap(), self.version)
|
lsp::VersionedTextDocumentIdentifier::new(self.url().unwrap(), self.version)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
pub fn diagnostics(&self) -> &[Diagnostic] {
|
pub fn diagnostics(&self) -> &[Diagnostic] {
|
||||||
&self.diagnostics
|
&self.diagnostics
|
||||||
}
|
}
|
||||||
|
@ -804,6 +980,13 @@ impl Document {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Default for Document {
|
||||||
|
fn default() -> Self {
|
||||||
|
let text = Rope::from(DEFAULT_LINE_ENDING.as_str());
|
||||||
|
Self::from(text, None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
@ -812,7 +995,7 @@ mod test {
|
||||||
fn changeset_to_changes() {
|
fn changeset_to_changes() {
|
||||||
use helix_lsp::{lsp, Client, OffsetEncoding};
|
use helix_lsp::{lsp, Client, OffsetEncoding};
|
||||||
let text = Rope::from("hello");
|
let text = Rope::from("hello");
|
||||||
let mut doc = Document::new(text);
|
let mut doc = Document::from(text, None);
|
||||||
let view = ViewId::default();
|
let view = ViewId::default();
|
||||||
doc.set_selection(view, Selection::single(5, 5));
|
doc.set_selection(view, Selection::single(5, 5));
|
||||||
|
|
||||||
|
@ -921,4 +1104,94 @@ mod test {
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_line_ending() {
|
||||||
|
if cfg!(windows) {
|
||||||
|
assert_eq!(Document::default().text().to_string(), "\r\n");
|
||||||
|
} else {
|
||||||
|
assert_eq!(Document::default().text().to_string(), "\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! test_decode {
|
||||||
|
($label:expr, $label_override:expr) => {
|
||||||
|
let encoding = encoding_rs::Encoding::for_label($label_override.as_bytes()).unwrap();
|
||||||
|
let base_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests");
|
||||||
|
let path = base_path.join(format!("{}_in.txt", $label));
|
||||||
|
let ref_path = base_path.join(format!("{}_in_ref.txt", $label));
|
||||||
|
assert!(path.exists());
|
||||||
|
assert!(ref_path.exists());
|
||||||
|
|
||||||
|
let mut file = std::fs::File::open(path).unwrap();
|
||||||
|
let text = from_reader(&mut file, Some(encoding))
|
||||||
|
.unwrap()
|
||||||
|
.0
|
||||||
|
.to_string();
|
||||||
|
let expectation = std::fs::read_to_string(ref_path).unwrap();
|
||||||
|
assert_eq!(text[..], expectation[..]);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! test_encode {
|
||||||
|
($label:expr, $label_override:expr) => {
|
||||||
|
let encoding = encoding_rs::Encoding::for_label($label_override.as_bytes()).unwrap();
|
||||||
|
let base_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests");
|
||||||
|
let path = base_path.join(format!("{}_out.txt", $label));
|
||||||
|
let ref_path = base_path.join(format!("{}_out_ref.txt", $label));
|
||||||
|
assert!(path.exists());
|
||||||
|
assert!(ref_path.exists());
|
||||||
|
|
||||||
|
let text = Rope::from_str(&std::fs::read_to_string(path).unwrap());
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
helix_lsp::block_on(to_writer(&mut buf, encoding, &text)).unwrap();
|
||||||
|
|
||||||
|
let expectation = std::fs::read(ref_path).unwrap();
|
||||||
|
assert_eq!(buf, expectation);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! test_decode_fn {
|
||||||
|
($name:ident, $label:expr, $label_override:expr) => {
|
||||||
|
#[test]
|
||||||
|
fn $name() {
|
||||||
|
test_decode!($label, $label_override);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
($name:ident, $label:expr) => {
|
||||||
|
#[test]
|
||||||
|
fn $name() {
|
||||||
|
test_decode!($label, $label);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! test_encode_fn {
|
||||||
|
($name:ident, $label:expr, $label_override:expr) => {
|
||||||
|
#[test]
|
||||||
|
fn $name() {
|
||||||
|
test_encode!($label, $label_override);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
($name:ident, $label:expr) => {
|
||||||
|
#[test]
|
||||||
|
fn $name() {
|
||||||
|
test_encode!($label, $label);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
test_decode_fn!(test_big5_decode, "big5");
|
||||||
|
test_encode_fn!(test_big5_encode, "big5");
|
||||||
|
test_decode_fn!(test_euc_kr_decode, "euc_kr", "EUC-KR");
|
||||||
|
test_encode_fn!(test_euc_kr_encode, "euc_kr", "EUC-KR");
|
||||||
|
test_decode_fn!(test_gb18030_decode, "gb18030");
|
||||||
|
test_encode_fn!(test_gb18030_encode, "gb18030");
|
||||||
|
test_decode_fn!(test_iso_2022_jp_decode, "iso_2022_jp", "ISO-2022-JP");
|
||||||
|
test_encode_fn!(test_iso_2022_jp_encode, "iso_2022_jp", "ISO-2022-JP");
|
||||||
|
test_decode_fn!(test_jis0208_decode, "jis0208", "EUC-JP");
|
||||||
|
test_encode_fn!(test_jis0208_encode, "jis0208", "EUC-JP");
|
||||||
|
test_decode_fn!(test_jis0212_decode, "jis0212", "EUC-JP");
|
||||||
|
test_decode_fn!(test_shift_jis_decode, "shift_jis");
|
||||||
|
test_encode_fn!(test_shift_jis_encode, "shift_jis");
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,7 @@ use anyhow::Error;
|
||||||
|
|
||||||
pub use helix_core::diagnostic::Severity;
|
pub use helix_core::diagnostic::Severity;
|
||||||
pub use helix_core::register::Registers;
|
pub use helix_core::register::Registers;
|
||||||
use helix_core::{Position, DEFAULT_LINE_ENDING};
|
use helix_core::Position;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Editor {
|
pub struct Editor {
|
||||||
|
@ -171,8 +171,7 @@ impl Editor {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new_file(&mut self, action: Action) -> DocumentId {
|
pub fn new_file(&mut self, action: Action) -> DocumentId {
|
||||||
use helix_core::Rope;
|
let doc = Document::default();
|
||||||
let doc = Document::new(Rope::from(DEFAULT_LINE_ENDING.as_str()));
|
|
||||||
let id = self.documents.insert(doc);
|
let id = self.documents.insert(doc);
|
||||||
self.documents[id].id = id;
|
self.documents[id].id = id;
|
||||||
self.switch(id, action);
|
self.switch(id, action);
|
||||||
|
@ -190,7 +189,7 @@ impl Editor {
|
||||||
let id = if let Some(id) = id {
|
let id = if let Some(id) = id {
|
||||||
id
|
id
|
||||||
} else {
|
} else {
|
||||||
let mut doc = Document::load(path, Some(&self.theme), Some(&self.syn_loader))?;
|
let mut doc = Document::open(path, None, Some(&self.theme), Some(&self.syn_loader))?;
|
||||||
|
|
||||||
// try to find a language server based on the language name
|
// try to find a language server based on the language name
|
||||||
let language_server = doc
|
let language_server = doc
|
||||||
|
|
Loading…
Add table
Reference in a new issue