More robust syntax detection/grammar loading.

This commit is contained in:
Blaž Hrastnik 2020-09-22 18:23:48 +09:00
parent 2c3b10dbb0
commit eb639eb2e4
11 changed files with 206 additions and 141 deletions

4
.gitmodules vendored
View file

@ -78,3 +78,7 @@
path = helix-syntax/languages/tree-sitter-swift
url = https://github.com/tree-sitter/tree-sitter-swift
shallow = true
[submodule "helix-syntax/languages/tree-sitter-toml"]
path = helix-syntax/languages/tree-sitter-toml
url = https://github.com/ikatyang/tree-sitter-toml
shallow = true

30
Cargo.lock generated
View file

@ -73,15 +73,16 @@ dependencies = [
[[package]]
name = "async-executor"
version = "1.1.1"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a831e74aa1937d3bbd3a356f34c23dbc6b6f0abc5160bd5484a9f75d5e76aea8"
checksum = "d373d78ded7d0b3fa8039375718cde0aace493f2e34fb60f51cbf567562ca801"
dependencies = [
"async-task",
"concurrent-queue",
"fastrand",
"futures-lite",
"once_cell",
"vec-arena",
]
[[package]]
@ -135,12 +136,13 @@ dependencies = [
[[package]]
name = "async-net"
version = "1.3.0"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a48af5438be856056bdeb6c5d895148a715be5915fccee49d1e5b50851dc9b8b"
checksum = "fb04482b77baa38d59d56aee0a7b4266600ab28e2b8be7af03508f6a30ecbdcf"
dependencies = [
"async-io",
"blocking",
"fastrand",
"futures-lite",
]
@ -162,9 +164,9 @@ dependencies = [
[[package]]
name = "async-rwlock"
version = "1.1.0"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f8978b5ae008b5177da07a1bf1bfbe428f9bdb970c3fca0e92ed1c1930d7f34"
checksum = "806b1cc0828c2b1611ccbdd743fc0cc7af09009e62c95a0501c1e5da7b142a22"
dependencies = [
"async-mutex",
"event-listener",
@ -181,9 +183,9 @@ dependencies = [
[[package]]
name = "async-task"
version = "3.0.0"
version = "4.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c17772156ef2829aadc587461c7753af20b7e8db1529bc66855add962a3b35d3"
checksum = "6725e96011a83fae25074a8734932e8d67763522839be7473dcfe8a0d6a378b1"
[[package]]
name = "atomic-waker"
@ -428,9 +430,9 @@ dependencies = [
[[package]]
name = "hermit-abi"
version = "0.1.15"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3deed196b6e7f9e44a2ae8d94225d80302d81208b1bb673fd21fe634645c85a9"
checksum = "4c30f6d0bc6b00693347368a67d41b58f2fb851215ff1da49e90fe2c5c667151"
dependencies = [
"libc",
]
@ -615,9 +617,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "polling"
version = "1.0.3"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0307b8c7f438902536321f63c28cab0362f6ee89f1c7da47e3642ff956641c8b"
checksum = "e0720e0b9ea9d52451cf29d3413ba8a9303f8815d9d9653ef70e03ff73e65566"
dependencies = [
"cfg-if",
"libc",
@ -712,9 +714,9 @@ checksum = "fbee7696b84bbf3d89a1c2eccff0850e3047ed46bfcd2e92c29a2d074d57e252"
[[package]]
name = "smol"
version = "1.0.1"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712d02afa6ac9e7b8c777fd181aff476d009280b54b8c28703d10fa5d7e80d83"
checksum = "d41237ba3e3ada55ff3515d37becc8fa90e5e4af2b13a011ec3f932d9f1b2405"
dependencies = [
"async-channel",
"async-executor",

View file

@ -1,5 +1,6 @@
- Implement backspace/delete
- Implement marks
helper methods: iterate over selection spans in the document.
- Implement marks (superset of Selection/Range)
- Implement style configs, tab settings
- Visual tab width
- Refactor tree-sitter-highlight to work like the atom one, recomputing partial tree updates.

View file

@ -17,3 +17,4 @@ unicode-segmentation = "1.6.0"
unicode-width = "0.1.7"
# slab = "0.4.2"
tree-sitter = "0.16.1"
once_cell = "1.4.1"

View file

@ -1,5 +1,4 @@
#![allow(unused)]
pub mod config;
pub mod graphemes;
pub mod macros;
mod position;

View file

@ -1,4 +1,5 @@
use crate::graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary, RopeGraphemes};
use crate::syntax::LOADER;
use crate::{Position, Range, Rope, RopeSlice, Selection, Syntax};
use anyhow::Error;
@ -48,7 +49,8 @@ impl State {
}
}
pub fn load(path: PathBuf) -> Result<Self, Error> {
// TODO: passing scopes here is awkward
pub fn load(path: PathBuf, scopes: &[String]) -> Result<Self, Error> {
use std::{env, fs::File, io::BufReader, path::PathBuf};
let _current_dir = env::current_dir()?;
@ -57,31 +59,18 @@ impl State {
// TODO: create if not found
let mut state = Self::new(doc);
if let Some(language_config) = LOADER.language_config_for_file_name(path.as_path()) {
let highlight_config = language_config.highlight_config(scopes).unwrap().unwrap();
// TODO: config.configure(scopes) is now delayed, is that ok?
let syntax = Syntax::new(&state.doc, highlight_config.clone());
state.syntax = Some(syntax);
};
state.path = Some(path);
let language = helix_syntax::get_language(&helix_syntax::LANG::Rust);
let mut highlight_config = crate::syntax::HighlightConfiguration::new(
language,
&std::fs::read_to_string(
"../helix-syntax/languages/tree-sitter-rust/queries/highlights.scm",
)
.unwrap(),
&std::fs::read_to_string(
"../helix-syntax/languages/tree-sitter-rust/queries/injections.scm",
)
.unwrap(),
"", // locals.scm
)
.unwrap();
// TODO: config.configure(scopes) is now delayed, is that ok?
// TODO: get_language is called twice
let syntax = Syntax::new(helix_syntax::LANG::Rust, &state.doc, highlight_config);
state.syntax = Some(syntax);
Ok(state)
}

View file

@ -2,21 +2,161 @@ use crate::{Change, Rope, RopeSlice, Transaction};
pub use helix_syntax::LANG;
pub use helix_syntax::{get_language, get_language_name};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use once_cell::sync::OnceCell;
// largely based on tree-sitter/cli/src/loader.rs
/// Static description of one supported language: how to recognise its files
/// and where to find the grammar and query files used for highlighting.
pub struct LanguageConfiguration {
/// Scope name of the language, e.g. `source.rust`.
pub(crate) scope: String, // source.rust
/// Keys this language is registered under in the loader. A path is matched
/// by its full file name first and by its extension second.
pub(crate) file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc>
/// Directory of the grammar; `queries/highlights.scm` and
/// `queries/injections.scm` are read relative to it.
pub(crate) path: PathBuf,
// Not implemented yet:
// content_regex
// injection_regex
// first_line_regex
//
// root_path
//
/// Identifier handed to `get_language` to obtain the tree-sitter grammar.
pub(crate) language_id: LANG,
/// Highlight configuration, built on the first `highlight_config()` call.
/// Holds `None` when no highlights query could be read for the language.
pub(crate) highlight_config: OnceCell<Option<Arc<HighlightConfiguration>>>,
// tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583
}
impl LanguageConfiguration {
    /// Returns the highlight configuration for this language, building and
    /// caching it on the first successful call.
    ///
    /// The query files are read from `<path>/queries/highlights.scm` and
    /// `<path>/queries/injections.scm`. A file that is missing or unreadable is
    /// treated as empty.
    ///
    /// `scopes` is only applied when the configuration is first built; once a
    /// value is cached, later calls return it unchanged and ignore `scopes`.
    ///
    /// # Returns
    ///
    /// * `Ok(Some(config))` - the (possibly cached) configuration.
    /// * `Ok(None)` - the highlights query is empty, so there is nothing to
    ///   highlight with. This result is cached as well.
    ///
    /// # Errors
    ///
    /// Returns an error when the query sources cannot be compiled for the
    /// grammar. Nothing is cached in that case, so a later call tries again.
    pub fn highlight_config(
        &self,
        scopes: &[String],
    ) -> Result<Option<&Arc<HighlightConfiguration>>, anyhow::Error> {
        self.highlight_config
            .get_or_try_init(|| {
                // Best effort: an unreadable query file counts as an empty query.
                let highlights_query =
                    std::fs::read_to_string(self.path.join("queries/highlights.scm"))
                        .unwrap_or_default();
                let injections_query =
                    std::fs::read_to_string(self.path.join("queries/injections.scm"))
                        .unwrap_or_default();
                // locals.scm is not loaded yet.
                let locals_query = "";

                if highlights_query.is_empty() {
                    return Ok(None);
                }

                let language = get_language(&self.language_id);
                HighlightConfiguration::new(
                    language,
                    &highlights_query,
                    &injections_query,
                    locals_query,
                )
                .map(|mut config| {
                    config.configure(scopes);
                    Some(Arc::new(config))
                })
                // Report a malformed query to the caller instead of panicking here.
                .map_err(|err| {
                    anyhow::anyhow!(
                        "failed to build highlight configuration from {}: {:?}",
                        self.path.display(),
                        err
                    )
                })
            })
            .map(Option::as_ref)
    }
}
use once_cell::sync::Lazy;
/// Crate-wide language loader, constructed by `Loader::init` on first access.
pub(crate) static LOADER: Lazy<Loader> = Lazy::new(Loader::init);
/// Registry of the known language configurations, indexed by file type.
pub struct Loader {
// highlight_names ?
/// Every registered configuration; a configuration's id is its index here.
language_configs: Vec<Arc<LanguageConfiguration>>,
/// Maps a file type (a full file name or an extension) to an index into
/// `language_configs`. Only one configuration is kept per file type.
language_config_ids_by_file_type: HashMap<String, usize>, // Vec<usize>
}
impl Loader {
    /// Builds the loader from the built-in list of language configurations.
    fn init() -> Loader {
        // The list is hard-coded for now; it might be loaded from TOML later.
        let builtin = vec![
            LanguageConfiguration {
                scope: "source.rust".to_string(),
                file_types: vec!["rs".to_string()],
                language_id: LANG::Rust,
                highlight_config: OnceCell::new(),
                path: "../helix-syntax/languages/tree-sitter-rust".into(),
            },
            LanguageConfiguration {
                scope: "source.toml".to_string(),
                file_types: vec!["toml".to_string()],
                language_id: LANG::Toml,
                highlight_config: OnceCell::new(),
                path: "../helix-syntax/languages/tree-sitter-toml".into(),
            },
        ];

        let mut language_configs = Vec::new();
        let mut language_config_ids_by_file_type = HashMap::new();

        // A configuration's id is the position it ends up at in `language_configs`.
        for (id, config) in builtin.into_iter().enumerate() {
            for file_type in &config.file_types {
                // A file type registered twice keeps the most recent configuration.
                language_config_ids_by_file_type.insert(file_type.clone(), id);
            }
            language_configs.push(Arc::new(config));
        }

        Loader {
            language_configs,
            language_config_ids_by_file_type,
        }
    }

    /// Looks up the language configuration registered for `path`.
    ///
    /// The complete file name is tried first, then the file extension.
    /// Returns `None` when neither is registered, or when the matching
    /// component is not valid UTF-8.
    pub fn language_config_for_file_name(&self, path: &Path) -> Option<Arc<LanguageConfiguration>> {
        let ids = &self.language_config_ids_by_file_type;

        let by_file_name = path
            .file_name()
            .and_then(|name| name.to_str())
            .and_then(|name| ids.get(name));

        let by_extension = || {
            path.extension()
                .and_then(|ext| ext.to_str())
                .and_then(|ext| ids.get(ext))
        };

        let &id = by_file_name.or_else(by_extension)?;
        self.language_configs.get(id).cloned()

        // TODO: content_regex handling conflict resolution
    }
}
//
pub struct Syntax {
grammar: Language,
// grammar: Grammar,
parser: Parser,
cursors: Vec<QueryCursor>,
config: HighlightConfiguration,
config: Arc<HighlightConfiguration>,
root_layer: LanguageLayer,
}
impl Syntax {
// buffer, grammar, config, grammars, sync_timeout?
pub fn new(language: LANG, source: &Rope, config: HighlightConfiguration) -> Self {
pub fn new(
/*language: LANG,*/ source: &Rope,
config: Arc<HighlightConfiguration>,
) -> Self {
// fetch grammar for parser based on language string
let grammar = get_language(&language);
// let grammar = get_language(&language);
let parser = Parser::new();
let root_layer = LanguageLayer::new();
@ -25,7 +165,7 @@ impl Syntax {
// track scope_descriptor: a Vec of scopes for item in tree
let mut syntax = Self {
grammar,
// grammar,
parser,
cursors: Vec::new(),
config,
@ -48,10 +188,6 @@ impl Syntax {
syntax
}
pub fn configure(&mut self, scopes: &[String]) {
self.config.configure(scopes)
}
pub fn update(&mut self, source: &Rope, changeset: &ChangeSet) -> Result<(), Error> {
self.root_layer
.update(&mut self.parser, &self.config, source, changeset)
@ -88,9 +224,9 @@ impl Syntax {
let mut cursor = QueryCursor::new(); // reuse a pool
let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(self.tree()) };
let cursor_ref = unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
let query_ref = unsafe { mem::transmute::<_, &'static mut Query>(&mut self.config.query) };
let query_ref = unsafe { mem::transmute::<_, &'static Query>(&self.config.query) };
let config_ref =
unsafe { mem::transmute::<_, &'static HighlightConfiguration>(&self.config) };
unsafe { mem::transmute::<_, &'static HighlightConfiguration>(self.config.as_ref()) };
// TODO: if reusing cursors this might need resetting
if let Some(range) = &range {
@ -432,8 +568,8 @@ impl LanguageLayer {
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{iter, mem, ops, str, usize};
use tree_sitter::{
Language, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, QueryMatch,
Range, Tree,
Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError,
QueryMatch, Range, Tree,
};
const CANCELLATION_CHECK_INTERVAL: usize = 100;
@ -462,7 +598,7 @@ pub enum HighlightEvent {
///
/// This struct is immutable and can be shared between threads.
pub struct HighlightConfiguration {
pub language: Language,
pub language: Grammar,
pub query: Query,
combined_injections_query: Option<Query>,
locals_pattern_index: usize,
@ -477,16 +613,6 @@ pub struct HighlightConfiguration {
local_ref_capture_index: Option<u32>,
}
/// Performs syntax highlighting, recognizing a given list of highlight names.
///
/// For the best performance `Highlighter` values should be reused between
/// syntax highlighting calls. A separate highlighter is needed for each thread that
/// is performing highlighting.
pub struct Highlighter {
parser: Parser,
cursors: Vec<QueryCursor>,
}
#[derive(Debug)]
struct LocalDef<'a> {
name: &'a str,
@ -527,70 +653,13 @@ struct HighlightIterLayer<'a> {
depth: usize,
}
impl Default for Highlighter {
fn default() -> Self {
Highlighter {
parser: Parser::new(),
cursors: Vec::new(),
}
}
}
impl Highlighter {
pub fn new() -> Self {
Self::default()
}
pub fn parser(&mut self) -> &mut Parser {
&mut self.parser
}
// /// Iterate over the highlighted regions for a given slice of source code.
// pub fn highlight<'a>(
// &'a mut self,
// config: &'a HighlightConfiguration,
// source: &'a [u8],
// cancellation_flag: Option<&'a AtomicUsize>,
// mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
// ) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
// let layers = HighlightIterLayer::new(
// source,
// self,
// cancellation_flag,
// &mut injection_callback,
// config,
// 0,
// vec![Range {
// start_byte: 0,
// end_byte: usize::MAX,
// start_point: Point::new(0, 0),
// end_point: Point::new(usize::MAX, usize::MAX),
// }],
// )?;
// assert_ne!(layers.len(), 0);
// let mut result = HighlightIter {
// source,
// byte_offset: 0,
// injection_callback,
// cancellation_flag,
// highlighter: self,
// iter_count: 0,
// layers,
// next_event: None,
// last_highlight_range: None,
// };
// result.sort_layers();
// Ok(result)
// }
}
impl HighlightConfiguration {
/// Creates a `HighlightConfiguration` for a given `Language` and set of highlighting
/// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting
/// queries.
///
/// # Parameters
///
/// * `language` - The Tree-sitter `Language` that should be used for parsing.
/// * `language` - The Tree-sitter `Grammar` that should be used for parsing.
/// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
/// should be non-empty, otherwise no syntax highlights will be added.
/// * `injections_query` - A string containing tree patterns for injecting other languages
@ -600,7 +669,7 @@ impl HighlightConfiguration {
///
/// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
pub fn new(
language: Language,
language: Grammar,
highlights_query: &str,
injection_query: &str,
locals_query: &str,

@ -0,0 +1 @@
Subproject commit 42c9ff20c0371bed7f514036e823f10793caacec

View file

@ -82,6 +82,7 @@ mk_langs!(
(Rust, tree_sitter_rust),
(Scala, tree_sitter_scala),
(Swift, tree_sitter_swift),
(Toml, tree_sitter_toml),
(Tsx, tree_sitter_tsx),
(Typescript, tree_sitter_typescript)
);

View file

@ -1,10 +1,5 @@
use crate::Args;
use helix_core::{
state::coords_at_pos,
state::Mode,
syntax::{HighlightConfiguration, HighlightEvent, Highlighter},
State,
};
use helix_core::{state::coords_at_pos, state::Mode, syntax::HighlightEvent, State};
use helix_view::{commands, keymap, View};
use std::{
@ -107,14 +102,18 @@ impl Editor {
// TODO: cache highlight results
// TODO: only recalculate when state.doc is actually modified
let highlights: Vec<_> = view
.state
.syntax
.as_mut()
.unwrap()
.highlight_iter(source_code.as_bytes(), Some(range), None, |_| None)
.unwrap()
.collect(); // TODO: we collect here to avoid double borrow, fix later
let highlights: Vec<_> = match view.state.syntax.as_mut() {
Some(syntax) => {
syntax
.highlight_iter(source_code.as_bytes(), Some(range), None, |_| None)
.unwrap()
.collect() // TODO: we collect here to avoid double borrow, fix later
}
None => vec![Ok(HighlightEvent::Source {
start: range.start,
end: range.end,
})],
};
let mut spans = Vec::new();

View file

@ -14,9 +14,8 @@ pub struct View {
impl View {
pub fn open(path: PathBuf, size: (u16, u16)) -> Result<View, Error> {
let mut state = State::load(path)?;
let theme = Theme::default();
state.syntax.as_mut().unwrap().configure(theme.scopes());
let state = State::load(path, theme.scopes())?;
let view = View {
state,