Dynamically load grammar libraries at runtime
This commit is contained in:
parent
dd5e8082e4
commit
dd2903ff10
8 changed files with 203 additions and 168 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -3,3 +3,4 @@ target
|
|||
helix-term/rustfmt.toml
|
||||
helix-syntax/languages/
|
||||
result
|
||||
runtime/grammars
|
||||
|
|
25
Cargo.lock
generated
25
Cargo.lock
generated
|
@ -61,9 +61,6 @@ name = "cc"
|
|||
version = "1.0.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2"
|
||||
dependencies = [
|
||||
"jobserver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
|
@ -354,8 +351,9 @@ dependencies = [
|
|||
name = "helix-syntax"
|
||||
version = "0.3.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"cc",
|
||||
"serde",
|
||||
"libloading",
|
||||
"threadpool",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
@ -475,15 +473,6 @@ version = "0.4.7"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
version = "0.1.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "972f5ae5d1cb9c6ae417789196c803205313edde988685da5e3aae0827b9e7fd"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jsonrpc-core"
|
||||
version = "17.1.0"
|
||||
|
@ -509,6 +498,16 @@ version = "0.2.97"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "12b8adadd720df158f4d70dfe7ccc6adb0472d7c55ca83445f6a5ab3e36f8fb6"
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6f84d96438c15fcd6c3f244c8fce01d1e2b9c6b5623e9c711dc9286d8fc92d6a"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.4"
|
||||
|
|
|
@ -253,14 +253,14 @@ where
|
|||
|
||||
let doc = Rope::from(doc);
|
||||
use crate::syntax::{
|
||||
Configuration, IndentationConfiguration, Lang, LanguageConfiguration, Loader,
|
||||
Configuration, IndentationConfiguration, LanguageConfiguration, Loader,
|
||||
};
|
||||
use once_cell::sync::OnceCell;
|
||||
let loader = Loader::new(Configuration {
|
||||
language: vec![LanguageConfiguration {
|
||||
scope: "source.rust".to_string(),
|
||||
file_types: vec!["rs".to_string()],
|
||||
language_id: Lang::Rust,
|
||||
language_id: "Rust".to_string(),
|
||||
highlight_config: OnceCell::new(),
|
||||
//
|
||||
roots: vec![],
|
||||
|
|
|
@ -5,7 +5,7 @@ use crate::{
|
|||
Rope, RopeSlice, Tendril,
|
||||
};
|
||||
|
||||
pub use helix_syntax::{get_language, get_language_name, Lang};
|
||||
pub use helix_syntax::get_language;
|
||||
|
||||
use arc_swap::ArcSwap;
|
||||
|
||||
|
@ -31,7 +31,7 @@ pub struct Configuration {
|
|||
#[serde(rename_all = "kebab-case")]
|
||||
pub struct LanguageConfiguration {
|
||||
#[serde(rename = "name")]
|
||||
pub(crate) language_id: Lang,
|
||||
pub(crate) language_id: String,
|
||||
pub scope: String, // source.rust
|
||||
pub file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc>
|
||||
pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml>
|
||||
|
@ -153,7 +153,7 @@ fn read_query(language: &str, filename: &str) -> String {
|
|||
|
||||
impl LanguageConfiguration {
|
||||
fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> {
|
||||
let language = get_language_name(self.language_id).to_ascii_lowercase();
|
||||
let language = self.language_id.to_ascii_lowercase();
|
||||
|
||||
let highlights_query = read_query(&language, "highlights.scm");
|
||||
// always highlight syntax errors
|
||||
|
@ -166,7 +166,7 @@ impl LanguageConfiguration {
|
|||
if highlights_query.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let language = get_language(self.language_id);
|
||||
let language = get_language(&crate::RUNTIME_DIR, &self.language_id).ok()?;
|
||||
let config = HighlightConfiguration::new(
|
||||
language,
|
||||
&highlights_query,
|
||||
|
@ -198,7 +198,7 @@ impl LanguageConfiguration {
|
|||
pub fn indent_query(&self) -> Option<&IndentQuery> {
|
||||
self.indent_query
|
||||
.get_or_init(|| {
|
||||
let language = get_language_name(self.language_id).to_ascii_lowercase();
|
||||
let language = self.language_id.to_ascii_lowercase();
|
||||
|
||||
let toml = load_runtime_file(&language, "indents.toml").ok()?;
|
||||
toml::from_slice(toml.as_bytes()).ok()
|
||||
|
@ -1802,7 +1802,7 @@ mod test {
|
|||
.map(String::from)
|
||||
.collect();
|
||||
|
||||
let language = get_language(Lang::Rust);
|
||||
let language = get_language(&crate::RUNTIME_DIR, "Rust").unwrap();
|
||||
let config = HighlightConfiguration::new(
|
||||
language,
|
||||
&std::fs::read_to_string(
|
||||
|
|
|
@ -12,8 +12,10 @@ include = ["src/**/*", "languages/**/*", "build.rs", "!**/docs/**/*", "!**/test/
|
|||
|
||||
[dependencies]
|
||||
tree-sitter = "0.19"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
libloading = "0.7"
|
||||
anyhow = "1"
|
||||
|
||||
[build-dependencies]
|
||||
cc = { version = "1", features = ["parallel"] }
|
||||
cc = { version = "1" }
|
||||
threadpool = { version = "1.0" }
|
||||
anyhow = "1"
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
use anyhow::Result;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use std::sync::mpsc::channel;
|
||||
|
||||
|
@ -15,66 +17,156 @@ fn collect_tree_sitter_dirs(ignore: &[String]) -> Vec<String> {
|
|||
dirs
|
||||
}
|
||||
|
||||
fn collect_src_files(dir: &str) -> (Vec<String>, Vec<String>) {
|
||||
eprintln!("Collect files for {}", dir);
|
||||
#[cfg(unix)]
|
||||
const DYLIB_EXTENSION: &str = "so";
|
||||
|
||||
let mut c_files = Vec::new();
|
||||
let mut cpp_files = Vec::new();
|
||||
let path = PathBuf::from("languages").join(&dir).join("src");
|
||||
for entry in fs::read_dir(path).unwrap().flatten() {
|
||||
let path = entry.path();
|
||||
if path
|
||||
.file_stem()
|
||||
.unwrap()
|
||||
.to_str()
|
||||
.unwrap()
|
||||
.starts_with("binding")
|
||||
{
|
||||
continue;
|
||||
#[cfg(windows)]
|
||||
const DYLIB_EXTENSION: &str = "dll";
|
||||
|
||||
// const BUILD_TARGET: &'static str = env!("BUILD_TARGET");
|
||||
|
||||
use anyhow::{anyhow, Context};
|
||||
use std::{path::Path, process::Command};
|
||||
|
||||
fn build_library(src_path: &Path, language: &str) -> Result<()> {
|
||||
let header_path = src_path;
|
||||
// let grammar_path = src_path.join("grammar.json");
|
||||
let parser_path = src_path.join("parser.c");
|
||||
let mut scanner_path = src_path.join("scanner.c");
|
||||
|
||||
let scanner_path = if scanner_path.exists() {
|
||||
Some(scanner_path)
|
||||
} else {
|
||||
scanner_path.set_extension("cc");
|
||||
if scanner_path.exists() {
|
||||
Some(scanner_path)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
if let Some(ext) = path.extension() {
|
||||
if ext == "c" {
|
||||
c_files.push(path.to_str().unwrap().to_string());
|
||||
} else if ext == "cc" || ext == "cpp" || ext == "cxx" {
|
||||
cpp_files.push(path.to_str().unwrap().to_string());
|
||||
};
|
||||
let parser_lib_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../runtime/grammars");
|
||||
let mut library_path = parser_lib_path.join(language);
|
||||
library_path.set_extension(DYLIB_EXTENSION);
|
||||
|
||||
let recompile = needs_recompile(&library_path, &parser_path, &scanner_path)
|
||||
.with_context(|| "Failed to compare source and binary timestamps")?;
|
||||
|
||||
if !recompile {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut config = cc::Build::new();
|
||||
config.cpp(true).opt_level(2).cargo_metadata(false);
|
||||
// .target(BUILD_TARGET)
|
||||
// .host(BUILD_TARGET);
|
||||
let compiler = config.get_compiler();
|
||||
let mut command = Command::new(compiler.path());
|
||||
for (key, value) in compiler.env() {
|
||||
command.env(key, value);
|
||||
}
|
||||
|
||||
if cfg!(windows) {
|
||||
command
|
||||
.args(&["/nologo", "/LD", "/I"])
|
||||
.arg(header_path)
|
||||
.arg("/Od")
|
||||
.arg(parser_path);
|
||||
if let Some(scanner_path) = scanner_path.as_ref() {
|
||||
command.arg(scanner_path);
|
||||
}
|
||||
command
|
||||
.arg("/link")
|
||||
.arg(format!("/out:{}", library_path.to_str().unwrap()));
|
||||
} else {
|
||||
command
|
||||
.arg("-shared")
|
||||
.arg("-fPIC")
|
||||
.arg("-fno-exceptions")
|
||||
.arg("-g")
|
||||
.arg("-I")
|
||||
.arg(header_path)
|
||||
.arg("-o")
|
||||
.arg(&library_path)
|
||||
.arg("-O2");
|
||||
if let Some(scanner_path) = scanner_path.as_ref() {
|
||||
if scanner_path.extension() == Some("c".as_ref()) {
|
||||
command.arg("-xc").arg("-std=c99").arg(scanner_path);
|
||||
} else {
|
||||
command.arg(scanner_path);
|
||||
}
|
||||
}
|
||||
command.arg("-xc").arg(parser_path);
|
||||
}
|
||||
(c_files, cpp_files)
|
||||
|
||||
let output = command
|
||||
.output()
|
||||
.with_context(|| "Failed to execute C compiler")?;
|
||||
if !output.status.success() {
|
||||
return Err(anyhow!(
|
||||
"Parser compilation failed.\nStdout: {}\nStderr: {}",
|
||||
String::from_utf8_lossy(&output.stdout),
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
fn needs_recompile(
|
||||
lib_path: &Path,
|
||||
parser_c_path: &Path,
|
||||
scanner_path: &Option<PathBuf>,
|
||||
) -> Result<bool> {
|
||||
if !lib_path.exists() {
|
||||
return Ok(true);
|
||||
}
|
||||
let lib_mtime = mtime(lib_path)?;
|
||||
if mtime(parser_c_path)? > lib_mtime {
|
||||
return Ok(true);
|
||||
}
|
||||
if let Some(scanner_path) = scanner_path {
|
||||
if mtime(scanner_path)? > lib_mtime {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
fn build_c(files: Vec<String>, language: &str) {
|
||||
let mut build = cc::Build::new();
|
||||
for file in files {
|
||||
build
|
||||
.file(&file)
|
||||
.include(PathBuf::from(file).parent().unwrap())
|
||||
.pic(true)
|
||||
.warnings(false);
|
||||
}
|
||||
build.compile(&format!("tree-sitter-{}-c", language));
|
||||
/// Returns the last-modification time of the file system entry at `path`.
///
/// # Errors
///
/// Fails if the metadata for `path` cannot be read, or if the modification
/// time cannot be obtained from that metadata.
fn mtime(path: &Path) -> Result<SystemTime> {
    let metadata = fs::metadata(path)?;
    let modified = metadata.modified()?;
    Ok(modified)
}
|
||||
|
||||
fn build_cpp(files: Vec<String>, language: &str) {
|
||||
let mut build = cc::Build::new();
|
||||
// fn build_c(files: Vec<String>, language: &str) {
|
||||
// let mut build = cc::Build::new();
|
||||
// for file in files {
|
||||
// build
|
||||
// .file(&file)
|
||||
// .include(PathBuf::from(file).parent().unwrap())
|
||||
// .pic(true)
|
||||
// .warnings(false);
|
||||
// }
|
||||
// build.compile(&format!("tree-sitter-{}-c", language));
|
||||
// }
|
||||
|
||||
let flag = if build.get_compiler().is_like_msvc() {
|
||||
"/std:c++17"
|
||||
} else {
|
||||
"-std=c++14"
|
||||
};
|
||||
// fn build_cpp(files: Vec<String>, language: &str) {
|
||||
// let mut build = cc::Build::new();
|
||||
|
||||
for file in files {
|
||||
build
|
||||
.file(&file)
|
||||
.include(PathBuf::from(file).parent().unwrap())
|
||||
.pic(true)
|
||||
.warnings(false)
|
||||
.cpp(true)
|
||||
.flag_if_supported(flag);
|
||||
}
|
||||
build.compile(&format!("tree-sitter-{}-cpp", language));
|
||||
}
|
||||
// let flag = if build.get_compiler().is_like_msvc() {
|
||||
// "/std:c++17"
|
||||
// } else {
|
||||
// "-std=c++14"
|
||||
// };
|
||||
|
||||
// for file in files {
|
||||
// build
|
||||
// .file(&file)
|
||||
// .include(PathBuf::from(file).parent().unwrap())
|
||||
// .pic(true)
|
||||
// .warnings(false)
|
||||
// .cpp(true)
|
||||
// .flag_if_supported(flag);
|
||||
// }
|
||||
// build.compile(&format!("tree-sitter-{}-cpp", language));
|
||||
// }
|
||||
|
||||
fn build_dir(dir: &str, language: &str) {
|
||||
println!("Build language {}", language);
|
||||
|
@ -92,13 +184,9 @@ fn build_dir(dir: &str, language: &str) {
|
|||
eprintln!("You can fix in using 'git submodule init && git submodule update --recursive'.");
|
||||
std::process::exit(1);
|
||||
}
|
||||
let (c, cpp) = collect_src_files(dir);
|
||||
if !c.is_empty() {
|
||||
build_c(c, language);
|
||||
}
|
||||
if !cpp.is_empty() {
|
||||
build_cpp(cpp, language);
|
||||
}
|
||||
|
||||
let path = Path::new("languages").join(dir).join("src");
|
||||
build_library(&path, language).unwrap();
|
||||
}
|
||||
|
||||
fn main() {
|
||||
|
@ -129,6 +217,6 @@ fn main() {
|
|||
// drop(tx);
|
||||
assert_eq!(rx.try_iter().sum::<usize>(), n_jobs);
|
||||
|
||||
build_dir("tree-sitter-typescript/tsx", "tsx");
|
||||
build_dir("tree-sitter-typescript/typescript", "typescript");
|
||||
// build_dir("tree-sitter-typescript/tsx", "tsx");
|
||||
// build_dir("tree-sitter-typescript/typescript", "typescript");
|
||||
}
|
||||
|
|
|
@ -1,94 +1,39 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
use anyhow::{Context, Result};
|
||||
use libloading::{Library, Symbol};
|
||||
use tree_sitter::Language;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! mk_extern {
|
||||
( $( $name:ident ),* ) => {
|
||||
$(
|
||||
extern "C" { pub fn $name() -> Language; }
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! mk_enum {
|
||||
( $( $camel:ident ),* ) => {
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum Lang {
|
||||
$(
|
||||
$camel,
|
||||
)*
|
||||
fn replace_dashes_with_underscores(name: &str) -> String {
|
||||
let mut result = String::with_capacity(name.len());
|
||||
for c in name.chars() {
|
||||
if c == '-' {
|
||||
result.push('_');
|
||||
} else {
|
||||
result.push(c);
|
||||
}
|
||||
};
|
||||
}
|
||||
result
|
||||
}
|
||||
#[cfg(unix)]
|
||||
const DYLIB_EXTENSION: &str = "so";
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! mk_get_language {
|
||||
( $( ($camel:ident, $name:ident) ),* ) => {
|
||||
#[must_use]
|
||||
pub fn get_language(lang: Lang) -> Language {
|
||||
unsafe {
|
||||
match lang {
|
||||
$(
|
||||
Lang::$camel => $name(),
|
||||
)*
|
||||
}
|
||||
}
|
||||
}
|
||||
#[cfg(windows)]
|
||||
const DYLIB_EXTENSION: &str = "dll";
|
||||
|
||||
pub fn get_language(runtime_path: &std::path::Path, name: &str) -> Result<Language> {
|
||||
let name = name.to_ascii_lowercase();
|
||||
let mut library_path = runtime_path.join("grammars").join(&name);
|
||||
// TODO: duplicated under build
|
||||
library_path.set_extension(DYLIB_EXTENSION);
|
||||
|
||||
let library = unsafe { Library::new(&library_path) }
|
||||
.with_context(|| format!("Error opening dynamic library {:?}", &library_path))?;
|
||||
let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(&name));
|
||||
let language = unsafe {
|
||||
let language_fn: Symbol<unsafe extern "C" fn() -> Language> = library
|
||||
.get(language_fn_name.as_bytes())
|
||||
.with_context(|| format!("Failed to load symbol {}", language_fn_name))?;
|
||||
language_fn()
|
||||
};
|
||||
std::mem::forget(library);
|
||||
Ok(language)
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! mk_get_language_name {
|
||||
( $( $camel:ident ),* ) => {
|
||||
#[must_use]
|
||||
pub const fn get_language_name(lang: Lang) -> &'static str {
|
||||
match lang {
|
||||
$(
|
||||
Lang::$camel => stringify!($camel),
|
||||
)*
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! mk_langs {
|
||||
( $( ($camel:ident, $name:ident) ),* ) => {
|
||||
mk_extern!($( $name ),*);
|
||||
mk_enum!($( $camel ),*);
|
||||
mk_get_language!($( ($camel, $name) ),*);
|
||||
mk_get_language_name!($( $camel ),*);
|
||||
};
|
||||
}
|
||||
|
||||
mk_langs!(
|
||||
// 1) Name for enum
|
||||
// 2) tree-sitter function to call to get a Language
|
||||
(Agda, tree_sitter_agda),
|
||||
(Bash, tree_sitter_bash),
|
||||
(Cpp, tree_sitter_cpp),
|
||||
(CSharp, tree_sitter_c_sharp),
|
||||
(Css, tree_sitter_css),
|
||||
(C, tree_sitter_c),
|
||||
(Elixir, tree_sitter_elixir),
|
||||
(Go, tree_sitter_go),
|
||||
// (Haskell, tree_sitter_haskell),
|
||||
(Html, tree_sitter_html),
|
||||
(Javascript, tree_sitter_javascript),
|
||||
(Java, tree_sitter_java),
|
||||
(Json, tree_sitter_json),
|
||||
(Julia, tree_sitter_julia),
|
||||
(Latex, tree_sitter_latex),
|
||||
(Nix, tree_sitter_nix),
|
||||
(Php, tree_sitter_php),
|
||||
(Python, tree_sitter_python),
|
||||
(Ruby, tree_sitter_ruby),
|
||||
(Rust, tree_sitter_rust),
|
||||
(Scala, tree_sitter_scala),
|
||||
(Swift, tree_sitter_swift),
|
||||
(Toml, tree_sitter_toml),
|
||||
(Tsx, tree_sitter_tsx),
|
||||
(Typescript, tree_sitter_typescript)
|
||||
);
|
||||
|
|
0
runtime/grammars/.gitkeep
Normal file
0
runtime/grammars/.gitkeep
Normal file
Loading…
Add table
Reference in a new issue