Add helix-syntax as a wrapper around tree-sitter parsers.

This commit is contained in:
Blaž Hrastnik 2020-09-09 14:41:12 +09:00
parent 563e094916
commit 7eac12a4bb
27 changed files with 417 additions and 0 deletions

80
.gitmodules vendored Normal file
View file

@ -0,0 +1,80 @@
[submodule "helix-syntax/languages/tree-sitter-cpp"]
path = helix-syntax/languages/tree-sitter-cpp
url = https://github.com/tree-sitter/tree-sitter-cpp
shallow = true
[submodule "helix-syntax/languages/tree-sitter-javascript"]
path = helix-syntax/languages/tree-sitter-javascript
url = https://github.com/tree-sitter/tree-sitter-javascript
shallow = true
[submodule "helix-syntax/languages/tree-sitter-julia"]
path = helix-syntax/languages/tree-sitter-julia
url = https://github.com/tree-sitter/tree-sitter-julia
shallow = true
[submodule "helix-syntax/languages/tree-sitter-python"]
path = helix-syntax/languages/tree-sitter-python
url = https://github.com/tree-sitter/tree-sitter-python
shallow = true
[submodule "helix-syntax/languages/tree-sitter-typescript"]
path = helix-syntax/languages/tree-sitter-typescript
url = https://github.com/tree-sitter/tree-sitter-typescript
shallow = true
[submodule "helix-syntax/languages/tree-sitter-agda"]
path = helix-syntax/languages/tree-sitter-agda
url = https://github.com/tree-sitter/tree-sitter-agda
shallow = true
[submodule "helix-syntax/languages/tree-sitter-go"]
path = helix-syntax/languages/tree-sitter-go
url = https://github.com/tree-sitter/tree-sitter-go
shallow = true
[submodule "helix-syntax/languages/tree-sitter-ruby"]
path = helix-syntax/languages/tree-sitter-ruby
url = https://github.com/tree-sitter/tree-sitter-ruby
shallow = true
[submodule "helix-syntax/languages/tree-sitter-java"]
path = helix-syntax/languages/tree-sitter-java
url = https://github.com/tree-sitter/tree-sitter-java
shallow = true
[submodule "helix-syntax/languages/tree-sitter-php"]
path = helix-syntax/languages/tree-sitter-php
url = https://github.com/tree-sitter/tree-sitter-php
shallow = true
[submodule "helix-syntax/languages/tree-sitter-html"]
path = helix-syntax/languages/tree-sitter-html
url = https://github.com/tree-sitter/tree-sitter-html
shallow = true
[submodule "helix-syntax/languages/tree-sitter-scala"]
path = helix-syntax/languages/tree-sitter-scala
url = https://github.com/tree-sitter/tree-sitter-scala
shallow = true
[submodule "helix-syntax/languages/tree-sitter-bash"]
path = helix-syntax/languages/tree-sitter-bash
url = https://github.com/tree-sitter/tree-sitter-bash
shallow = true
[submodule "helix-syntax/languages/tree-sitter-rust"]
path = helix-syntax/languages/tree-sitter-rust
url = https://github.com/tree-sitter/tree-sitter-rust
shallow = true
[submodule "helix-syntax/languages/tree-sitter-json"]
path = helix-syntax/languages/tree-sitter-json
url = https://github.com/tree-sitter/tree-sitter-json
shallow = true
[submodule "helix-syntax/languages/tree-sitter-css"]
path = helix-syntax/languages/tree-sitter-css
url = https://github.com/tree-sitter/tree-sitter-css
shallow = true
[submodule "helix-syntax/languages/tree-sitter-c-sharp"]
path = helix-syntax/languages/tree-sitter-c-sharp
url = https://github.com/tree-sitter/tree-sitter-c-sharp
shallow = true
[submodule "helix-syntax/languages/tree-sitter-c"]
path = helix-syntax/languages/tree-sitter-c
url = https://github.com/tree-sitter/tree-sitter-c
shallow = true
[submodule "helix-syntax/languages/tree-sitter-haskell"]
path = helix-syntax/languages/tree-sitter-haskell
url = https://github.com/tree-sitter/tree-sitter-haskell
shallow = true
[submodule "helix-syntax/languages/tree-sitter-swift"]
path = helix-syntax/languages/tree-sitter-swift
url = https://github.com/tree-sitter/tree-sitter-swift
shallow = true

87
Cargo.lock generated
View file

@ -1,5 +1,14 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "aho-corasick"
version = "0.7.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "043164d8ba5c4c3035fec9bbee8647c0261d788f3474306f93bb65901cae0e86"
dependencies = [
"memchr",
]
[[package]]
name = "anyhow"
version = "1.0.32"
@ -223,6 +232,9 @@ name = "cc"
version = "1.0.59"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66120af515773fb005778dc07c261bd201ec8ce50bd6e7144c927753fe013381"
dependencies = [
"jobserver",
]
[[package]]
name = "cfg-if"
@ -274,6 +286,26 @@ dependencies = [
"winapi",
]
[[package]]
name = "enum-iterator"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c79a6321a1197d7730510c7e3f6cb80432dfefecb32426de8cea0aa19b4bb8d7"
dependencies = [
"enum-iterator-derive",
]
[[package]]
name = "enum-iterator-derive"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e94aa31f7c0dc764f57896dc615ddd76fc13b0d5dca7eb6cc5e018a5a09ec06"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "event-listener"
version = "2.4.0"
@ -344,6 +376,15 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "helix-syntax"
version = "0.1.0"
dependencies = [
"cc",
"enum-iterator",
"tree-sitter",
]
[[package]]
name = "helix-term"
version = "0.1.0"
@ -366,6 +407,15 @@ dependencies = [
"libc",
]
[[package]]
name = "jobserver"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c71313ebb9439f74b00d9d2dcec36440beaf57a6aa0623068441dd7cd81a7f2"
dependencies = [
"libc",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
@ -536,6 +586,24 @@ version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
[[package]]
name = "regex"
version = "1.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"thread_local",
]
[[package]]
name = "regex-syntax"
version = "0.6.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8"
[[package]]
name = "ropey"
version = "1.2.0"
@ -629,6 +697,25 @@ dependencies = [
"utf-8",
]
[[package]]
name = "thread_local"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
dependencies = [
"lazy_static",
]
[[package]]
name = "tree-sitter"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1df33680edb07e4fb76edcbdd9c7b849b96709fb878afcf0ada678d6bda167af"
dependencies = [
"cc",
"regex",
]
[[package]]
name = "tui"
version = "0.10.0"

View file

@ -2,4 +2,5 @@
members = [
"helix-core",
"helix-term",
"helix-syntax",
]

14
helix-syntax/Cargo.toml Normal file
View file

@ -0,0 +1,14 @@
[package]
name = "helix-syntax"
version = "0.1.0"
authors = ["Blaž Hrastnik <blaz@mxxn.io>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
tree-sitter = "0.16"
enum-iterator = "0.6"
[build-dependencies]
cc = { version = "1", features = ["parallel"] }

126
helix-syntax/build.rs Normal file
View file

@ -0,0 +1,126 @@
use cc::Build;
use std::io::{BufWriter, Read, Write};
use std::path::{Path, PathBuf};
use std::{env, fs};
/// Returns the numeric optimization level to use for the C/C++ grammar sources.
///
/// The level is read from the `OPT_LEVEL` environment variable, which Cargo
/// sets for build scripts from the active profile. Besides `0`-`3`, Cargo may
/// also pass `s` or `z` (optimize for size); those have no numeric form, so
/// they are mapped to `2` as the closest numeric level instead of panicking.
///
/// # Panics
///
/// Panics if `OPT_LEVEL` is unset or holds any other unexpected value.
fn get_opt_level() -> u32 {
    let level = env::var("OPT_LEVEL").expect("OPT_LEVEL is not set; run this through Cargo");
    match level.as_str() {
        // Size-optimizing profiles cannot be expressed as a u32.
        "s" | "z" => 2,
        numeric => numeric
            .parse::<u32>()
            .unwrap_or_else(|err| panic!("unsupported OPT_LEVEL {:?}: {}", numeric, err)),
    }
}
/// Tells whether debug information should be emitted for the compiled grammars.
///
/// Returns `true` only when the `DEBUG` environment variable is exactly
/// `"true"`; every other value yields `false`.
///
/// # Panics
///
/// Panics if `DEBUG` is unset or is not valid Unicode.
fn get_debug() -> bool {
    let setting = env::var("DEBUG").unwrap();
    matches!(setting.as_str(), "true")
}
/// Lists the grammar directories found directly under `languages/`.
///
/// Only sub-directories are returned: stray files (editor or OS metadata such
/// as `.DS_Store`) are skipped so they are never mistaken for a grammar.
/// Directory names listed in `ignore` are left out as well. Entries that
/// cannot be read are skipped. The order of the result follows the order in
/// which the file system reports the entries.
///
/// # Panics
///
/// Panics if `languages/` cannot be read or if a directory name is not valid
/// UTF-8.
fn collect_tree_sitter_dirs(ignore: Vec<String>) -> Vec<String> {
    fs::read_dir("languages")
        .unwrap()
        .filter_map(Result::ok)
        .map(|entry| entry.path())
        // A plain file here is not a grammar checkout.
        .filter(|path| path.is_dir())
        .map(|path| path.file_name().unwrap().to_str().unwrap().to_string())
        .filter(|name| !ignore.contains(name))
        .collect()
}
/// Gathers the C and C++ source files of one grammar.
///
/// Scans `languages/<dir>/src` (non-recursively) and returns a pair
/// `(c_files, cpp_files)` of paths as strings. Files whose stem starts with
/// `binding` are left out. `.c` files go into the first list; `.cc`, `.cpp`
/// and `.cxx` files go into the second. Everything else is ignored, as are
/// entries that cannot be read.
///
/// # Panics
///
/// Panics if the `src` directory cannot be read or if a file name is not
/// valid UTF-8.
fn collect_src_files(dir: &str) -> (Vec<String>, Vec<String>) {
    eprintln!("Collect files for {}", dir);

    let src_dir = PathBuf::from("languages").join(dir).join("src");
    let mut c_sources = Vec::new();
    let mut cpp_sources = Vec::new();

    for candidate in fs::read_dir(src_dir).unwrap().filter_map(Result::ok) {
        let candidate = candidate.path();

        let stem = candidate.file_stem().unwrap().to_str().unwrap();
        if stem.starts_with("binding") {
            continue;
        }

        // Pick the destination list from the extension; unknown ones are dropped.
        let bucket = match candidate.extension().and_then(|ext| ext.to_str()) {
            Some("c") => &mut c_sources,
            Some("cc") | Some("cpp") | Some("cxx") => &mut cpp_sources,
            _ => continue,
        };
        bucket.push(candidate.to_str().unwrap().to_string());
    }

    (c_sources, cpp_sources)
}
/// Compiles the C sources of one grammar with `cc` under the library name
/// `tree-sitter-<language>-c`.
///
/// Compiler settings (PIC, optimization level, debug info, silenced
/// warnings, `-std=c99` when supported) are configured once for the whole
/// build rather than once per file, so the environment is read a single time
/// and the `-std=c99` flag is registered only once. Each file's parent
/// directory is added to the include path so sources can find headers that
/// sit next to them.
///
/// # Panics
///
/// Panics if a file path has no parent directory.
fn build_c(files: Vec<String>, language: &str) {
    let mut build = cc::Build::new();
    build
        .pic(true)
        .opt_level(get_opt_level())
        .debug(get_debug())
        .warnings(false)
        .flag_if_supported("-std=c99");

    for file in &files {
        let source = Path::new(file);
        build.file(source).include(source.parent().unwrap());
    }

    build.compile(&format!("tree-sitter-{}-c", language));
}
/// Compiles the C++ sources of one grammar with `cc` under the library name
/// `tree-sitter-<language>-cpp`.
///
/// Compiler settings (PIC, optimization level, debug info, silenced
/// warnings, C++ mode) are configured once for the whole build rather than
/// once per file, so the environment is read a single time. Each file's
/// parent directory is added to the include path so sources can find headers
/// that sit next to them.
///
/// # Panics
///
/// Panics if a file path has no parent directory.
fn build_cpp(files: Vec<String>, language: &str) {
    let mut build = cc::Build::new();
    build
        .pic(true)
        .opt_level(get_opt_level())
        .debug(get_debug())
        .warnings(false)
        .cpp(true);

    for file in &files {
        let source = Path::new(file);
        build.file(source).include(source.parent().unwrap());
    }

    build.compile(&format!("tree-sitter-{}-cpp", language));
}
/// Builds the grammar stored in `languages/<dir>` under the name `language`.
///
/// If the directory is empty or does not exist at all (both happen when the
/// git submodules were not checked out, the latter for nested grammars such
/// as `tree-sitter-typescript/tsx`), a hint about initialising submodules is
/// printed and the process exits with status 1. Otherwise the C and C++
/// sources are collected and each non-empty group is compiled.
///
/// # Panics
///
/// Panics if the directory exists but cannot be read.
fn build_dir(dir: &str, language: &str) {
    println!("Build language {}", language);

    let grammar_dir = PathBuf::from("languages").join(dir);
    let missing_or_empty = match grammar_dir.read_dir() {
        Ok(mut entries) => entries.next().is_none(),
        // A nested grammar directory is absent when its submodule is not checked out.
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => true,
        Err(err) => panic!("failed to read {}: {}", grammar_dir.display(), err),
    };
    if missing_or_empty {
        eprintln!(
            "The directory {} is empty, did you use 'git clone --recursive'?",
            dir
        );
        eprintln!("You can fix in using 'git submodule init && git submodule update --recursive'.");
        std::process::exit(1);
    }

    let (c, cpp) = collect_src_files(dir);
    if !c.is_empty() {
        build_c(c, language);
    }
    if !cpp.is_empty() {
        build_cpp(cpp, language);
    }
}
/// Build-script entry point: compiles every vendored tree-sitter grammar.
///
/// Each directory under `languages/` named `tree-sitter-<language>` is built
/// as `<language>`. Entries without that prefix are skipped with a Cargo
/// warning instead of being sliced blindly, which could panic or produce a
/// bogus language name. `tree-sitter-typescript` is excluded from the scan
/// and its `tsx` and `typescript` sub-grammars are built explicitly
/// afterwards; `tree-sitter-cpp` is excluded and not built at all here.
fn main() {
    const PREFIX: &str = "tree-sitter-";

    let ignore = vec![
        "tree-sitter-typescript".to_string(),
        "tree-sitter-cpp".to_string(),
    ];

    for dir in collect_tree_sitter_dirs(ignore) {
        if !dir.starts_with(PREFIX) {
            println!(
                "cargo:warning=skipping languages/{}: name does not start with {}",
                dir, PREFIX
            );
            continue;
        }
        // The language name is whatever follows the common prefix.
        let language = &dir[PREFIX.len()..];
        build_dir(&dir, language);
    }

    build_dir("tree-sitter-typescript/tsx", "tsx");
    build_dir("tree-sitter-typescript/typescript", "typescript");
}

@ -0,0 +1 @@
Subproject commit 7fcba5a1b9f83f52a3812e8cdaf16cb60b069aff

@ -0,0 +1 @@
Subproject commit 8ece09ca4c0b5e59b124cd19fa92c76b1a9e9dd4

@ -0,0 +1 @@
Subproject commit 99151b1e9293c9e025498fee7e6691e1a52e1d03

@ -0,0 +1 @@
Subproject commit 075a1b2ff5fae3142a9318d9479d568843d2fe5d

@ -0,0 +1 @@
Subproject commit 5e7476bd014445abdae879661e9caf299215478a

@ -0,0 +1 @@
Subproject commit 23f2cb97d47860c517f67f03e1f4b621d5bd2085

@ -0,0 +1 @@
Subproject commit 34181774b3e86b7801c939c79c7b80a82df91a2b

@ -0,0 +1 @@
Subproject commit 2a0aa1cb5f1b787a4056a29fa0791e87846e33fb

@ -0,0 +1 @@
Subproject commit 7f442e1c6163d450c69c75c7a621badc3a0ea98f

@ -0,0 +1 @@
Subproject commit ee0a2a076785145e350fbd413775d1e003f79315

@ -0,0 +1 @@
Subproject commit feca6ec5e577fa30766f0c0a1e03d32c073027f9

@ -0,0 +1 @@
Subproject commit d3976b27df8622ed17bef6dd5e358b398e73c676

@ -0,0 +1 @@
Subproject commit 6a0863f1ce3fcf6f99dc0addb7886dcbd27c5a48

@ -0,0 +1 @@
Subproject commit b0c0367d4b7058921fdc4ba11e257441a64ab809

@ -0,0 +1 @@
Subproject commit 58f57240834d6b88624e32ad0ab9531d55fb7a5d

@ -0,0 +1 @@
Subproject commit 14a5e56a6fff1f2d40c151ae38b5581fc5c44574

@ -0,0 +1 @@
Subproject commit 40620bf4097cbc9cea79504d7e877865df43a19e

@ -0,0 +1 @@
Subproject commit 211bb726bb5857f872247b600c7c1808e641a8d4

@ -0,0 +1 @@
Subproject commit a22fa5e19bae50098e2252ea96cba3aba43f4c58

@ -0,0 +1 @@
Subproject commit 220ae17fad029f86513498648c90198e8bed872e

87
helix-syntax/src/lib.rs Normal file
View file

@ -0,0 +1,87 @@
use enum_iterator::IntoEnumIterator;
use tree_sitter::Language;
/// Declares, for every identifier given, a public `extern "C"` function that
/// takes no arguments and returns a `Language`.
///
/// The `Language` type is resolved where the macro is expanded, so it must be
/// in scope at the call site.
#[macro_export]
macro_rules! mk_extern {
    ( $( $symbol:ident ),* ) => {
        extern "C" {
            $( pub fn $symbol() -> Language; )*
        }
    };
}
/// Generates the public `LANG` enum with one unit variant per identifier.
///
/// In addition to `Clone`, `Debug`, `IntoEnumIterator` and `PartialEq`, the
/// enum derives `Copy`, `Eq` and `Hash`: it has no fields, so values can be
/// passed by value and used as keys in hash-based collections.
///
/// The `IntoEnumIterator` derive is resolved where the macro is expanded, so
/// it must be in scope at the call site.
#[macro_export]
macro_rules! mk_enum {
    ( $( $camel:ident ),* ) => {
        #[derive(Clone, Copy, Debug, Eq, Hash, IntoEnumIterator, PartialEq)]
        pub enum LANG {
            $(
                $camel,
            )*
        }
    };
}
/// Generates `get_language`, which maps each `LANG` variant to the `Language`
/// returned by its paired function.
///
/// Takes a comma-separated list of `(Variant, function)` pairs. `LANG`,
/// `Language` and the functions are resolved where the macro is expanded.
#[macro_export]
macro_rules! mk_get_language {
    ( $( ($variant:ident, $loader:ident) ),* ) => {
        pub fn get_language(lang: &LANG) -> Language {
            match lang {
                // SAFETY: assumes every paired function is linked in and has
                // the declared no-argument signature; this cannot be checked here.
                $( LANG::$variant => unsafe { $loader() }, )*
            }
        }
    };
}
/// Generates `get_language_name`, which returns the name of a `LANG` variant
/// exactly as it is spelled in the macro invocation.
///
/// Takes a comma-separated list of variant identifiers. `LANG` is resolved
/// where the macro is expanded.
#[macro_export]
macro_rules! mk_get_language_name {
    ( $( $variant:ident ),* ) => {
        pub fn get_language_name(lang: &LANG) -> &'static str {
            match *lang {
                $( LANG::$variant => stringify!($variant), )*
            }
        }
    };
}
/// Generates the whole language table from a list of `(Variant, function)`
/// pairs: the extern declarations, the `LANG` enum, `get_language` and
/// `get_language_name`.
///
/// The helper macros are invoked through `$crate::` so this exported macro
/// also works where the helpers were not imported by name. A trailing comma
/// after the last pair is accepted.
#[macro_export]
macro_rules! mk_langs {
    ( $( ($camel:ident, $name:ident) ),* $(,)? ) => {
        $crate::mk_extern!($( $name ),*);
        $crate::mk_enum!($( $camel ),*);
        $crate::mk_get_language!($( ($camel, $name) ),*);
        $crate::mk_get_language_name!($( $camel ),*);
    };
}
mk_langs!(
// Each entry is a `(Variant, function)` pair:
// 1) Variant name for the `LANG` enum (also the string returned by `get_language_name`)
// 2) tree-sitter function to call to get a Language
(Agda, tree_sitter_agda),
(Bash, tree_sitter_bash),
(C, tree_sitter_c),
(CSharp, tree_sitter_c_sharp),
// Cpp is disabled: build.rs lists tree-sitter-cpp in its ignore list and never builds it.
// (Cpp, tree_sitter_cpp),
(Css, tree_sitter_css),
(Go, tree_sitter_go),
(Haskell, tree_sitter_haskell),
(Html, tree_sitter_html),
(Java, tree_sitter_java),
(Javascript, tree_sitter_javascript),
(Json, tree_sitter_json),
(Julia, tree_sitter_julia),
(Php, tree_sitter_php),
(Python, tree_sitter_python),
(Ruby, tree_sitter_ruby),
(Rust, tree_sitter_rust),
(Scala, tree_sitter_scala),
(Swift, tree_sitter_swift),
(Tsx, tree_sitter_tsx),
(Typescript, tree_sitter_typescript)
);

View file

@ -14,6 +14,8 @@ path = "src/main.rs"
# termwiz = { git = "https://github.com/wez/wezterm", features = ["widgets"] }
# termwiz = { path = "../../wezterm/termwiz", default-features = false, features = ["widgets"] }
helix-syntax = { path = "../helix-syntax" }
anyhow = "1"
argh = "0.1.3"
helix-core = { path = "../helix-core" }