44e113cb76
When parsing injections, we skip adding a new layer if there is an existing layer covering the same range. When doing so we did not update the parent layer ID, so some layers could have `parent` layer IDs that pointed to a layer that no longer existed in the `layers` HopSlotMap which could cause a panic when using `A-o`. To fix this we update the `parent` pointer for both newly created injection layers and reused ones.
3015 lines
107 KiB
Rust
3015 lines
107 KiB
Rust
mod tree_cursor;
|
|
|
|
use crate::{
|
|
auto_pairs::AutoPairs,
|
|
chars::char_is_line_ending,
|
|
diagnostic::Severity,
|
|
regex::Regex,
|
|
transaction::{ChangeSet, Operation},
|
|
RopeSlice, Tendril,
|
|
};
|
|
|
|
use ahash::RandomState;
|
|
use arc_swap::{ArcSwap, Guard};
|
|
use bitflags::bitflags;
|
|
use globset::GlobSet;
|
|
use hashbrown::raw::RawTable;
|
|
use helix_stdx::rope::{self, RopeSliceExt};
|
|
use slotmap::{DefaultKey as LayerId, HopSlotMap};
|
|
|
|
use std::{
|
|
borrow::Cow,
|
|
cell::RefCell,
|
|
collections::{HashMap, HashSet, VecDeque},
|
|
fmt::{self, Display, Write},
|
|
hash::{Hash, Hasher},
|
|
mem::replace,
|
|
path::{Path, PathBuf},
|
|
str::FromStr,
|
|
sync::Arc,
|
|
};
|
|
|
|
use once_cell::sync::{Lazy, OnceCell};
|
|
use serde::{ser::SerializeSeq, Deserialize, Serialize};
|
|
|
|
use helix_loader::grammar::{get_language, load_runtime_file};
|
|
|
|
pub use tree_cursor::TreeCursor;
|
|
|
|
fn deserialize_regex<'de, D>(deserializer: D) -> Result<Option<Regex>, D::Error>
|
|
where
|
|
D: serde::Deserializer<'de>,
|
|
{
|
|
Option::<String>::deserialize(deserializer)?
|
|
.map(|buf| Regex::new(&buf).map_err(serde::de::Error::custom))
|
|
.transpose()
|
|
}
|
|
|
|
fn deserialize_lsp_config<'de, D>(deserializer: D) -> Result<Option<serde_json::Value>, D::Error>
|
|
where
|
|
D: serde::Deserializer<'de>,
|
|
{
|
|
Option::<toml::Value>::deserialize(deserializer)?
|
|
.map(|toml| toml.try_into().map_err(serde::de::Error::custom))
|
|
.transpose()
|
|
}
|
|
|
|
fn deserialize_tab_width<'de, D>(deserializer: D) -> Result<usize, D::Error>
|
|
where
|
|
D: serde::Deserializer<'de>,
|
|
{
|
|
usize::deserialize(deserializer).and_then(|n| {
|
|
if n > 0 && n <= 16 {
|
|
Ok(n)
|
|
} else {
|
|
Err(serde::de::Error::custom(
|
|
"tab width must be a value from 1 to 16 inclusive",
|
|
))
|
|
}
|
|
})
|
|
}
|
|
|
|
pub fn deserialize_auto_pairs<'de, D>(deserializer: D) -> Result<Option<AutoPairs>, D::Error>
|
|
where
|
|
D: serde::Deserializer<'de>,
|
|
{
|
|
Ok(Option::<AutoPairConfig>::deserialize(deserializer)?.and_then(AutoPairConfig::into))
|
|
}
|
|
|
|
/// Returns the timeout used when a configuration does not specify one.
fn default_timeout() -> u64 {
    const DEFAULT_TIMEOUT: u64 = 20;
    DEFAULT_TIMEOUT
}
|
|
|
|
#[derive(Debug, Serialize, Deserialize)]
|
|
#[serde(rename_all = "kebab-case")]
|
|
pub struct Configuration {
|
|
pub language: Vec<LanguageConfiguration>,
|
|
#[serde(default)]
|
|
pub language_server: HashMap<String, LanguageServerConfiguration>,
|
|
}
|
|
|
|
// largely based on tree-sitter/cli/src/loader.rs
|
|
#[derive(Debug, Serialize, Deserialize)]
|
|
#[serde(rename_all = "kebab-case", deny_unknown_fields)]
|
|
pub struct LanguageConfiguration {
|
|
#[serde(rename = "name")]
|
|
pub language_id: String, // c-sharp, rust, tsx
|
|
#[serde(rename = "language-id")]
|
|
// see the table under https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocumentItem
|
|
pub language_server_language_id: Option<String>, // csharp, rust, typescriptreact, for the language-server
|
|
pub scope: String, // source.rust
|
|
pub file_types: Vec<FileType>, // filename extension or ends_with? <Gemfile, rb, etc>
|
|
#[serde(default)]
|
|
pub shebangs: Vec<String>, // interpreter(s) associated with language
|
|
#[serde(default)]
|
|
pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml>
|
|
#[serde(
|
|
default,
|
|
skip_serializing,
|
|
deserialize_with = "from_comment_tokens",
|
|
alias = "comment-token"
|
|
)]
|
|
pub comment_tokens: Option<Vec<String>>,
|
|
#[serde(
|
|
default,
|
|
skip_serializing,
|
|
deserialize_with = "from_block_comment_tokens"
|
|
)]
|
|
pub block_comment_tokens: Option<Vec<BlockCommentToken>>,
|
|
pub text_width: Option<usize>,
|
|
pub soft_wrap: Option<SoftWrap>,
|
|
|
|
#[serde(default)]
|
|
pub auto_format: bool,
|
|
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub formatter: Option<FormatterConfiguration>,
|
|
|
|
#[serde(default)]
|
|
pub diagnostic_severity: Severity,
|
|
|
|
pub grammar: Option<String>, // tree-sitter grammar name, defaults to language_id
|
|
|
|
// content_regex
|
|
#[serde(default, skip_serializing, deserialize_with = "deserialize_regex")]
|
|
pub injection_regex: Option<Regex>,
|
|
// first_line_regex
|
|
//
|
|
#[serde(skip)]
|
|
pub(crate) highlight_config: OnceCell<Option<Arc<HighlightConfiguration>>>,
|
|
// tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583
|
|
#[serde(
|
|
default,
|
|
skip_serializing_if = "Vec::is_empty",
|
|
serialize_with = "serialize_lang_features",
|
|
deserialize_with = "deserialize_lang_features"
|
|
)]
|
|
pub language_servers: Vec<LanguageServerFeatures>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub indent: Option<IndentationConfiguration>,
|
|
|
|
#[serde(skip)]
|
|
pub(crate) indent_query: OnceCell<Option<Query>>,
|
|
#[serde(skip)]
|
|
pub(crate) textobject_query: OnceCell<Option<TextObjectQuery>>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub debugger: Option<DebugAdapterConfig>,
|
|
|
|
/// Automatic insertion of pairs to parentheses, brackets,
|
|
/// etc. Defaults to true. Optionally, this can be a list of 2-tuples
|
|
/// to specify a list of characters to pair. This overrides the
|
|
/// global setting.
|
|
#[serde(default, skip_serializing, deserialize_with = "deserialize_auto_pairs")]
|
|
pub auto_pairs: Option<AutoPairs>,
|
|
|
|
pub rulers: Option<Vec<u16>>, // if set, override editor's rulers
|
|
|
|
/// Hardcoded LSP root directories relative to the workspace root, like `examples` or `tools/fuzz`.
|
|
/// Falling back to the current working directory if none are configured.
|
|
pub workspace_lsp_roots: Option<Vec<PathBuf>>,
|
|
#[serde(default)]
|
|
pub persistent_diagnostic_sources: Vec<String>,
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Eq, Hash)]
|
|
pub enum FileType {
|
|
/// The extension of the file, either the `Path::extension` or the full
|
|
/// filename if the file does not have an extension.
|
|
Extension(String),
|
|
/// A Unix-style path glob. This is compared to the file's absolute path, so
|
|
/// it can be used to detect files based on their directories. If the glob
|
|
/// is not an absolute path and does not already start with a glob pattern,
|
|
/// a glob pattern will be prepended to it.
|
|
Glob(globset::Glob),
|
|
}
|
|
|
|
impl Serialize for FileType {
|
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
|
where
|
|
S: serde::Serializer,
|
|
{
|
|
use serde::ser::SerializeMap;
|
|
|
|
match self {
|
|
FileType::Extension(extension) => serializer.serialize_str(extension),
|
|
FileType::Glob(glob) => {
|
|
let mut map = serializer.serialize_map(Some(1))?;
|
|
map.serialize_entry("glob", glob.glob())?;
|
|
map.end()
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'de> Deserialize<'de> for FileType {
|
|
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
|
where
|
|
D: serde::de::Deserializer<'de>,
|
|
{
|
|
struct FileTypeVisitor;
|
|
|
|
impl<'de> serde::de::Visitor<'de> for FileTypeVisitor {
|
|
type Value = FileType;
|
|
|
|
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
|
|
formatter.write_str("string or table")
|
|
}
|
|
|
|
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
|
|
where
|
|
E: serde::de::Error,
|
|
{
|
|
Ok(FileType::Extension(value.to_string()))
|
|
}
|
|
|
|
fn visit_map<M>(self, mut map: M) -> Result<Self::Value, M::Error>
|
|
where
|
|
M: serde::de::MapAccess<'de>,
|
|
{
|
|
match map.next_entry::<String, String>()? {
|
|
Some((key, mut glob)) if key == "glob" => {
|
|
// If the glob isn't an absolute path or already starts
|
|
// with a glob pattern, add a leading glob so we
|
|
// properly match relative paths.
|
|
if !glob.starts_with('/') && !glob.starts_with("*/") {
|
|
glob.insert_str(0, "*/");
|
|
}
|
|
|
|
globset::Glob::new(glob.as_str())
|
|
.map(FileType::Glob)
|
|
.map_err(|err| {
|
|
serde::de::Error::custom(format!("invalid `glob` pattern: {}", err))
|
|
})
|
|
}
|
|
Some((key, _value)) => Err(serde::de::Error::custom(format!(
|
|
"unknown key in `file-types` list: {}",
|
|
key
|
|
))),
|
|
None => Err(serde::de::Error::custom(
|
|
"expected a `suffix` key in the `file-types` entry",
|
|
)),
|
|
}
|
|
}
|
|
}
|
|
|
|
deserializer.deserialize_any(FileTypeVisitor)
|
|
}
|
|
}
|
|
|
|
fn from_comment_tokens<'de, D>(deserializer: D) -> Result<Option<Vec<String>>, D::Error>
|
|
where
|
|
D: serde::Deserializer<'de>,
|
|
{
|
|
#[derive(Deserialize)]
|
|
#[serde(untagged)]
|
|
enum CommentTokens {
|
|
Multiple(Vec<String>),
|
|
Single(String),
|
|
}
|
|
Ok(
|
|
Option::<CommentTokens>::deserialize(deserializer)?.map(|tokens| match tokens {
|
|
CommentTokens::Single(val) => vec![val],
|
|
CommentTokens::Multiple(vals) => vals,
|
|
}),
|
|
)
|
|
}
|
|
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub struct BlockCommentToken {
|
|
pub start: String,
|
|
pub end: String,
|
|
}
|
|
|
|
impl Default for BlockCommentToken {
|
|
fn default() -> Self {
|
|
BlockCommentToken {
|
|
start: "/*".to_string(),
|
|
end: "*/".to_string(),
|
|
}
|
|
}
|
|
}
|
|
|
|
fn from_block_comment_tokens<'de, D>(
|
|
deserializer: D,
|
|
) -> Result<Option<Vec<BlockCommentToken>>, D::Error>
|
|
where
|
|
D: serde::Deserializer<'de>,
|
|
{
|
|
#[derive(Deserialize)]
|
|
#[serde(untagged)]
|
|
enum BlockCommentTokens {
|
|
Multiple(Vec<BlockCommentToken>),
|
|
Single(BlockCommentToken),
|
|
}
|
|
Ok(
|
|
Option::<BlockCommentTokens>::deserialize(deserializer)?.map(|tokens| match tokens {
|
|
BlockCommentTokens::Single(val) => vec![val],
|
|
BlockCommentTokens::Multiple(vals) => vals,
|
|
}),
|
|
)
|
|
}
|
|
|
|
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
|
#[serde(rename_all = "kebab-case")]
|
|
pub enum LanguageServerFeature {
|
|
Format,
|
|
GotoDeclaration,
|
|
GotoDefinition,
|
|
GotoTypeDefinition,
|
|
GotoReference,
|
|
GotoImplementation,
|
|
// Goto, use bitflags, combining previous Goto members?
|
|
SignatureHelp,
|
|
Hover,
|
|
DocumentHighlight,
|
|
Completion,
|
|
CodeAction,
|
|
WorkspaceCommand,
|
|
DocumentSymbols,
|
|
WorkspaceSymbols,
|
|
// Symbols, use bitflags, see above?
|
|
Diagnostics,
|
|
RenameSymbol,
|
|
InlayHints,
|
|
}
|
|
|
|
impl Display for LanguageServerFeature {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
use LanguageServerFeature::*;
|
|
let feature = match self {
|
|
Format => "format",
|
|
GotoDeclaration => "goto-declaration",
|
|
GotoDefinition => "goto-definition",
|
|
GotoTypeDefinition => "goto-type-definition",
|
|
GotoReference => "goto-reference",
|
|
GotoImplementation => "goto-implementation",
|
|
SignatureHelp => "signature-help",
|
|
Hover => "hover",
|
|
DocumentHighlight => "document-highlight",
|
|
Completion => "completion",
|
|
CodeAction => "code-action",
|
|
WorkspaceCommand => "workspace-command",
|
|
DocumentSymbols => "document-symbols",
|
|
WorkspaceSymbols => "workspace-symbols",
|
|
Diagnostics => "diagnostics",
|
|
RenameSymbol => "rename-symbol",
|
|
InlayHints => "inlay-hints",
|
|
};
|
|
write!(f, "{feature}",)
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Deserialize)]
|
|
#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
|
|
enum LanguageServerFeatureConfiguration {
|
|
#[serde(rename_all = "kebab-case")]
|
|
Features {
|
|
#[serde(default, skip_serializing_if = "HashSet::is_empty")]
|
|
only_features: HashSet<LanguageServerFeature>,
|
|
#[serde(default, skip_serializing_if = "HashSet::is_empty")]
|
|
except_features: HashSet<LanguageServerFeature>,
|
|
name: String,
|
|
},
|
|
Simple(String),
|
|
}
|
|
|
|
#[derive(Debug, Default)]
|
|
pub struct LanguageServerFeatures {
|
|
pub name: String,
|
|
pub only: HashSet<LanguageServerFeature>,
|
|
pub excluded: HashSet<LanguageServerFeature>,
|
|
}
|
|
|
|
impl LanguageServerFeatures {
|
|
pub fn has_feature(&self, feature: LanguageServerFeature) -> bool {
|
|
(self.only.is_empty() || self.only.contains(&feature)) && !self.excluded.contains(&feature)
|
|
}
|
|
}
|
|
|
|
fn deserialize_lang_features<'de, D>(
|
|
deserializer: D,
|
|
) -> Result<Vec<LanguageServerFeatures>, D::Error>
|
|
where
|
|
D: serde::Deserializer<'de>,
|
|
{
|
|
let raw: Vec<LanguageServerFeatureConfiguration> = Deserialize::deserialize(deserializer)?;
|
|
let res = raw
|
|
.into_iter()
|
|
.map(|config| match config {
|
|
LanguageServerFeatureConfiguration::Simple(name) => LanguageServerFeatures {
|
|
name,
|
|
..Default::default()
|
|
},
|
|
LanguageServerFeatureConfiguration::Features {
|
|
only_features,
|
|
except_features,
|
|
name,
|
|
} => LanguageServerFeatures {
|
|
name,
|
|
only: only_features,
|
|
excluded: except_features,
|
|
},
|
|
})
|
|
.collect();
|
|
Ok(res)
|
|
}
|
|
fn serialize_lang_features<S>(
|
|
map: &Vec<LanguageServerFeatures>,
|
|
serializer: S,
|
|
) -> Result<S::Ok, S::Error>
|
|
where
|
|
S: serde::Serializer,
|
|
{
|
|
let mut serializer = serializer.serialize_seq(Some(map.len()))?;
|
|
for features in map {
|
|
let features = if features.only.is_empty() && features.excluded.is_empty() {
|
|
LanguageServerFeatureConfiguration::Simple(features.name.to_owned())
|
|
} else {
|
|
LanguageServerFeatureConfiguration::Features {
|
|
only_features: features.only.clone(),
|
|
except_features: features.excluded.clone(),
|
|
name: features.name.to_owned(),
|
|
}
|
|
};
|
|
serializer.serialize_element(&features)?;
|
|
}
|
|
serializer.end()
|
|
}
|
|
|
|
fn deserialize_required_root_patterns<'de, D>(deserializer: D) -> Result<Option<GlobSet>, D::Error>
|
|
where
|
|
D: serde::Deserializer<'de>,
|
|
{
|
|
let patterns = Vec::<String>::deserialize(deserializer)?;
|
|
if patterns.is_empty() {
|
|
return Ok(None);
|
|
}
|
|
let mut builder = globset::GlobSetBuilder::new();
|
|
for pattern in patterns {
|
|
let glob = globset::Glob::new(&pattern).map_err(serde::de::Error::custom)?;
|
|
builder.add(glob);
|
|
}
|
|
builder.build().map(Some).map_err(serde::de::Error::custom)
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Deserialize)]
|
|
#[serde(rename_all = "kebab-case")]
|
|
pub struct LanguageServerConfiguration {
|
|
pub command: String,
|
|
#[serde(default)]
|
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
|
pub args: Vec<String>,
|
|
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
|
|
pub environment: HashMap<String, String>,
|
|
#[serde(default, skip_serializing, deserialize_with = "deserialize_lsp_config")]
|
|
pub config: Option<serde_json::Value>,
|
|
#[serde(default = "default_timeout")]
|
|
pub timeout: u64,
|
|
#[serde(
|
|
default,
|
|
skip_serializing,
|
|
deserialize_with = "deserialize_required_root_patterns"
|
|
)]
|
|
pub required_root_patterns: Option<GlobSet>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
#[serde(rename_all = "kebab-case")]
|
|
pub struct FormatterConfiguration {
|
|
pub command: String,
|
|
#[serde(default)]
|
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
|
pub args: Vec<String>,
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]
|
|
#[serde(rename_all = "kebab-case")]
|
|
pub struct AdvancedCompletion {
|
|
pub name: Option<String>,
|
|
pub completion: Option<String>,
|
|
pub default: Option<String>,
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]
|
|
#[serde(rename_all = "kebab-case", untagged)]
|
|
pub enum DebugConfigCompletion {
|
|
Named(String),
|
|
Advanced(AdvancedCompletion),
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]
|
|
#[serde(untagged)]
|
|
pub enum DebugArgumentValue {
|
|
String(String),
|
|
Array(Vec<String>),
|
|
Boolean(bool),
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]
|
|
#[serde(rename_all = "kebab-case")]
|
|
pub struct DebugTemplate {
|
|
pub name: String,
|
|
pub request: String,
|
|
#[serde(default)]
|
|
pub completion: Vec<DebugConfigCompletion>,
|
|
pub args: HashMap<String, DebugArgumentValue>,
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]
|
|
#[serde(rename_all = "kebab-case")]
|
|
pub struct DebugAdapterConfig {
|
|
pub name: String,
|
|
pub transport: String,
|
|
#[serde(default)]
|
|
pub command: String,
|
|
#[serde(default)]
|
|
pub args: Vec<String>,
|
|
pub port_arg: Option<String>,
|
|
pub templates: Vec<DebugTemplate>,
|
|
#[serde(default)]
|
|
pub quirks: DebuggerQuirks,
|
|
}
|
|
|
|
// Different workarounds for adapters' differences
|
|
#[derive(Debug, Default, PartialEq, Eq, Clone, Serialize, Deserialize)]
|
|
pub struct DebuggerQuirks {
|
|
#[serde(default)]
|
|
pub absolute_paths: bool,
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Deserialize)]
|
|
#[serde(rename_all = "kebab-case")]
|
|
pub struct IndentationConfiguration {
|
|
#[serde(deserialize_with = "deserialize_tab_width")]
|
|
pub tab_width: usize,
|
|
pub unit: String,
|
|
}
|
|
|
|
/// How the indentation for a newly inserted line should be determined.
|
|
/// If the selected heuristic is not available (e.g. because the current
|
|
/// language has no tree-sitter indent queries), a simpler one will be used.
|
|
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
|
#[serde(rename_all = "kebab-case")]
|
|
pub enum IndentationHeuristic {
|
|
/// Just copy the indentation of the line that the cursor is currently on.
|
|
Simple,
|
|
/// Use tree-sitter indent queries to compute the expected absolute indentation level of the new line.
|
|
TreeSitter,
|
|
/// Use tree-sitter indent queries to compute the expected difference in indentation between the new line
|
|
/// and the line before. Add this to the actual indentation level of the line before.
|
|
#[default]
|
|
Hybrid,
|
|
}
|
|
|
|
/// Configuration for auto pairs
|
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
|
#[serde(rename_all = "kebab-case", deny_unknown_fields, untagged)]
|
|
pub enum AutoPairConfig {
|
|
/// Enables or disables auto pairing. False means disabled. True means to use the default pairs.
|
|
Enable(bool),
|
|
|
|
/// The mappings of pairs.
|
|
Pairs(HashMap<char, char>),
|
|
}
|
|
|
|
impl Default for AutoPairConfig {
|
|
fn default() -> Self {
|
|
AutoPairConfig::Enable(true)
|
|
}
|
|
}
|
|
|
|
impl From<&AutoPairConfig> for Option<AutoPairs> {
|
|
fn from(auto_pair_config: &AutoPairConfig) -> Self {
|
|
match auto_pair_config {
|
|
AutoPairConfig::Enable(false) => None,
|
|
AutoPairConfig::Enable(true) => Some(AutoPairs::default()),
|
|
AutoPairConfig::Pairs(pairs) => Some(AutoPairs::new(pairs.iter())),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<AutoPairConfig> for Option<AutoPairs> {
|
|
fn from(auto_pairs_config: AutoPairConfig) -> Self {
|
|
(&auto_pairs_config).into()
|
|
}
|
|
}
|
|
|
|
impl FromStr for AutoPairConfig {
|
|
type Err = std::str::ParseBoolError;
|
|
|
|
// only do bool parsing for runtime setting
|
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
|
let enable: bool = s.parse()?;
|
|
Ok(AutoPairConfig::Enable(enable))
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub struct TextObjectQuery {
|
|
pub query: Query,
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub enum CapturedNode<'a> {
|
|
Single(Node<'a>),
|
|
/// Guaranteed to be not empty
|
|
Grouped(Vec<Node<'a>>),
|
|
}
|
|
|
|
impl<'a> CapturedNode<'a> {
|
|
pub fn start_byte(&self) -> usize {
|
|
match self {
|
|
Self::Single(n) => n.start_byte(),
|
|
Self::Grouped(ns) => ns[0].start_byte(),
|
|
}
|
|
}
|
|
|
|
pub fn end_byte(&self) -> usize {
|
|
match self {
|
|
Self::Single(n) => n.end_byte(),
|
|
Self::Grouped(ns) => ns.last().unwrap().end_byte(),
|
|
}
|
|
}
|
|
|
|
pub fn byte_range(&self) -> std::ops::Range<usize> {
|
|
self.start_byte()..self.end_byte()
|
|
}
|
|
}
|
|
|
|
/// The maximum number of in-progress matches a TS cursor can consider at once.
/// It is a constant in order to avoid performance problems for medium to large
/// files, and is applied to a cursor with `set_match_limit`.
/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here.
///
/// Old tree sitter versions used a limit of 32 by default until this limit was
/// removed in version `0.19.5` (it must now be set manually). Without a limit,
/// however, medium to large files run into performance issues: in helix,
/// treesitter motions took multiple seconds to complete in medium-sized rust
/// files (3k loc).
///
/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream
/// (see <https://github.com/neovim/neovim/issues/14897> and <https://github.com/neovim/neovim/pull/14915>).
/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance.
///
/// Neovim chose 64 for this value somewhat arbitrarily (<https://github.com/neovim/neovim/pull/18397>).
/// 64 is too low for some languages though. In particular, it breaks some highlighting for record fields in Erlang record definitions.
/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high.
const TREE_SITTER_MATCH_LIMIT: u32 = 256;
|
|
|
|
impl TextObjectQuery {
|
|
/// Run the query on the given node and return sub nodes which match given
|
|
/// capture ("function.inside", "class.around", etc).
|
|
///
|
|
/// Captures may contain multiple nodes by using quantifiers (+, *, etc),
|
|
/// and support for this is partial and could use improvement.
|
|
///
|
|
/// ```query
|
|
/// (comment)+ @capture
|
|
///
|
|
/// ; OR
|
|
/// (
|
|
/// (comment)*
|
|
/// .
|
|
/// (function)
|
|
/// ) @capture
|
|
/// ```
|
|
pub fn capture_nodes<'a>(
|
|
&'a self,
|
|
capture_name: &str,
|
|
node: Node<'a>,
|
|
slice: RopeSlice<'a>,
|
|
cursor: &'a mut QueryCursor,
|
|
) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
|
|
self.capture_nodes_any(&[capture_name], node, slice, cursor)
|
|
}
|
|
|
|
/// Find the first capture that exists out of all given `capture_names`
|
|
/// and return sub nodes that match this capture.
|
|
pub fn capture_nodes_any<'a>(
|
|
&'a self,
|
|
capture_names: &[&str],
|
|
node: Node<'a>,
|
|
slice: RopeSlice<'a>,
|
|
cursor: &'a mut QueryCursor,
|
|
) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
|
|
let capture_idx = capture_names
|
|
.iter()
|
|
.find_map(|cap| self.query.capture_index_for_name(cap))?;
|
|
|
|
cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
|
|
|
|
let nodes = cursor
|
|
.captures(&self.query, node, RopeProvider(slice))
|
|
.filter_map(move |(mat, _)| {
|
|
let nodes: Vec<_> = mat
|
|
.captures
|
|
.iter()
|
|
.filter_map(|cap| (cap.index == capture_idx).then_some(cap.node))
|
|
.collect();
|
|
|
|
if nodes.len() > 1 {
|
|
Some(CapturedNode::Grouped(nodes))
|
|
} else {
|
|
nodes.into_iter().map(CapturedNode::Single).next()
|
|
}
|
|
});
|
|
|
|
Some(nodes)
|
|
}
|
|
}
|
|
|
|
pub fn read_query(language: &str, filename: &str) -> String {
|
|
static INHERITS_REGEX: Lazy<Regex> =
|
|
Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap());
|
|
|
|
let query = load_runtime_file(language, filename).unwrap_or_default();
|
|
|
|
// replaces all "; inherits <language>(,<language>)*" with the queries of the given language(s)
|
|
INHERITS_REGEX
|
|
.replace_all(&query, |captures: ®ex::Captures| {
|
|
captures[1]
|
|
.split(',')
|
|
.fold(String::new(), |mut output, language| {
|
|
// `write!` to a String cannot fail.
|
|
write!(output, "\n{}\n", read_query(language, filename)).unwrap();
|
|
output
|
|
})
|
|
})
|
|
.to_string()
|
|
}
|
|
|
|
impl LanguageConfiguration {
|
|
fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> {
|
|
let highlights_query = read_query(&self.language_id, "highlights.scm");
|
|
// always highlight syntax errors
|
|
// highlights_query += "\n(ERROR) @error";
|
|
|
|
let injections_query = read_query(&self.language_id, "injections.scm");
|
|
let locals_query = read_query(&self.language_id, "locals.scm");
|
|
|
|
if highlights_query.is_empty() {
|
|
None
|
|
} else {
|
|
let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id))
|
|
.map_err(|err| {
|
|
log::error!(
|
|
"Failed to load tree-sitter parser for language {:?}: {}",
|
|
self.language_id,
|
|
err
|
|
)
|
|
})
|
|
.ok()?;
|
|
let config = HighlightConfiguration::new(
|
|
language,
|
|
&highlights_query,
|
|
&injections_query,
|
|
&locals_query,
|
|
)
|
|
.map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}", self.language_id, err))
|
|
.ok()?;
|
|
|
|
config.configure(scopes);
|
|
Some(Arc::new(config))
|
|
}
|
|
}
|
|
|
|
pub fn reconfigure(&self, scopes: &[String]) {
|
|
if let Some(Some(config)) = self.highlight_config.get() {
|
|
config.configure(scopes);
|
|
}
|
|
}
|
|
|
|
pub fn highlight_config(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> {
|
|
self.highlight_config
|
|
.get_or_init(|| self.initialize_highlight(scopes))
|
|
.clone()
|
|
}
|
|
|
|
pub fn is_highlight_initialized(&self) -> bool {
|
|
self.highlight_config.get().is_some()
|
|
}
|
|
|
|
pub fn indent_query(&self) -> Option<&Query> {
|
|
self.indent_query
|
|
.get_or_init(|| self.load_query("indents.scm"))
|
|
.as_ref()
|
|
}
|
|
|
|
pub fn textobject_query(&self) -> Option<&TextObjectQuery> {
|
|
self.textobject_query
|
|
.get_or_init(|| {
|
|
self.load_query("textobjects.scm")
|
|
.map(|query| TextObjectQuery { query })
|
|
})
|
|
.as_ref()
|
|
}
|
|
|
|
pub fn scope(&self) -> &str {
|
|
&self.scope
|
|
}
|
|
|
|
fn load_query(&self, kind: &str) -> Option<Query> {
|
|
let query_text = read_query(&self.language_id, kind);
|
|
if query_text.is_empty() {
|
|
return None;
|
|
}
|
|
let lang = &self.highlight_config.get()?.as_ref()?.language;
|
|
Query::new(lang, &query_text)
|
|
.map_err(|e| {
|
|
log::error!(
|
|
"Failed to parse {} queries for {}: {}",
|
|
kind,
|
|
self.language_id,
|
|
e
|
|
)
|
|
})
|
|
.ok()
|
|
}
|
|
}
|
|
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
|
|
#[serde(default, rename_all = "kebab-case", deny_unknown_fields)]
|
|
pub struct SoftWrap {
|
|
/// Soft wrap lines that exceed viewport width. Default to off
|
|
// NOTE: Option on purpose because the struct is shared between language config and global config.
|
|
// By default the option is None so that the language config falls back to the global config unless explicitly set.
|
|
pub enable: Option<bool>,
|
|
/// Maximum space left free at the end of the line.
|
|
/// This space is used to wrap text at word boundaries. If that is not possible within this limit
|
|
/// the word is simply split at the end of the line.
|
|
///
|
|
/// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views.
|
|
///
|
|
/// Default to 20
|
|
pub max_wrap: Option<u16>,
|
|
/// Maximum number of indentation that can be carried over from the previous line when softwrapping.
|
|
/// If a line is indented further then this limit it is rendered at the start of the viewport instead.
|
|
///
|
|
/// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views.
|
|
///
|
|
/// Default to 40
|
|
pub max_indent_retain: Option<u16>,
|
|
/// Indicator placed at the beginning of softwrapped lines
|
|
///
|
|
/// Defaults to ↪
|
|
pub wrap_indicator: Option<String>,
|
|
/// Softwrap at `text_width` instead of viewport width if it is shorter
|
|
pub wrap_at_text_width: Option<bool>,
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
struct FileTypeGlob {
|
|
glob: globset::Glob,
|
|
language_id: usize,
|
|
}
|
|
|
|
impl FileTypeGlob {
|
|
fn new(glob: globset::Glob, language_id: usize) -> Self {
|
|
Self { glob, language_id }
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
struct FileTypeGlobMatcher {
|
|
matcher: globset::GlobSet,
|
|
file_types: Vec<FileTypeGlob>,
|
|
}
|
|
|
|
impl FileTypeGlobMatcher {
|
|
fn new(file_types: Vec<FileTypeGlob>) -> Result<Self, globset::Error> {
|
|
let mut builder = globset::GlobSetBuilder::new();
|
|
for file_type in &file_types {
|
|
builder.add(file_type.glob.clone());
|
|
}
|
|
|
|
Ok(Self {
|
|
matcher: builder.build()?,
|
|
file_types,
|
|
})
|
|
}
|
|
|
|
fn language_id_for_path(&self, path: &Path) -> Option<&usize> {
|
|
self.matcher
|
|
.matches(path)
|
|
.iter()
|
|
.filter_map(|idx| self.file_types.get(*idx))
|
|
.max_by_key(|file_type| file_type.glob.glob().len())
|
|
.map(|file_type| &file_type.language_id)
|
|
}
|
|
}
|
|
|
|
// Expose loader as Lazy<> global since it's always static?
|
|
|
|
#[derive(Debug)]
|
|
pub struct Loader {
|
|
// highlight_names ?
|
|
language_configs: Vec<Arc<LanguageConfiguration>>,
|
|
language_config_ids_by_extension: HashMap<String, usize>, // Vec<usize>
|
|
language_config_ids_glob_matcher: FileTypeGlobMatcher,
|
|
language_config_ids_by_shebang: HashMap<String, usize>,
|
|
|
|
language_server_configs: HashMap<String, LanguageServerConfiguration>,
|
|
|
|
scopes: ArcSwap<Vec<String>>,
|
|
}
|
|
|
|
/// Error returned by `Loader::new`: the `globset` error raised while
/// building the file-type glob matcher.
pub type LoaderError = globset::Error;
|
|
|
|
impl Loader {
|
|
pub fn new(config: Configuration) -> Result<Self, LoaderError> {
|
|
let mut language_configs = Vec::new();
|
|
let mut language_config_ids_by_extension = HashMap::new();
|
|
let mut language_config_ids_by_shebang = HashMap::new();
|
|
let mut file_type_globs = Vec::new();
|
|
|
|
for config in config.language {
|
|
// get the next id
|
|
let language_id = language_configs.len();
|
|
|
|
for file_type in &config.file_types {
|
|
// entry().or_insert(Vec::new).push(language_id);
|
|
match file_type {
|
|
FileType::Extension(extension) => {
|
|
language_config_ids_by_extension.insert(extension.clone(), language_id);
|
|
}
|
|
FileType::Glob(glob) => {
|
|
file_type_globs.push(FileTypeGlob::new(glob.to_owned(), language_id));
|
|
}
|
|
};
|
|
}
|
|
for shebang in &config.shebangs {
|
|
language_config_ids_by_shebang.insert(shebang.clone(), language_id);
|
|
}
|
|
|
|
language_configs.push(Arc::new(config));
|
|
}
|
|
|
|
Ok(Self {
|
|
language_configs,
|
|
language_config_ids_by_extension,
|
|
language_config_ids_glob_matcher: FileTypeGlobMatcher::new(file_type_globs)?,
|
|
language_config_ids_by_shebang,
|
|
language_server_configs: config.language_server,
|
|
scopes: ArcSwap::from_pointee(Vec::new()),
|
|
})
|
|
}
|
|
|
|
/// Returns the language configuration for `path`.
///
/// The file-type globs are consulted first; when none matches, the path's
/// extension (if it is valid UTF-8) is looked up in the extension table.
pub fn language_config_for_file_name(&self, path: &Path) -> Option<Arc<LanguageConfiguration>> {
    let by_glob = self
        .language_config_ids_glob_matcher
        .language_id_for_path(path);

    let configuration_id = match by_glob {
        Some(id) => Some(id),
        None => path
            .extension()
            .and_then(|extension| extension.to_str())
            .and_then(|extension| self.language_config_ids_by_extension.get(extension)),
    };

    let id = *configuration_id?;
    self.language_configs.get(id).cloned()

    // TODO: content_regex handling conflict resolution
}
|
|
|
|
pub fn language_config_for_shebang(
|
|
&self,
|
|
source: RopeSlice,
|
|
) -> Option<Arc<LanguageConfiguration>> {
|
|
let line = Cow::from(source.line(0));
|
|
static SHEBANG_REGEX: Lazy<Regex> =
|
|
Lazy::new(|| Regex::new(&["^", SHEBANG].concat()).unwrap());
|
|
let configuration_id = SHEBANG_REGEX
|
|
.captures(&line)
|
|
.and_then(|cap| self.language_config_ids_by_shebang.get(&cap[1]));
|
|
|
|
configuration_id.and_then(|&id| self.language_configs.get(id).cloned())
|
|
}
|
|
|
|
/// Returns the first language configuration whose `scope` equals `scope`.
pub fn language_config_for_scope(&self, scope: &str) -> Option<Arc<LanguageConfiguration>> {
    let found = self
        .language_configs
        .iter()
        .find(|candidate| candidate.scope == scope)?;
    Some(Arc::clone(found))
}
|
|
|
|
/// Returns the first language configuration whose `language_id` is exactly `id`.
pub fn language_config_for_language_id(&self, id: &str) -> Option<Arc<LanguageConfiguration>> {
    let found = self
        .language_configs
        .iter()
        .find(|candidate| candidate.language_id == id)?;
    Some(Arc::clone(found))
}
|
|
|
|
/// Unlike language_config_for_language_id, which only returns Some for an exact id, this
|
|
/// function will perform a regex match on the given string to find the closest language match.
|
|
pub fn language_config_for_name(&self, name: &str) -> Option<Arc<LanguageConfiguration>> {
|
|
let mut best_match_length = 0;
|
|
let mut best_match_position = None;
|
|
for (i, configuration) in self.language_configs.iter().enumerate() {
|
|
if let Some(injection_regex) = &configuration.injection_regex {
|
|
if let Some(mat) = injection_regex.find(name) {
|
|
let length = mat.end() - mat.start();
|
|
if length > best_match_length {
|
|
best_match_position = Some(i);
|
|
best_match_length = length;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
best_match_position.map(|i| self.language_configs[i].clone())
|
|
}
|
|
|
|
pub fn language_configuration_for_injection_string(
|
|
&self,
|
|
capture: &InjectionLanguageMarker,
|
|
) -> Option<Arc<LanguageConfiguration>> {
|
|
match capture {
|
|
InjectionLanguageMarker::Name(string) => self.language_config_for_name(string),
|
|
InjectionLanguageMarker::Filename(file) => self.language_config_for_file_name(file),
|
|
InjectionLanguageMarker::Shebang(shebang) => {
|
|
self.language_config_for_language_id(shebang)
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Iterates over all loaded language configurations, in storage order.
pub fn language_configs(&self) -> impl Iterator<Item = &Arc<LanguageConfiguration>> {
    let configs = &self.language_configs;
    configs.iter()
}
|
|
|
|
pub fn language_server_configs(&self) -> &HashMap<String, LanguageServerConfiguration> {
|
|
&self.language_server_configs
|
|
}
|
|
|
|
pub fn set_scopes(&self, scopes: Vec<String>) {
|
|
self.scopes.store(Arc::new(scopes));
|
|
|
|
// Reconfigure existing grammars
|
|
for config in self
|
|
.language_configs
|
|
.iter()
|
|
.filter(|cfg| cfg.is_highlight_initialized())
|
|
{
|
|
config.reconfigure(&self.scopes());
|
|
}
|
|
}
|
|
|
|
/// Loads the currently stored scope list.
pub fn scopes(&self) -> Guard<Arc<Vec<String>>> {
    let current = self.scopes.load();
    current
}
|
|
}
|
|
|
|
pub struct TsParser {
|
|
parser: tree_sitter::Parser,
|
|
pub cursors: Vec<QueryCursor>,
|
|
}
|
|
|
|
// could also just use a pool, or a single instance?
|
|
thread_local! {
|
|
pub static PARSER: RefCell<TsParser> = RefCell::new(TsParser {
|
|
parser: Parser::new(),
|
|
cursors: Vec::new(),
|
|
})
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub struct Syntax {
|
|
layers: HopSlotMap<LayerId, LanguageLayer>,
|
|
root: LayerId,
|
|
loader: Arc<ArcSwap<Loader>>,
|
|
}
|
|
|
|
/// Returns the text of `source` within the byte `range` as a `Cow<str>`.
fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<str> {
    let fragment = source.byte_slice(range);
    fragment.into()
}
|
|
|
|
impl Syntax {
|
|
pub fn new(
|
|
source: RopeSlice,
|
|
config: Arc<HighlightConfiguration>,
|
|
loader: Arc<ArcSwap<Loader>>,
|
|
) -> Option<Self> {
|
|
let root_layer = LanguageLayer {
|
|
tree: None,
|
|
config,
|
|
depth: 0,
|
|
flags: LayerUpdateFlags::empty(),
|
|
ranges: vec![Range {
|
|
start_byte: 0,
|
|
end_byte: usize::MAX,
|
|
start_point: Point::new(0, 0),
|
|
end_point: Point::new(usize::MAX, usize::MAX),
|
|
}],
|
|
parent: None,
|
|
};
|
|
|
|
// track scope_descriptor: a Vec of scopes for item in tree
|
|
|
|
let mut layers = HopSlotMap::default();
|
|
let root = layers.insert(root_layer);
|
|
|
|
let mut syntax = Self {
|
|
root,
|
|
layers,
|
|
loader,
|
|
};
|
|
|
|
let res = syntax.update(source, source, &ChangeSet::new(source));
|
|
|
|
if res.is_err() {
|
|
log::error!("TS parser failed, disabling TS for the current buffer: {res:?}");
|
|
return None;
|
|
}
|
|
Some(syntax)
|
|
}
|
|
|
|
pub fn update(
|
|
&mut self,
|
|
old_source: RopeSlice,
|
|
source: RopeSlice,
|
|
changeset: &ChangeSet,
|
|
) -> Result<(), Error> {
|
|
let mut queue = VecDeque::new();
|
|
queue.push_back(self.root);
|
|
|
|
let loader = self.loader.load();
|
|
let scopes = loader.scopes.load();
|
|
let injection_callback = |language: &InjectionLanguageMarker| {
|
|
loader
|
|
.language_configuration_for_injection_string(language)
|
|
.and_then(|language_config| language_config.highlight_config(&scopes))
|
|
};
|
|
|
|
// Convert the changeset into tree sitter edits.
|
|
let edits = generate_edits(old_source, changeset);
|
|
|
|
// This table allows inverse indexing of `layers`.
|
|
// That is by hashing a `Layer` you can find
|
|
// the `LayerId` of an existing equivalent `Layer` in `layers`.
|
|
//
|
|
// It is used to determine if a new layer exists for an injection
|
|
// or if an existing layer needs to be updated.
|
|
let mut layers_table = RawTable::with_capacity(self.layers.len());
|
|
let layers_hasher = RandomState::new();
|
|
// Use the edits to update all layers markers
|
|
fn point_add(a: Point, b: Point) -> Point {
|
|
if b.row > 0 {
|
|
Point::new(a.row.saturating_add(b.row), b.column)
|
|
} else {
|
|
Point::new(0, a.column.saturating_add(b.column))
|
|
}
|
|
}
|
|
fn point_sub(a: Point, b: Point) -> Point {
|
|
if a.row > b.row {
|
|
Point::new(a.row.saturating_sub(b.row), a.column)
|
|
} else {
|
|
Point::new(0, a.column.saturating_sub(b.column))
|
|
}
|
|
}
|
|
|
|
for (layer_id, layer) in self.layers.iter_mut() {
|
|
// The root layer always covers the whole range (0..usize::MAX)
|
|
if layer.depth == 0 {
|
|
layer.flags = LayerUpdateFlags::MODIFIED;
|
|
continue;
|
|
}
|
|
|
|
if !edits.is_empty() {
|
|
for range in &mut layer.ranges {
|
|
// Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720
|
|
for edit in edits.iter().rev() {
|
|
let is_pure_insertion = edit.old_end_byte == edit.start_byte;
|
|
|
|
// if edit is after range, skip
|
|
if edit.start_byte > range.end_byte {
|
|
// TODO: || (is_noop && edit.start_byte == range.end_byte)
|
|
continue;
|
|
}
|
|
|
|
// if edit is before range, shift entire range by len
|
|
if edit.old_end_byte < range.start_byte {
|
|
range.start_byte =
|
|
edit.new_end_byte + (range.start_byte - edit.old_end_byte);
|
|
range.start_point = point_add(
|
|
edit.new_end_position,
|
|
point_sub(range.start_point, edit.old_end_position),
|
|
);
|
|
|
|
range.end_byte = edit
|
|
.new_end_byte
|
|
.saturating_add(range.end_byte - edit.old_end_byte);
|
|
range.end_point = point_add(
|
|
edit.new_end_position,
|
|
point_sub(range.end_point, edit.old_end_position),
|
|
);
|
|
|
|
layer.flags |= LayerUpdateFlags::MOVED;
|
|
}
|
|
// if the edit starts in the space before and extends into the range
|
|
else if edit.start_byte < range.start_byte {
|
|
range.start_byte = edit.new_end_byte;
|
|
range.start_point = edit.new_end_position;
|
|
|
|
range.end_byte = range
|
|
.end_byte
|
|
.saturating_sub(edit.old_end_byte)
|
|
.saturating_add(edit.new_end_byte);
|
|
range.end_point = point_add(
|
|
edit.new_end_position,
|
|
point_sub(range.end_point, edit.old_end_position),
|
|
);
|
|
layer.flags = LayerUpdateFlags::MODIFIED;
|
|
}
|
|
// If the edit is an insertion at the start of the tree, shift
|
|
else if edit.start_byte == range.start_byte && is_pure_insertion {
|
|
range.start_byte = edit.new_end_byte;
|
|
range.start_point = edit.new_end_position;
|
|
layer.flags |= LayerUpdateFlags::MOVED;
|
|
} else {
|
|
range.end_byte = range
|
|
.end_byte
|
|
.saturating_sub(edit.old_end_byte)
|
|
.saturating_add(edit.new_end_byte);
|
|
range.end_point = point_add(
|
|
edit.new_end_position,
|
|
point_sub(range.end_point, edit.old_end_position),
|
|
);
|
|
layer.flags = LayerUpdateFlags::MODIFIED;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
let hash = layers_hasher.hash_one(layer);
|
|
// Safety: insert_no_grow is unsafe because it assumes that the table
|
|
// has enough capacity to hold additional elements.
|
|
// This is always the case as we reserved enough capacity above.
|
|
unsafe { layers_table.insert_no_grow(hash, layer_id) };
|
|
}
|
|
|
|
PARSER.with(|ts_parser| {
|
|
let ts_parser = &mut ts_parser.borrow_mut();
|
|
ts_parser.parser.set_timeout_micros(1000 * 500); // half a second is pretty generours
|
|
let mut cursor = ts_parser.cursors.pop().unwrap_or_default();
|
|
// TODO: might need to set cursor range
|
|
cursor.set_byte_range(0..usize::MAX);
|
|
cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
|
|
|
|
let source_slice = source.slice(..);
|
|
|
|
while let Some(layer_id) = queue.pop_front() {
|
|
let layer = &mut self.layers[layer_id];
|
|
|
|
// Mark the layer as touched
|
|
layer.flags |= LayerUpdateFlags::TOUCHED;
|
|
|
|
// If a tree already exists, notify it of changes.
|
|
if let Some(tree) = &mut layer.tree {
|
|
if layer
|
|
.flags
|
|
.intersects(LayerUpdateFlags::MODIFIED | LayerUpdateFlags::MOVED)
|
|
{
|
|
for edit in edits.iter().rev() {
|
|
// Apply the edits in reverse.
|
|
// If we applied them in order then edit 1 would disrupt the positioning of edit 2.
|
|
tree.edit(edit);
|
|
}
|
|
}
|
|
|
|
if layer.flags.contains(LayerUpdateFlags::MODIFIED) {
|
|
// Re-parse the tree.
|
|
layer.parse(&mut ts_parser.parser, source)?;
|
|
}
|
|
} else {
|
|
// always parse if this layer has never been parsed before
|
|
layer.parse(&mut ts_parser.parser, source)?;
|
|
}
|
|
|
|
// Switch to an immutable borrow.
|
|
let layer = &self.layers[layer_id];
|
|
|
|
// Process injections.
|
|
let matches = cursor.matches(
|
|
&layer.config.injections_query,
|
|
layer.tree().root_node(),
|
|
RopeProvider(source_slice),
|
|
);
|
|
let mut combined_injections = vec![
|
|
(None, Vec::new(), IncludedChildren::default());
|
|
layer.config.combined_injections_patterns.len()
|
|
];
|
|
let mut injections = Vec::new();
|
|
let mut last_injection_end = 0;
|
|
for mat in matches {
|
|
let (injection_capture, content_node, included_children) = layer
|
|
.config
|
|
.injection_for_match(&layer.config.injections_query, &mat, source_slice);
|
|
|
|
// in case this is a combined injection save it for more processing later
|
|
if let Some(combined_injection_idx) = layer
|
|
.config
|
|
.combined_injections_patterns
|
|
.iter()
|
|
.position(|&pattern| pattern == mat.pattern_index)
|
|
{
|
|
let entry = &mut combined_injections[combined_injection_idx];
|
|
if injection_capture.is_some() {
|
|
entry.0 = injection_capture;
|
|
}
|
|
if let Some(content_node) = content_node {
|
|
if content_node.start_byte() >= last_injection_end {
|
|
entry.1.push(content_node);
|
|
last_injection_end = content_node.end_byte();
|
|
}
|
|
}
|
|
entry.2 = included_children;
|
|
continue;
|
|
}
|
|
|
|
// Explicitly remove this match so that none of its other captures will remain
|
|
// in the stream of captures.
|
|
mat.remove();
|
|
|
|
// If a language is found with the given name, then add a new language layer
|
|
// to the highlighted document.
|
|
if let (Some(injection_capture), Some(content_node)) =
|
|
(injection_capture, content_node)
|
|
{
|
|
if let Some(config) = (injection_callback)(&injection_capture) {
|
|
let ranges =
|
|
intersect_ranges(&layer.ranges, &[content_node], included_children);
|
|
|
|
if !ranges.is_empty() {
|
|
if content_node.start_byte() < last_injection_end {
|
|
continue;
|
|
}
|
|
last_injection_end = content_node.end_byte();
|
|
injections.push((config, ranges));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
for (lang_name, content_nodes, included_children) in combined_injections {
|
|
if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
|
|
if let Some(config) = (injection_callback)(&lang_name) {
|
|
let ranges =
|
|
intersect_ranges(&layer.ranges, &content_nodes, included_children);
|
|
if !ranges.is_empty() {
|
|
injections.push((config, ranges));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
let depth = layer.depth + 1;
|
|
// TODO: can't inline this since matches borrows self.layers
|
|
for (config, ranges) in injections {
|
|
let parent = Some(layer_id);
|
|
let new_layer = LanguageLayer {
|
|
tree: None,
|
|
config,
|
|
depth,
|
|
ranges,
|
|
flags: LayerUpdateFlags::empty(),
|
|
parent: None,
|
|
};
|
|
|
|
// Find an identical existing layer
|
|
let layer = layers_table
|
|
.get(layers_hasher.hash_one(&new_layer), |&it| {
|
|
self.layers[it] == new_layer
|
|
})
|
|
.copied();
|
|
|
|
// ...or insert a new one.
|
|
let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer));
|
|
self.layers[layer_id].parent = parent;
|
|
|
|
queue.push_back(layer_id);
|
|
}
|
|
|
|
// TODO: pre-process local scopes at this time, rather than highlight?
|
|
// would solve problems with locals not working across boundaries
|
|
}
|
|
|
|
// Return the cursor back in the pool.
|
|
ts_parser.cursors.push(cursor);
|
|
|
|
// Reset all `LayerUpdateFlags` and remove all untouched layers
|
|
self.layers.retain(|_, layer| {
|
|
replace(&mut layer.flags, LayerUpdateFlags::empty())
|
|
.contains(LayerUpdateFlags::TOUCHED)
|
|
});
|
|
|
|
Ok(())
|
|
})
|
|
}
|
|
|
|
/// Returns the parse tree of the root layer.
pub fn tree(&self) -> &Tree {
    let root_layer = &self.layers[self.root];
    root_layer.tree()
}
|
|
|
|
/// Iterate over the highlighted regions for a given slice of source code.
|
|
pub fn highlight_iter<'a>(
|
|
&'a self,
|
|
source: RopeSlice<'a>,
|
|
range: Option<std::ops::Range<usize>>,
|
|
cancellation_flag: Option<&'a AtomicUsize>,
|
|
) -> impl Iterator<Item = Result<HighlightEvent, Error>> + 'a {
|
|
let mut layers = self
|
|
.layers
|
|
.iter()
|
|
.filter_map(|(_, layer)| {
|
|
// TODO: if range doesn't overlap layer range, skip it
|
|
|
|
// Reuse a cursor from the pool if available.
|
|
let mut cursor = PARSER.with(|ts_parser| {
|
|
let highlighter = &mut ts_parser.borrow_mut();
|
|
highlighter.cursors.pop().unwrap_or_default()
|
|
});
|
|
|
|
// The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
|
|
// prevents them from being moved. But both of these values are really just
|
|
// pointers, so it's actually ok to move them.
|
|
let cursor_ref =
|
|
unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
|
|
|
|
// if reusing cursors & no range this resets to whole range
|
|
cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX));
|
|
cursor_ref.set_match_limit(TREE_SITTER_MATCH_LIMIT);
|
|
|
|
let mut captures = cursor_ref
|
|
.captures(
|
|
&layer.config.query,
|
|
layer.tree().root_node(),
|
|
RopeProvider(source),
|
|
)
|
|
.peekable();
|
|
|
|
// If there's no captures, skip the layer
|
|
captures.peek()?;
|
|
|
|
Some(HighlightIterLayer {
|
|
highlight_end_stack: Vec::new(),
|
|
scope_stack: vec![LocalScope {
|
|
inherits: false,
|
|
range: 0..usize::MAX,
|
|
local_defs: Vec::new(),
|
|
}],
|
|
cursor,
|
|
_tree: None,
|
|
captures: RefCell::new(captures),
|
|
config: layer.config.as_ref(), // TODO: just reuse `layer`
|
|
depth: layer.depth, // TODO: just reuse `layer`
|
|
})
|
|
})
|
|
.collect::<Vec<_>>();
|
|
|
|
layers.sort_unstable_by_key(|layer| layer.sort_key());
|
|
|
|
let mut result = HighlightIter {
|
|
source,
|
|
byte_offset: range.map_or(0, |r| r.start),
|
|
cancellation_flag,
|
|
iter_count: 0,
|
|
layers,
|
|
next_event: None,
|
|
last_highlight_range: None,
|
|
};
|
|
result.sort_layers();
|
|
result
|
|
}
|
|
|
|
/// Returns the tree of the deepest layer that contains the byte range
/// `start..end`, falling back to the root layer's tree.
///
/// A layer only replaces the current candidate when its depth is strictly
/// greater, so among equally deep layers the first one encountered wins.
pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree {
    let container_id = self
        .layers
        .iter()
        .fold(self.root, |best, (candidate_id, candidate)| {
            let is_deeper = candidate.depth > self.layers[best].depth;
            if is_deeper && candidate.contains_byte_range(start, end) {
                candidate_id
            } else {
                best
            }
        });

    self.layers[container_id].tree()
}
|
|
|
|
/// Finds the named descendant for `start..end` in the tree selected by
/// `tree_for_byte_range`.
pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Node<'_>> {
    let root_node = self.tree_for_byte_range(start, end).root_node();
    root_node.named_descendant_for_byte_range(start, end)
}
|
|
|
|
/// Finds the descendant (named or anonymous) for `start..end` in the tree
/// selected by `tree_for_byte_range`.
pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Node<'_>> {
    let root_node = self.tree_for_byte_range(start, end).root_node();
    root_node.descendant_for_byte_range(start, end)
}
|
|
|
|
pub fn walk(&self) -> TreeCursor<'_> {
|
|
// data structure to find the smallest range that contains a point
|
|
// when some of the ranges in the structure can overlap.
|
|
TreeCursor::new(&self.layers, self.root)
|
|
}
|
|
|
|
// Commenting
|
|
// comment_strings_for_pos
|
|
// is_commented
|
|
|
|
// Indentation
|
|
// suggested_indent_for_line_at_buffer_row
|
|
// suggested_indent_for_buffer_row
|
|
// indent_level_for_line
|
|
|
|
// TODO: Folding
|
|
}
|
|
|
|
bitflags! {
    /// Flags that track the status of a layer
    /// in the `Syntax::update` function
    #[derive(Debug)]
    struct LayerUpdateFlags : u32{
        /// An edit touched the layer's ranges; the layer is re-parsed.
        const MODIFIED = 0b001;
        /// The layer's ranges were only shifted; its tree is edited but not re-parsed.
        const MOVED = 0b010;
        /// The layer was reached during the update; untouched layers are removed.
        const TOUCHED = 0b100;
    }
}
|
|
|
|
#[derive(Debug)]
|
|
pub struct LanguageLayer {
|
|
// mode
|
|
// grammar
|
|
pub config: Arc<HighlightConfiguration>,
|
|
pub(crate) tree: Option<Tree>,
|
|
pub ranges: Vec<Range>,
|
|
pub depth: u32,
|
|
flags: LayerUpdateFlags,
|
|
parent: Option<LayerId>,
|
|
}
|
|
|
|
/// This PartialEq implementation only checks if that
|
|
/// two layers are theoretically identical (meaning they highlight the same text range with the same language).
|
|
/// It does not check whether the layers have the same internal treesitter
|
|
/// state.
|
|
impl PartialEq for LanguageLayer {
|
|
fn eq(&self, other: &Self) -> bool {
|
|
self.depth == other.depth
|
|
&& self.config.language == other.config.language
|
|
&& self.ranges == other.ranges
|
|
}
|
|
}
|
|
|
|
/// Hash implementation belongs to PartialEq implementation above.
|
|
/// See its documentation for details.
|
|
impl Hash for LanguageLayer {
|
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
|
self.depth.hash(state);
|
|
self.config.language.hash(state);
|
|
self.ranges.hash(state);
|
|
}
|
|
}
|
|
|
|
impl LanguageLayer {
|
|
pub fn tree(&self) -> &Tree {
|
|
// TODO: no unwrap
|
|
self.tree.as_ref().unwrap()
|
|
}
|
|
|
|
fn parse(&mut self, parser: &mut Parser, source: RopeSlice) -> Result<(), Error> {
|
|
parser
|
|
.set_included_ranges(&self.ranges)
|
|
.map_err(|_| Error::InvalidRanges)?;
|
|
|
|
parser
|
|
.set_language(&self.config.language)
|
|
.map_err(|_| Error::InvalidLanguage)?;
|
|
|
|
// unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) };
|
|
let tree = parser
|
|
.parse_with(
|
|
&mut |byte, _| {
|
|
if byte <= source.len_bytes() {
|
|
let (chunk, start_byte, _, _) = source.chunk_at_byte(byte);
|
|
&chunk.as_bytes()[byte - start_byte..]
|
|
} else {
|
|
// out of range
|
|
&[]
|
|
}
|
|
},
|
|
self.tree.as_ref(),
|
|
)
|
|
.ok_or(Error::Cancelled)?;
|
|
// unsafe { ts_parser.parser.set_cancellation_flag(None) };
|
|
self.tree = Some(tree);
|
|
Ok(())
|
|
}
|
|
|
|
/// Whether the layer contains the given byte range.
|
|
///
|
|
/// If the layer has multiple ranges (i.e. combined injections), the
|
|
/// given range is considered contained if it is within the start and
|
|
/// end bytes of the first and last ranges **and** if the given range
|
|
/// starts or ends within any of the layer's ranges.
|
|
fn contains_byte_range(&self, start: usize, end: usize) -> bool {
|
|
let layer_start = self
|
|
.ranges
|
|
.first()
|
|
.expect("ranges should not be empty")
|
|
.start_byte;
|
|
let layer_end = self
|
|
.ranges
|
|
.last()
|
|
.expect("ranges should not be empty")
|
|
.end_byte;
|
|
|
|
layer_start <= start
|
|
&& layer_end >= end
|
|
&& self.ranges.iter().any(|range| {
|
|
let byte_range = range.start_byte..range.end_byte;
|
|
byte_range.contains(&start) || byte_range.contains(&end)
|
|
})
|
|
}
|
|
}
|
|
|
|
pub(crate) fn generate_edits(
|
|
old_text: RopeSlice,
|
|
changeset: &ChangeSet,
|
|
) -> Vec<tree_sitter::InputEdit> {
|
|
use Operation::*;
|
|
let mut old_pos = 0;
|
|
|
|
let mut edits = Vec::new();
|
|
|
|
if changeset.changes.is_empty() {
|
|
return edits;
|
|
}
|
|
|
|
let mut iter = changeset.changes.iter().peekable();
|
|
|
|
// TODO; this is a lot easier with Change instead of Operation.
|
|
|
|
fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) {
|
|
let byte = text.char_to_byte(pos); // <- attempted to index past end
|
|
let line = text.char_to_line(pos);
|
|
let line_start_byte = text.line_to_byte(line);
|
|
let col = byte - line_start_byte;
|
|
|
|
(byte, Point::new(line, col))
|
|
}
|
|
|
|
fn traverse(point: Point, text: &Tendril) -> Point {
|
|
let Point {
|
|
mut row,
|
|
mut column,
|
|
} = point;
|
|
|
|
// TODO: there should be a better way here.
|
|
let mut chars = text.chars().peekable();
|
|
while let Some(ch) = chars.next() {
|
|
if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
|
|
row += 1;
|
|
column = 0;
|
|
} else {
|
|
column += 1;
|
|
}
|
|
}
|
|
Point { row, column }
|
|
}
|
|
|
|
while let Some(change) = iter.next() {
|
|
let len = match change {
|
|
Delete(i) | Retain(i) => *i,
|
|
Insert(_) => 0,
|
|
};
|
|
let mut old_end = old_pos + len;
|
|
|
|
match change {
|
|
Retain(_) => {}
|
|
Delete(_) => {
|
|
let (start_byte, start_position) = point_at_pos(old_text, old_pos);
|
|
let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end);
|
|
|
|
// deletion
|
|
edits.push(tree_sitter::InputEdit {
|
|
start_byte, // old_pos to byte
|
|
old_end_byte, // old_end to byte
|
|
new_end_byte: start_byte, // old_pos to byte
|
|
start_position, // old pos to coords
|
|
old_end_position, // old_end to coords
|
|
new_end_position: start_position, // old pos to coords
|
|
});
|
|
}
|
|
Insert(s) => {
|
|
let (start_byte, start_position) = point_at_pos(old_text, old_pos);
|
|
|
|
// a subsequent delete means a replace, consume it
|
|
if let Some(Delete(len)) = iter.peek() {
|
|
old_end = old_pos + len;
|
|
let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end);
|
|
|
|
iter.next();
|
|
|
|
// replacement
|
|
edits.push(tree_sitter::InputEdit {
|
|
start_byte, // old_pos to byte
|
|
old_end_byte, // old_end to byte
|
|
new_end_byte: start_byte + s.len(), // old_pos to byte + s.len()
|
|
start_position, // old pos to coords
|
|
old_end_position, // old_end to coords
|
|
new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over)
|
|
});
|
|
} else {
|
|
// insert
|
|
edits.push(tree_sitter::InputEdit {
|
|
start_byte, // old_pos to byte
|
|
old_end_byte: start_byte, // same
|
|
new_end_byte: start_byte + s.len(), // old_pos + s.len()
|
|
start_position, // old pos to coords
|
|
old_end_position: start_position, // same
|
|
new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over)
|
|
});
|
|
}
|
|
}
|
|
}
|
|
old_pos = old_end;
|
|
}
|
|
edits
|
|
}
|
|
|
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
|
use std::{iter, mem, ops, str, usize};
|
|
use tree_sitter::{
|
|
Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError,
|
|
QueryMatch, Range, TextProvider, Tree,
|
|
};
|
|
|
|
// NOTE(review): presumably the number of iterator steps between checks of
// the cancellation flag; confirm against the `HighlightIter` implementation.
const CANCELLATION_CHECK_INTERVAL: usize = 100;
|
|
|
|
/// Indicates which highlight should be applied to a region of source code.
///
/// The wrapped value is an index into the list of recognized highlight names
/// passed to `HighlightConfiguration::configure`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Highlight(pub usize);
|
|
|
|
/// Represents the reason why syntax highlighting failed.
#[derive(Debug, PartialEq, Eq)]
pub enum Error {
    /// Parsing did not produce a tree.
    Cancelled,
    /// The parser rejected the layer's language.
    InvalidLanguage,
    /// The parser rejected the layer's included ranges.
    InvalidRanges,
    /// Any other failure.
    Unknown,
}
|
|
|
|
/// Represents a single step in rendering a syntax-highlighted document.
|
|
#[derive(Copy, Clone, Debug)]
|
|
pub enum HighlightEvent {
|
|
Source { start: usize, end: usize },
|
|
HighlightStart(Highlight),
|
|
HighlightEnd,
|
|
}
|
|
|
|
/// Contains the data needed to highlight code written in a particular language.
|
|
///
|
|
/// This struct is immutable and can be shared between threads.
|
|
#[derive(Debug)]
|
|
pub struct HighlightConfiguration {
|
|
pub language: Grammar,
|
|
pub query: Query,
|
|
injections_query: Query,
|
|
combined_injections_patterns: Vec<usize>,
|
|
highlights_pattern_index: usize,
|
|
highlight_indices: ArcSwap<Vec<Option<Highlight>>>,
|
|
non_local_variable_patterns: Vec<bool>,
|
|
injection_content_capture_index: Option<u32>,
|
|
injection_language_capture_index: Option<u32>,
|
|
injection_filename_capture_index: Option<u32>,
|
|
injection_shebang_capture_index: Option<u32>,
|
|
local_scope_capture_index: Option<u32>,
|
|
local_def_capture_index: Option<u32>,
|
|
local_def_value_capture_index: Option<u32>,
|
|
local_ref_capture_index: Option<u32>,
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
struct LocalDef<'a> {
|
|
name: Cow<'a, str>,
|
|
value_range: ops::Range<usize>,
|
|
highlight: Option<Highlight>,
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
struct LocalScope<'a> {
|
|
inherits: bool,
|
|
range: ops::Range<usize>,
|
|
local_defs: Vec<LocalDef<'a>>,
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
struct HighlightIter<'a> {
|
|
source: RopeSlice<'a>,
|
|
byte_offset: usize,
|
|
cancellation_flag: Option<&'a AtomicUsize>,
|
|
layers: Vec<HighlightIterLayer<'a>>,
|
|
iter_count: usize,
|
|
next_event: Option<HighlightEvent>,
|
|
last_highlight_range: Option<(usize, usize, u32)>,
|
|
}
|
|
|
|
// Adapter to convert rope chunks to bytes
|
|
pub struct ChunksBytes<'a> {
|
|
chunks: ropey::iter::Chunks<'a>,
|
|
}
|
|
impl<'a> Iterator for ChunksBytes<'a> {
|
|
type Item = &'a [u8];
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
self.chunks.next().map(str::as_bytes)
|
|
}
|
|
}
|
|
|
|
pub struct RopeProvider<'a>(pub RopeSlice<'a>);
|
|
impl<'a> TextProvider<&'a [u8]> for RopeProvider<'a> {
|
|
type I = ChunksBytes<'a>;
|
|
|
|
fn text(&mut self, node: Node) -> Self::I {
|
|
let fragment = self.0.byte_slice(node.start_byte()..node.end_byte());
|
|
ChunksBytes {
|
|
chunks: fragment.chunks(),
|
|
}
|
|
}
|
|
}
|
|
|
|
struct HighlightIterLayer<'a> {
|
|
_tree: Option<Tree>,
|
|
cursor: QueryCursor,
|
|
captures: RefCell<iter::Peekable<QueryCaptures<'a, 'a, RopeProvider<'a>, &'a [u8]>>>,
|
|
config: &'a HighlightConfiguration,
|
|
highlight_end_stack: Vec<usize>,
|
|
scope_stack: Vec<LocalScope<'a>>,
|
|
depth: u32,
|
|
}
|
|
|
|
impl<'a> fmt::Debug for HighlightIterLayer<'a> {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
f.debug_struct("HighlightIterLayer").finish()
|
|
}
|
|
}
|
|
|
|
impl HighlightConfiguration {
|
|
/// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting
|
|
/// queries.
|
|
///
|
|
/// # Parameters
|
|
///
|
|
/// * `language` - The Tree-sitter `Grammar` that should be used for parsing.
|
|
/// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
|
|
/// should be non-empty, otherwise no syntax highlights will be added.
|
|
/// * `injections_query` - A string containing tree patterns for injecting other languages
|
|
/// into the document. This can be empty if no injections are desired.
|
|
/// * `locals_query` - A string containing tree patterns for tracking local variable
|
|
/// definitions and references. This can be empty if local variable tracking is not needed.
|
|
///
|
|
/// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
|
|
pub fn new(
|
|
language: Grammar,
|
|
highlights_query: &str,
|
|
injection_query: &str,
|
|
locals_query: &str,
|
|
) -> Result<Self, QueryError> {
|
|
// Concatenate the query strings, keeping track of the start offset of each section.
|
|
let mut query_source = String::new();
|
|
query_source.push_str(locals_query);
|
|
let highlights_query_offset = query_source.len();
|
|
query_source.push_str(highlights_query);
|
|
|
|
// Construct a single query by concatenating the three query strings, but record the
|
|
// range of pattern indices that belong to each individual string.
|
|
let query = Query::new(&language, &query_source)?;
|
|
let mut highlights_pattern_index = 0;
|
|
for i in 0..(query.pattern_count()) {
|
|
let pattern_offset = query.start_byte_for_pattern(i);
|
|
if pattern_offset < highlights_query_offset {
|
|
highlights_pattern_index += 1;
|
|
}
|
|
}
|
|
|
|
let injections_query = Query::new(&language, injection_query)?;
|
|
let combined_injections_patterns = (0..injections_query.pattern_count())
|
|
.filter(|&i| {
|
|
injections_query
|
|
.property_settings(i)
|
|
.iter()
|
|
.any(|s| &*s.key == "injection.combined")
|
|
})
|
|
.collect();
|
|
|
|
// Find all of the highlighting patterns that are disabled for nodes that
|
|
// have been identified as local variables.
|
|
let non_local_variable_patterns = (0..query.pattern_count())
|
|
.map(|i| {
|
|
query
|
|
.property_predicates(i)
|
|
.iter()
|
|
.any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")
|
|
})
|
|
.collect();
|
|
|
|
// Store the numeric ids for all of the special captures.
|
|
let mut injection_content_capture_index = None;
|
|
let mut injection_language_capture_index = None;
|
|
let mut injection_filename_capture_index = None;
|
|
let mut injection_shebang_capture_index = None;
|
|
let mut local_def_capture_index = None;
|
|
let mut local_def_value_capture_index = None;
|
|
let mut local_ref_capture_index = None;
|
|
let mut local_scope_capture_index = None;
|
|
for (i, name) in query.capture_names().iter().enumerate() {
|
|
let i = Some(i as u32);
|
|
match *name {
|
|
"local.definition" => local_def_capture_index = i,
|
|
"local.definition-value" => local_def_value_capture_index = i,
|
|
"local.reference" => local_ref_capture_index = i,
|
|
"local.scope" => local_scope_capture_index = i,
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
for (i, name) in injections_query.capture_names().iter().enumerate() {
|
|
let i = Some(i as u32);
|
|
match *name {
|
|
"injection.content" => injection_content_capture_index = i,
|
|
"injection.language" => injection_language_capture_index = i,
|
|
"injection.filename" => injection_filename_capture_index = i,
|
|
"injection.shebang" => injection_shebang_capture_index = i,
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]);
|
|
Ok(Self {
|
|
language,
|
|
query,
|
|
injections_query,
|
|
combined_injections_patterns,
|
|
highlights_pattern_index,
|
|
highlight_indices,
|
|
non_local_variable_patterns,
|
|
injection_content_capture_index,
|
|
injection_language_capture_index,
|
|
injection_filename_capture_index,
|
|
injection_shebang_capture_index,
|
|
local_scope_capture_index,
|
|
local_def_capture_index,
|
|
local_def_value_capture_index,
|
|
local_ref_capture_index,
|
|
})
|
|
}
|
|
|
|
/// Get a slice containing all of the highlight names used in the configuration.
|
|
pub fn names(&self) -> &[&str] {
|
|
self.query.capture_names()
|
|
}
|
|
|
|
/// Set the list of recognized highlight names.
|
|
///
|
|
/// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
|
|
/// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of
|
|
/// these queries can choose to recognize highlights with different levels of specificity.
|
|
/// For example, the string `function.builtin` will match against `function.builtin.constructor`
|
|
/// but will not match `function.method.builtin` and `function.method`.
|
|
///
|
|
/// When highlighting, results are returned as `Highlight` values, which contain the index
|
|
/// of the matched highlight this list of highlight names.
|
|
pub fn configure(&self, recognized_names: &[String]) {
|
|
let mut capture_parts = Vec::new();
|
|
let indices: Vec<_> = self
|
|
.query
|
|
.capture_names()
|
|
.iter()
|
|
.map(move |capture_name| {
|
|
capture_parts.clear();
|
|
capture_parts.extend(capture_name.split('.'));
|
|
|
|
let mut best_index = None;
|
|
let mut best_match_len = 0;
|
|
for (i, recognized_name) in recognized_names.iter().enumerate() {
|
|
let mut len = 0;
|
|
let mut matches = true;
|
|
for (i, part) in recognized_name.split('.').enumerate() {
|
|
match capture_parts.get(i) {
|
|
Some(capture_part) if *capture_part == part => len += 1,
|
|
_ => {
|
|
matches = false;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if matches && len > best_match_len {
|
|
best_index = Some(i);
|
|
best_match_len = len;
|
|
}
|
|
}
|
|
best_index.map(Highlight)
|
|
})
|
|
.collect();
|
|
|
|
self.highlight_indices.store(Arc::new(indices));
|
|
}
|
|
|
|
fn injection_pair<'a>(
|
|
&self,
|
|
query_match: &QueryMatch<'a, 'a>,
|
|
source: RopeSlice<'a>,
|
|
) -> (Option<InjectionLanguageMarker<'a>>, Option<Node<'a>>) {
|
|
let mut injection_capture = None;
|
|
let mut content_node = None;
|
|
|
|
for capture in query_match.captures {
|
|
let index = Some(capture.index);
|
|
if index == self.injection_language_capture_index {
|
|
let name = byte_range_to_str(capture.node.byte_range(), source);
|
|
injection_capture = Some(InjectionLanguageMarker::Name(name));
|
|
} else if index == self.injection_filename_capture_index {
|
|
let name = byte_range_to_str(capture.node.byte_range(), source);
|
|
let path = Path::new(name.as_ref()).to_path_buf();
|
|
injection_capture = Some(InjectionLanguageMarker::Filename(path.into()));
|
|
} else if index == self.injection_shebang_capture_index {
|
|
let node_slice = source.byte_slice(capture.node.byte_range());
|
|
|
|
// some languages allow space and newlines before the actual string content
|
|
// so a shebang could be on either the first or second line
|
|
let lines = if let Ok(end) = node_slice.try_line_to_byte(2) {
|
|
node_slice.byte_slice(..end)
|
|
} else {
|
|
node_slice
|
|
};
|
|
|
|
static SHEBANG_REGEX: Lazy<rope::Regex> =
|
|
Lazy::new(|| rope::Regex::new(SHEBANG).unwrap());
|
|
|
|
injection_capture = SHEBANG_REGEX
|
|
.captures_iter(lines.regex_input())
|
|
.map(|cap| {
|
|
let cap = lines.byte_slice(cap.get_group(1).unwrap().range());
|
|
InjectionLanguageMarker::Shebang(cap.into())
|
|
})
|
|
.next()
|
|
} else if index == self.injection_content_capture_index {
|
|
content_node = Some(capture.node);
|
|
}
|
|
}
|
|
(injection_capture, content_node)
|
|
}
|
|
|
|
fn injection_for_match<'a>(
|
|
&self,
|
|
query: &'a Query,
|
|
query_match: &QueryMatch<'a, 'a>,
|
|
source: RopeSlice<'a>,
|
|
) -> (
|
|
Option<InjectionLanguageMarker<'a>>,
|
|
Option<Node<'a>>,
|
|
IncludedChildren,
|
|
) {
|
|
let (mut injection_capture, content_node) = self.injection_pair(query_match, source);
|
|
|
|
let mut included_children = IncludedChildren::default();
|
|
for prop in query.property_settings(query_match.pattern_index) {
|
|
match prop.key.as_ref() {
|
|
// In addition to specifying the language name via the text of a
|
|
// captured node, it can also be hard-coded via a `#set!` predicate
|
|
// that sets the injection.language key.
|
|
"injection.language" if injection_capture.is_none() => {
|
|
injection_capture = prop
|
|
.value
|
|
.as_ref()
|
|
.map(|s| InjectionLanguageMarker::Name(s.as_ref().into()));
|
|
}
|
|
|
|
// By default, injections do not include the *children* of an
|
|
// `injection.content` node - only the ranges that belong to the
|
|
// node itself. This can be changed using a `#set!` predicate that
|
|
// sets the `injection.include-children` key.
|
|
"injection.include-children" => included_children = IncludedChildren::All,
|
|
|
|
// Some queries might only exclude named children but include unnamed
|
|
// children in their `injection.content` node. This can be enabled using
|
|
// a `#set!` predicate that sets the `injection.include-unnamed-children` key.
|
|
"injection.include-unnamed-children" => {
|
|
included_children = IncludedChildren::Unnamed
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
(injection_capture, content_node, included_children)
|
|
}
|
|
}
|
|
|
|
impl<'a> HighlightIterLayer<'a> {
|
|
// First, sort scope boundaries by their byte offset in the document. At a
|
|
// given position, emit scope endings before scope beginnings. Finally, emit
|
|
// scope boundaries from deeper layers first.
|
|
fn sort_key(&self) -> Option<(usize, bool, isize)> {
|
|
let depth = -(self.depth as isize);
|
|
let next_start = self
|
|
.captures
|
|
.borrow_mut()
|
|
.peek()
|
|
.map(|(m, i)| m.captures[*i].node.start_byte());
|
|
let next_end = self.highlight_end_stack.last().cloned();
|
|
match (next_start, next_end) {
|
|
(Some(start), Some(end)) => {
|
|
if start < end {
|
|
Some((start, true, depth))
|
|
} else {
|
|
Some((end, false, depth))
|
|
}
|
|
}
|
|
(Some(i), None) => Some((i, true, depth)),
|
|
(None, Some(j)) => Some((j, false, depth)),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Which children of an `injection.content` node stay part of the injected
/// ranges (see `intersect_ranges`).
#[derive(Clone)]
enum IncludedChildren {
    /// No child is included: every child's range is cut out.
    None,
    /// All children are included: nothing is cut out.
    All,
    /// Only unnamed children are included: named children are cut out.
    Unnamed,
}

impl Default for IncludedChildren {
    /// Children are excluded unless a query asks otherwise.
    fn default() -> Self {
        IncludedChildren::None
    }
}
|
|
|
|
// Compute the ranges that should be included when parsing an injection.
|
|
// This takes into account three things:
|
|
// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
|
|
// * `nodes` - Every injection takes place within a set of nodes. The injection ranges
|
|
// are the ranges of those nodes.
|
|
// * `includes_children` - For some injections, the content nodes' children should be
|
|
// excluded from the nested document, so that only the content nodes' *own* content
|
|
// is reparsed. For other injections, the content nodes' entire ranges should be
|
|
// reparsed, including the ranges of their children.
|
|
fn intersect_ranges(
|
|
parent_ranges: &[Range],
|
|
nodes: &[Node],
|
|
included_children: IncludedChildren,
|
|
) -> Vec<Range> {
|
|
let mut cursor = nodes[0].walk();
|
|
let mut result = Vec::new();
|
|
let mut parent_range_iter = parent_ranges.iter();
|
|
let mut parent_range = parent_range_iter
|
|
.next()
|
|
.expect("Layers should only be constructed with non-empty ranges vectors");
|
|
for node in nodes.iter() {
|
|
let mut preceding_range = Range {
|
|
start_byte: 0,
|
|
start_point: Point::new(0, 0),
|
|
end_byte: node.start_byte(),
|
|
end_point: node.start_position(),
|
|
};
|
|
let following_range = Range {
|
|
start_byte: node.end_byte(),
|
|
start_point: node.end_position(),
|
|
end_byte: usize::MAX,
|
|
end_point: Point::new(usize::MAX, usize::MAX),
|
|
};
|
|
|
|
for excluded_range in node
|
|
.children(&mut cursor)
|
|
.filter_map(|child| match included_children {
|
|
IncludedChildren::None => Some(child.range()),
|
|
IncludedChildren::All => None,
|
|
IncludedChildren::Unnamed => {
|
|
if child.is_named() {
|
|
Some(child.range())
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
})
|
|
.chain([following_range].iter().cloned())
|
|
{
|
|
let mut range = Range {
|
|
start_byte: preceding_range.end_byte,
|
|
start_point: preceding_range.end_point,
|
|
end_byte: excluded_range.start_byte,
|
|
end_point: excluded_range.start_point,
|
|
};
|
|
preceding_range = excluded_range;
|
|
|
|
if range.end_byte < parent_range.start_byte {
|
|
continue;
|
|
}
|
|
|
|
while parent_range.start_byte <= range.end_byte {
|
|
if parent_range.end_byte > range.start_byte {
|
|
if range.start_byte < parent_range.start_byte {
|
|
range.start_byte = parent_range.start_byte;
|
|
range.start_point = parent_range.start_point;
|
|
}
|
|
|
|
if parent_range.end_byte < range.end_byte {
|
|
if range.start_byte < parent_range.end_byte {
|
|
result.push(Range {
|
|
start_byte: range.start_byte,
|
|
start_point: range.start_point,
|
|
end_byte: parent_range.end_byte,
|
|
end_point: parent_range.end_point,
|
|
});
|
|
}
|
|
range.start_byte = parent_range.end_byte;
|
|
range.start_point = parent_range.end_point;
|
|
} else {
|
|
if range.start_byte < range.end_byte {
|
|
result.push(range);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
if let Some(next_range) = parent_range_iter.next() {
|
|
parent_range = next_range;
|
|
} else {
|
|
return result;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
result
|
|
}
|
|
|
|
impl<'a> HighlightIter<'a> {
|
|
fn emit_event(
|
|
&mut self,
|
|
offset: usize,
|
|
event: Option<HighlightEvent>,
|
|
) -> Option<Result<HighlightEvent, Error>> {
|
|
let result;
|
|
if self.byte_offset < offset {
|
|
result = Some(Ok(HighlightEvent::Source {
|
|
start: self.byte_offset,
|
|
end: offset,
|
|
}));
|
|
self.byte_offset = offset;
|
|
self.next_event = event;
|
|
} else {
|
|
result = event.map(Ok);
|
|
}
|
|
self.sort_layers();
|
|
result
|
|
}
|
|
|
|
fn sort_layers(&mut self) {
|
|
while !self.layers.is_empty() {
|
|
if let Some(sort_key) = self.layers[0].sort_key() {
|
|
let mut i = 0;
|
|
while i + 1 < self.layers.len() {
|
|
if let Some(next_offset) = self.layers[i + 1].sort_key() {
|
|
if next_offset < sort_key {
|
|
i += 1;
|
|
continue;
|
|
}
|
|
} else {
|
|
let layer = self.layers.remove(i + 1);
|
|
PARSER.with(|ts_parser| {
|
|
let highlighter = &mut ts_parser.borrow_mut();
|
|
highlighter.cursors.push(layer.cursor);
|
|
});
|
|
}
|
|
break;
|
|
}
|
|
if i > 0 {
|
|
self.layers[0..(i + 1)].rotate_left(1);
|
|
}
|
|
break;
|
|
} else {
|
|
let layer = self.layers.remove(0);
|
|
PARSER.with(|ts_parser| {
|
|
let highlighter = &mut ts_parser.borrow_mut();
|
|
highlighter.cursors.push(layer.cursor);
|
|
});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> Iterator for HighlightIter<'a> {
|
|
type Item = Result<HighlightEvent, Error>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
'main: loop {
|
|
// If we've already determined the next highlight boundary, just return it.
|
|
if let Some(e) = self.next_event.take() {
|
|
return Some(Ok(e));
|
|
}
|
|
|
|
// Periodically check for cancellation, returning `Cancelled` error if the
|
|
// cancellation flag was flipped.
|
|
if let Some(cancellation_flag) = self.cancellation_flag {
|
|
self.iter_count += 1;
|
|
if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
|
|
self.iter_count = 0;
|
|
if cancellation_flag.load(Ordering::Relaxed) != 0 {
|
|
return Some(Err(Error::Cancelled));
|
|
}
|
|
}
|
|
}
|
|
|
|
// If none of the layers have any more highlight boundaries, terminate.
|
|
if self.layers.is_empty() {
|
|
let len = self.source.len_bytes();
|
|
return if self.byte_offset < len {
|
|
let result = Some(Ok(HighlightEvent::Source {
|
|
start: self.byte_offset,
|
|
end: len,
|
|
}));
|
|
self.byte_offset = len;
|
|
result
|
|
} else {
|
|
None
|
|
};
|
|
}
|
|
|
|
// Get the next capture from whichever layer has the earliest highlight boundary.
|
|
let range;
|
|
let layer = &mut self.layers[0];
|
|
let captures = layer.captures.get_mut();
|
|
if let Some((next_match, capture_index)) = captures.peek() {
|
|
let next_capture = next_match.captures[*capture_index];
|
|
range = next_capture.node.byte_range();
|
|
|
|
// If any previous highlight ends before this node starts, then before
|
|
// processing this capture, emit the source code up until the end of the
|
|
// previous highlight, and an end event for that highlight.
|
|
if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
|
|
if end_byte <= range.start {
|
|
layer.highlight_end_stack.pop();
|
|
return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
|
|
}
|
|
}
|
|
}
|
|
// If there are no more captures, then emit any remaining highlight end events.
|
|
// And if there are none of those, then just advance to the end of the document.
|
|
else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
|
|
layer.highlight_end_stack.pop();
|
|
return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
|
|
} else {
|
|
return self.emit_event(self.source.len_bytes(), None);
|
|
};
|
|
|
|
let (mut match_, capture_index) = captures.next().unwrap();
|
|
let mut capture = match_.captures[capture_index];
|
|
|
|
// Remove from the local scope stack any local scopes that have already ended.
|
|
while range.start > layer.scope_stack.last().unwrap().range.end {
|
|
layer.scope_stack.pop();
|
|
}
|
|
|
|
// If this capture is for tracking local variables, then process the
|
|
// local variable info.
|
|
let mut reference_highlight = None;
|
|
let mut definition_highlight = None;
|
|
while match_.pattern_index < layer.config.highlights_pattern_index {
|
|
// If the node represents a local scope, push a new local scope onto
|
|
// the scope stack.
|
|
if Some(capture.index) == layer.config.local_scope_capture_index {
|
|
definition_highlight = None;
|
|
let mut scope = LocalScope {
|
|
inherits: true,
|
|
range: range.clone(),
|
|
local_defs: Vec::new(),
|
|
};
|
|
for prop in layer.config.query.property_settings(match_.pattern_index) {
|
|
if let "local.scope-inherits" = prop.key.as_ref() {
|
|
scope.inherits =
|
|
prop.value.as_ref().map_or(true, |r| r.as_ref() == "true");
|
|
}
|
|
}
|
|
layer.scope_stack.push(scope);
|
|
}
|
|
// If the node represents a definition, add a new definition to the
|
|
// local scope at the top of the scope stack.
|
|
else if Some(capture.index) == layer.config.local_def_capture_index {
|
|
reference_highlight = None;
|
|
let scope = layer.scope_stack.last_mut().unwrap();
|
|
|
|
let mut value_range = 0..0;
|
|
for capture in match_.captures {
|
|
if Some(capture.index) == layer.config.local_def_value_capture_index {
|
|
value_range = capture.node.byte_range();
|
|
}
|
|
}
|
|
|
|
let name = byte_range_to_str(range.clone(), self.source);
|
|
scope.local_defs.push(LocalDef {
|
|
name,
|
|
value_range,
|
|
highlight: None,
|
|
});
|
|
definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight);
|
|
}
|
|
// If the node represents a reference, then try to find the corresponding
|
|
// definition in the scope stack.
|
|
else if Some(capture.index) == layer.config.local_ref_capture_index
|
|
&& definition_highlight.is_none()
|
|
{
|
|
definition_highlight = None;
|
|
let name = byte_range_to_str(range.clone(), self.source);
|
|
for scope in layer.scope_stack.iter().rev() {
|
|
if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
|
|
if def.name == name && range.start >= def.value_range.end {
|
|
Some(def.highlight)
|
|
} else {
|
|
None
|
|
}
|
|
}) {
|
|
reference_highlight = highlight;
|
|
break;
|
|
}
|
|
if !scope.inherits {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Continue processing any additional matches for the same node.
|
|
if let Some((next_match, next_capture_index)) = captures.peek() {
|
|
let next_capture = next_match.captures[*next_capture_index];
|
|
if next_capture.node == capture.node {
|
|
capture = next_capture;
|
|
match_ = captures.next().unwrap().0;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
self.sort_layers();
|
|
continue 'main;
|
|
}
|
|
|
|
// Otherwise, this capture must represent a highlight.
|
|
// If this exact range has already been highlighted by an earlier pattern, or by
|
|
// a different layer, then skip over this one.
|
|
if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
|
|
if range.start == last_start && range.end == last_end && layer.depth < last_depth {
|
|
self.sort_layers();
|
|
continue 'main;
|
|
}
|
|
}
|
|
|
|
// If the current node was found to be a local variable, then skip over any
|
|
// highlighting patterns that are disabled for local variables.
|
|
if definition_highlight.is_some() || reference_highlight.is_some() {
|
|
while layer.config.non_local_variable_patterns[match_.pattern_index] {
|
|
match_.remove();
|
|
if let Some((next_match, next_capture_index)) = captures.peek() {
|
|
let next_capture = next_match.captures[*next_capture_index];
|
|
if next_capture.node == capture.node {
|
|
capture = next_capture;
|
|
match_ = captures.next().unwrap().0;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
self.sort_layers();
|
|
continue 'main;
|
|
}
|
|
}
|
|
|
|
// Once a highlighting pattern is found for the current node, skip over
|
|
// any later highlighting patterns that also match this node. Captures
|
|
// for a given node are ordered by pattern index, so these subsequent
|
|
// captures are guaranteed to be for highlighting, not injections or
|
|
// local variables.
|
|
while let Some((next_match, next_capture_index)) = captures.peek() {
|
|
let next_capture = next_match.captures[*next_capture_index];
|
|
if next_capture.node == capture.node {
|
|
captures.next();
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
let current_highlight = layer.config.highlight_indices.load()[capture.index as usize];
|
|
|
|
// If this node represents a local definition, then store the current
|
|
// highlight value on the local scope entry representing this node.
|
|
if let Some(definition_highlight) = definition_highlight {
|
|
*definition_highlight = current_highlight;
|
|
}
|
|
|
|
// Emit a scope start event and push the node's end position to the stack.
|
|
if let Some(highlight) = reference_highlight.or(current_highlight) {
|
|
self.last_highlight_range = Some((range.start, range.end, layer.depth));
|
|
layer.highlight_end_stack.push(range.end);
|
|
return self
|
|
.emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
|
|
}
|
|
|
|
self.sort_layers();
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Hint describing which language an injection should be parsed with.
#[derive(Debug, Clone)]
pub enum InjectionLanguageMarker<'a> {
    /// A language name, taken from the text of an `injection.language`
    /// capture or from an `injection.language` property of the pattern.
    Name(Cow<'a, str>),
    /// The text of an `injection.filename` capture, as a path.
    Filename(Cow<'a, Path>),
    /// Group 1 of the `SHEBANG` regex matched against an `injection.shebang`
    /// capture.
    Shebang(String),
}
|
|
|
|
// Pattern for a `#!` interpreter line. After `#!` and optional whitespace, an
// optional prefix consumes a path ending in `/` or `\`, optionally followed by
// `env` and any `-flag` arguments. Capture group 1 is the run of characters
// that follows, up to the first whitespace, `.` or digit.
const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
|
|
|
|
pub struct Merge<I> {
|
|
iter: I,
|
|
spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>>,
|
|
|
|
next_event: Option<HighlightEvent>,
|
|
next_span: Option<(usize, std::ops::Range<usize>)>,
|
|
|
|
queue: Vec<HighlightEvent>,
|
|
}
|
|
|
|
/// Merge a list of spans into the highlight event stream.
|
|
pub fn merge<I: Iterator<Item = HighlightEvent>>(
|
|
iter: I,
|
|
spans: Vec<(usize, std::ops::Range<usize>)>,
|
|
) -> Merge<I> {
|
|
let spans = Box::new(spans.into_iter());
|
|
let mut merge = Merge {
|
|
iter,
|
|
spans,
|
|
next_event: None,
|
|
next_span: None,
|
|
queue: Vec::new(),
|
|
};
|
|
merge.next_event = merge.iter.next();
|
|
merge.next_span = merge.spans.next();
|
|
merge
|
|
}
|
|
|
|
impl<I: Iterator<Item = HighlightEvent>> Iterator for Merge<I> {
|
|
type Item = HighlightEvent;
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
use HighlightEvent::*;
|
|
if let Some(event) = self.queue.pop() {
|
|
return Some(event);
|
|
}
|
|
|
|
loop {
|
|
match (self.next_event, &self.next_span) {
|
|
// this happens when range is partially or fully offscreen
|
|
(Some(Source { start, .. }), Some((span, range))) if start > range.start => {
|
|
if start > range.end {
|
|
self.next_span = self.spans.next();
|
|
} else {
|
|
self.next_span = Some((*span, start..range.end));
|
|
};
|
|
}
|
|
_ => break,
|
|
}
|
|
}
|
|
|
|
match (self.next_event, &self.next_span) {
|
|
(Some(HighlightStart(i)), _) => {
|
|
self.next_event = self.iter.next();
|
|
Some(HighlightStart(i))
|
|
}
|
|
(Some(HighlightEnd), _) => {
|
|
self.next_event = self.iter.next();
|
|
Some(HighlightEnd)
|
|
}
|
|
(Some(Source { start, end }), Some((_, range))) if start < range.start => {
|
|
let intersect = range.start.min(end);
|
|
let event = Source {
|
|
start,
|
|
end: intersect,
|
|
};
|
|
|
|
if end == intersect {
|
|
// the event is complete
|
|
self.next_event = self.iter.next();
|
|
} else {
|
|
// subslice the event
|
|
self.next_event = Some(Source {
|
|
start: intersect,
|
|
end,
|
|
});
|
|
};
|
|
|
|
Some(event)
|
|
}
|
|
(Some(Source { start, end }), Some((span, range))) if start == range.start => {
|
|
let intersect = range.end.min(end);
|
|
let event = HighlightStart(Highlight(*span));
|
|
|
|
// enqueue in reverse order
|
|
self.queue.push(HighlightEnd);
|
|
self.queue.push(Source {
|
|
start,
|
|
end: intersect,
|
|
});
|
|
|
|
if end == intersect {
|
|
// the event is complete
|
|
self.next_event = self.iter.next();
|
|
} else {
|
|
// subslice the event
|
|
self.next_event = Some(Source {
|
|
start: intersect,
|
|
end,
|
|
});
|
|
};
|
|
|
|
if intersect == range.end {
|
|
self.next_span = self.spans.next();
|
|
} else {
|
|
self.next_span = Some((*span, intersect..range.end));
|
|
}
|
|
|
|
Some(event)
|
|
}
|
|
(Some(event), None) => {
|
|
self.next_event = self.iter.next();
|
|
Some(event)
|
|
}
|
|
// Can happen if cursor at EOF and/or diagnostic reaches past the end.
|
|
// We need to actually emit events for the cursor-at-EOF situation,
|
|
// even though the range is past the end of the text. This needs to be
|
|
// handled appropriately by the drawing code by not assuming that
|
|
// all `Source` events point to valid indices in the rope.
|
|
(None, Some((span, range))) => {
|
|
let event = HighlightStart(Highlight(*span));
|
|
self.queue.push(HighlightEnd);
|
|
self.queue.push(Source {
|
|
start: range.start,
|
|
end: range.end,
|
|
});
|
|
self.next_span = self.spans.next();
|
|
Some(event)
|
|
}
|
|
(None, None) => None,
|
|
e => unreachable!("{:?}", e),
|
|
}
|
|
}
|
|
}
|
|
|
|
fn node_is_visible(node: &Node) -> bool {
|
|
node.is_missing() || (node.is_named() && node.language().node_kind_is_visible(node.kind_id()))
|
|
}
|
|
|
|
pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result {
|
|
if node.child_count() == 0 {
|
|
if node_is_visible(&node) {
|
|
write!(fmt, "({})", node.kind())
|
|
} else {
|
|
write!(fmt, "\"{}\"", node.kind())
|
|
}
|
|
} else {
|
|
pretty_print_tree_impl(fmt, &mut node.walk(), 0)
|
|
}
|
|
}
|
|
|
|
fn pretty_print_tree_impl<W: fmt::Write>(
|
|
fmt: &mut W,
|
|
cursor: &mut tree_sitter::TreeCursor,
|
|
depth: usize,
|
|
) -> fmt::Result {
|
|
let node = cursor.node();
|
|
let visible = node_is_visible(&node);
|
|
|
|
if visible {
|
|
let indentation_columns = depth * 2;
|
|
write!(fmt, "{:indentation_columns$}", "")?;
|
|
|
|
if let Some(field_name) = cursor.field_name() {
|
|
write!(fmt, "{}: ", field_name)?;
|
|
}
|
|
|
|
write!(fmt, "({}", node.kind())?;
|
|
}
|
|
|
|
// Handle children.
|
|
if cursor.goto_first_child() {
|
|
loop {
|
|
if node_is_visible(&cursor.node()) {
|
|
fmt.write_char('\n')?;
|
|
}
|
|
|
|
pretty_print_tree_impl(fmt, cursor, depth + 1)?;
|
|
|
|
if !cursor.goto_next_sibling() {
|
|
break;
|
|
}
|
|
}
|
|
|
|
let moved = cursor.goto_parent();
|
|
// The parent of the first child must exist, and must be `node`.
|
|
debug_assert!(moved);
|
|
debug_assert!(cursor.node() == node);
|
|
}
|
|
|
|
if visible {
|
|
fmt.write_char(')')?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use super::*;
    use crate::{Rope, Transaction};

    /// A quantified capture over consecutive line comments must cover both
    /// comments of the sample source.
    #[test]
    fn test_textobject_queries() {
        let query_str = r#"
(line_comment)+ @quantified_nodes
((line_comment)+) @quantified_nodes_grouped
((line_comment) (line_comment)) @multiple_nodes_grouped
"#;
        let source = Rope::from_str(
            r#"
/// a comment on
/// multiple lines
"#,
        );

        let loader = Loader::new(Configuration {
            language: vec![],
            language_server: HashMap::new(),
        })
        .unwrap();
        let language = get_language("rust").unwrap();

        let query = Query::new(&language, query_str).unwrap();
        let textobject = TextObjectQuery { query };
        let mut cursor = QueryCursor::new();

        let config = HighlightConfiguration::new(language, "", "", "").unwrap();
        let syntax = Syntax::new(
            source.slice(..),
            Arc::new(config),
            Arc::new(ArcSwap::from_pointee(loader)),
        )
        .unwrap();

        let root = syntax.tree().root_node();
        // Asserts that the first node captured as `capture` spans `range`.
        let mut test = |capture, range| {
            let matches: Vec<_> = textobject
                .capture_nodes(capture, root, source.slice(..), &mut cursor)
                .unwrap()
                .collect();

            assert_eq!(
                matches[0].byte_range(),
                range,
                "@{} expected {:?}",
                capture,
                range
            )
        };

        test("quantified_nodes", 1..37);
        // NOTE: Enable after implementing proper node group capturing
        // test("quantified_nodes_grouped", 1..37);
        // test("multiple_nodes_grouped", 1..37);
    }

    /// Parsing a small Rust snippet yields the expected tree shape.
    #[test]
    fn test_parser() {
        let highlight_names: Vec<String> = [
            "attribute",
            "constant",
            "function.builtin",
            "function",
            "keyword",
            "operator",
            "property",
            "punctuation",
            "punctuation.bracket",
            "punctuation.delimiter",
            "string",
            "string.special",
            "tag",
            "type",
            "type.builtin",
            "variable",
            "variable.builtin",
            "variable.parameter",
        ]
        .iter()
        .cloned()
        .map(String::from)
        .collect();

        let loader = Loader::new(Configuration {
            language: vec![],
            language_server: HashMap::new(),
        })
        .unwrap();

        let language = get_language("rust").unwrap();
        let config = HighlightConfiguration::new(
            language,
            &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm")
                .unwrap(),
            &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/injections.scm")
                .unwrap(),
            "", // locals.scm
        )
        .unwrap();
        config.configure(&highlight_names);

        let source = Rope::from_str(
            "
struct Stuff {}
fn main() {}
",
        );
        let syntax = Syntax::new(
            source.slice(..),
            Arc::new(config),
            Arc::new(ArcSwap::from_pointee(loader)),
        )
        .unwrap();
        let tree = syntax.tree();
        let root = tree.root_node();
        assert_eq!(root.kind(), "source_file");

        assert_eq!(
            root.to_sexp(),
            concat!(
                "(source_file ",
                "(struct_item name: (type_identifier) body: (field_declaration_list)) ",
                "(function_item name: (identifier) parameters: (parameters) body: (block)))"
            )
        );

        let struct_node = root.child(0).unwrap();
        assert_eq!(struct_node.kind(), "struct_item");
    }

    /// `generate_edits` translates a change set into tree-sitter `InputEdit`s.
    #[test]
    fn test_input_edits() {
        use tree_sitter::InputEdit;

        let doc = Rope::from("hello world!\ntest 123");
        let transaction = Transaction::change(
            &doc,
            vec![(6, 11, Some("test".into())), (12, 17, None)].into_iter(),
        );
        let edits = generate_edits(doc.slice(..), transaction.changes());

        assert_eq!(
            edits,
            &[
                InputEdit {
                    start_byte: 6,
                    old_end_byte: 11,
                    new_end_byte: 10,
                    start_position: Point { row: 0, column: 6 },
                    old_end_position: Point { row: 0, column: 11 },
                    new_end_position: Point { row: 0, column: 10 }
                },
                InputEdit {
                    start_byte: 12,
                    old_end_byte: 17,
                    new_end_byte: 12,
                    start_position: Point { row: 0, column: 12 },
                    old_end_position: Point { row: 1, column: 4 },
                    new_end_position: Point { row: 0, column: 12 }
                }
            ]
        );

        // Testing with the official example from tree-sitter
        let mut doc = Rope::from("fn test() {}");
        let transaction =
            Transaction::change(&doc, vec![(8, 8, Some("a: u32".into()))].into_iter());
        let edits = generate_edits(doc.slice(..), transaction.changes());
        transaction.apply(&mut doc);

        assert_eq!(doc, "fn test(a: u32) {}");
        assert_eq!(
            edits,
            &[InputEdit {
                start_byte: 8,
                old_end_byte: 8,
                new_end_byte: 14,
                start_position: Point { row: 0, column: 8 },
                old_end_position: Point { row: 0, column: 8 },
                new_end_position: Point { row: 0, column: 14 }
            }]
        );
    }

    /// Parses `source` as `language_name`, pretty-prints the smallest node
    /// spanning the byte range `start..end` and compares it with `expected`.
    #[track_caller]
    fn assert_pretty_print(
        language_name: &str,
        source: &str,
        expected: &str,
        start: usize,
        end: usize,
    ) {
        let source = Rope::from_str(source);

        let loader = Loader::new(Configuration {
            language: vec![],
            language_server: HashMap::new(),
        })
        .unwrap();
        let language = get_language(language_name).unwrap();

        let config = HighlightConfiguration::new(language, "", "", "").unwrap();
        let syntax = Syntax::new(
            source.slice(..),
            Arc::new(config),
            Arc::new(ArcSwap::from_pointee(loader)),
        )
        .unwrap();

        let root = syntax
            .tree()
            .root_node()
            .descendant_for_byte_range(start, end)
            .unwrap();

        let mut output = String::new();
        pretty_print_tree(&mut output, root).unwrap();

        assert_eq!(expected, output);
    }

    #[test]
    fn test_pretty_print() {
        let source = r#"// Hello"#;
        assert_pretty_print("rust", source, "(line_comment)", 0, source.len());

        // A large tree should be indented with fields. The printer indents
        // every nesting level by two columns, so the expected text does too.
        let source = r#"fn main() {
println!("Hello, World!");
}"#;
        assert_pretty_print(
            "rust",
            source,
            concat!(
                "(function_item\n",
                "  name: (identifier)\n",
                "  parameters: (parameters)\n",
                "  body: (block\n",
                "    (expression_statement\n",
                "      (macro_invocation\n",
                "        macro: (identifier)\n",
                "        (token_tree\n",
                "          (string_literal\n",
                "            (string_content)))))))",
            ),
            0,
            source.len(),
        );

        // Selecting a token should print just that token:
        let source = r#"fn main() {}"#;
        assert_pretty_print("rust", source, r#""fn""#, 0, 1);

        // Error nodes are printed as errors:
        let source = r#"}{"#;
        assert_pretty_print("rust", source, "(ERROR)", 0, source.len());

        // Fields broken under unnamed nodes are determined correctly.
        // In the following source, `object` belongs to the `singleton_method`
        // rule but `name` and `body` belong to an unnamed helper `_method_rest`.
        // This can cause a bug with a pretty-printing implementation that
        // uses `Node::field_name_for_child` to determine field names but is
        // fixed when using `tree_sitter::TreeCursor::field_name`.
        let source = "def self.method_name
true
end";
        assert_pretty_print(
            "ruby",
            source,
            concat!(
                "(singleton_method\n",
                "  object: (self)\n",
                "  name: (identifier)\n",
                "  body: (body_statement\n",
                "    (true)))"
            ),
            0,
            source.len(),
        );
    }

    #[test]
    fn test_load_runtime_file() {
        // Test to make sure we can load some data from the runtime directory.
        let contents = load_runtime_file("rust", "indents.scm").unwrap();
        assert!(!contents.is_empty());

        let results = load_runtime_file("rust", "does-not-exist");
        assert!(results.is_err());
    }
}
|