mirror of
https://github.com/zed-industries/zed.git
synced 2025-01-24 19:10:24 +00:00
Z 2819 (#2872)
This PR adds a new option to the language config called `word_characters` that controls which additional characters should be treated as part of a word for a given language. This will improve our UX for languages such as PHP and Tailwind. Release Notes: - Improved completions for PHP [#1820](https://github.com/zed-industries/community/issues/1820) --------- Co-authored-by: Julia Risley <julia@zed.dev>
This commit is contained in:
parent
c842e87079
commit
007d1b09ac
12 changed files with 120 additions and 59 deletions
|
@ -2654,7 +2654,6 @@ impl Editor {
|
|||
false
|
||||
});
|
||||
}
|
||||
|
||||
fn completion_query(buffer: &MultiBufferSnapshot, position: impl ToOffset) -> Option<String> {
|
||||
let offset = position.to_offset(buffer);
|
||||
let (word_range, kind) = buffer.surrounding_word(offset);
|
||||
|
|
|
@ -1028,7 +1028,7 @@ impl SearchableItem for Editor {
|
|||
if let Some((_, _, excerpt_buffer)) = buffer.as_singleton() {
|
||||
ranges.extend(
|
||||
query
|
||||
.search(excerpt_buffer.as_rope())
|
||||
.search(excerpt_buffer, None)
|
||||
.await
|
||||
.into_iter()
|
||||
.map(|range| {
|
||||
|
@ -1038,17 +1038,22 @@ impl SearchableItem for Editor {
|
|||
} else {
|
||||
for excerpt in buffer.excerpt_boundaries_in_range(0..buffer.len()) {
|
||||
let excerpt_range = excerpt.range.context.to_offset(&excerpt.buffer);
|
||||
let rope = excerpt.buffer.as_rope().slice(excerpt_range.clone());
|
||||
ranges.extend(query.search(&rope).await.into_iter().map(|range| {
|
||||
let start = excerpt
|
||||
.buffer
|
||||
.anchor_after(excerpt_range.start + range.start);
|
||||
let end = excerpt
|
||||
.buffer
|
||||
.anchor_before(excerpt_range.start + range.end);
|
||||
buffer.anchor_in_excerpt(excerpt.id.clone(), start)
|
||||
..buffer.anchor_in_excerpt(excerpt.id.clone(), end)
|
||||
}));
|
||||
ranges.extend(
|
||||
query
|
||||
.search(&excerpt.buffer, Some(excerpt_range.clone()))
|
||||
.await
|
||||
.into_iter()
|
||||
.map(|range| {
|
||||
let start = excerpt
|
||||
.buffer
|
||||
.anchor_after(excerpt_range.start + range.start);
|
||||
let end = excerpt
|
||||
.buffer
|
||||
.anchor_before(excerpt_range.start + range.end);
|
||||
buffer.anchor_in_excerpt(excerpt.id.clone(), start)
|
||||
..buffer.anchor_in_excerpt(excerpt.id.clone(), end)
|
||||
}),
|
||||
);
|
||||
}
|
||||
}
|
||||
ranges
|
||||
|
|
|
@ -176,14 +176,21 @@ pub fn line_end(
|
|||
}
|
||||
|
||||
pub fn previous_word_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
|
||||
let raw_point = point.to_point(map);
|
||||
let language = map.buffer_snapshot.language_at(raw_point);
|
||||
|
||||
find_preceding_boundary(map, point, |left, right| {
|
||||
(char_kind(left) != char_kind(right) && !right.is_whitespace()) || left == '\n'
|
||||
(char_kind(language, left) != char_kind(language, right) && !right.is_whitespace())
|
||||
|| left == '\n'
|
||||
})
|
||||
}
|
||||
|
||||
pub fn previous_subword_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
|
||||
let raw_point = point.to_point(map);
|
||||
let language = map.buffer_snapshot.language_at(raw_point);
|
||||
find_preceding_boundary(map, point, |left, right| {
|
||||
let is_word_start = char_kind(left) != char_kind(right) && !right.is_whitespace();
|
||||
let is_word_start =
|
||||
char_kind(language, left) != char_kind(language, right) && !right.is_whitespace();
|
||||
let is_subword_start =
|
||||
left == '_' && right != '_' || left.is_lowercase() && right.is_uppercase();
|
||||
is_word_start || is_subword_start || left == '\n'
|
||||
|
@ -191,14 +198,20 @@ pub fn previous_subword_start(map: &DisplaySnapshot, point: DisplayPoint) -> Dis
|
|||
}
|
||||
|
||||
pub fn next_word_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
|
||||
let raw_point = point.to_point(map);
|
||||
let language = map.buffer_snapshot.language_at(raw_point);
|
||||
find_boundary(map, point, |left, right| {
|
||||
(char_kind(left) != char_kind(right) && !left.is_whitespace()) || right == '\n'
|
||||
(char_kind(language, left) != char_kind(language, right) && !left.is_whitespace())
|
||||
|| right == '\n'
|
||||
})
|
||||
}
|
||||
|
||||
pub fn next_subword_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
|
||||
let raw_point = point.to_point(map);
|
||||
let language = map.buffer_snapshot.language_at(raw_point);
|
||||
find_boundary(map, point, |left, right| {
|
||||
let is_word_end = (char_kind(left) != char_kind(right)) && !left.is_whitespace();
|
||||
let is_word_end =
|
||||
(char_kind(language, left) != char_kind(language, right)) && !left.is_whitespace();
|
||||
let is_subword_end =
|
||||
left != '_' && right == '_' || left.is_lowercase() && right.is_uppercase();
|
||||
is_word_end || is_subword_end || right == '\n'
|
||||
|
@ -385,10 +398,15 @@ pub fn find_boundary_in_line(
|
|||
}
|
||||
|
||||
pub fn is_inside_word(map: &DisplaySnapshot, point: DisplayPoint) -> bool {
|
||||
let raw_point = point.to_point(map);
|
||||
let language = map.buffer_snapshot.language_at(raw_point);
|
||||
let ix = map.clip_point(point, Bias::Left).to_offset(map, Bias::Left);
|
||||
let text = &map.buffer_snapshot;
|
||||
let next_char_kind = text.chars_at(ix).next().map(char_kind);
|
||||
let prev_char_kind = text.reversed_chars_at(ix).next().map(char_kind);
|
||||
let next_char_kind = text.chars_at(ix).next().map(|c| char_kind(language, c));
|
||||
let prev_char_kind = text
|
||||
.reversed_chars_at(ix)
|
||||
.next()
|
||||
.map(|c| char_kind(language, c));
|
||||
prev_char_kind.zip(next_char_kind) == Some((CharKind::Word, CharKind::Word))
|
||||
}
|
||||
|
||||
|
|
|
@ -1865,13 +1865,16 @@ impl MultiBufferSnapshot {
|
|||
let mut end = start;
|
||||
let mut next_chars = self.chars_at(start).peekable();
|
||||
let mut prev_chars = self.reversed_chars_at(start).peekable();
|
||||
|
||||
let language = self.language_at(start);
|
||||
let kind = |c| char_kind(language, c);
|
||||
let word_kind = cmp::max(
|
||||
prev_chars.peek().copied().map(char_kind),
|
||||
next_chars.peek().copied().map(char_kind),
|
||||
prev_chars.peek().copied().map(kind),
|
||||
next_chars.peek().copied().map(kind),
|
||||
);
|
||||
|
||||
for ch in prev_chars {
|
||||
if Some(char_kind(ch)) == word_kind && ch != '\n' {
|
||||
if Some(kind(ch)) == word_kind && ch != '\n' {
|
||||
start -= ch.len_utf8();
|
||||
} else {
|
||||
break;
|
||||
|
@ -1879,7 +1882,7 @@ impl MultiBufferSnapshot {
|
|||
}
|
||||
|
||||
for ch in next_chars {
|
||||
if Some(char_kind(ch)) == word_kind && ch != '\n' {
|
||||
if Some(kind(ch)) == word_kind && ch != '\n' {
|
||||
end += ch.len_utf8();
|
||||
} else {
|
||||
break;
|
||||
|
|
|
@ -2174,13 +2174,16 @@ impl BufferSnapshot {
|
|||
let mut end = start;
|
||||
let mut next_chars = self.chars_at(start).peekable();
|
||||
let mut prev_chars = self.reversed_chars_at(start).peekable();
|
||||
|
||||
let language = self.language_at(start);
|
||||
let kind = |c| char_kind(language, c);
|
||||
let word_kind = cmp::max(
|
||||
prev_chars.peek().copied().map(char_kind),
|
||||
next_chars.peek().copied().map(char_kind),
|
||||
prev_chars.peek().copied().map(kind),
|
||||
next_chars.peek().copied().map(kind),
|
||||
);
|
||||
|
||||
for ch in prev_chars {
|
||||
if Some(char_kind(ch)) == word_kind && ch != '\n' {
|
||||
if Some(kind(ch)) == word_kind && ch != '\n' {
|
||||
start -= ch.len_utf8();
|
||||
} else {
|
||||
break;
|
||||
|
@ -2188,7 +2191,7 @@ impl BufferSnapshot {
|
|||
}
|
||||
|
||||
for ch in next_chars {
|
||||
if Some(char_kind(ch)) == word_kind && ch != '\n' {
|
||||
if Some(kind(ch)) == word_kind && ch != '\n' {
|
||||
end += ch.len_utf8();
|
||||
} else {
|
||||
break;
|
||||
|
@ -2985,14 +2988,18 @@ pub fn contiguous_ranges(
|
|||
})
|
||||
}
|
||||
|
||||
pub fn char_kind(c: char) -> CharKind {
|
||||
pub fn char_kind(language: Option<&Arc<Language>>, c: char) -> CharKind {
|
||||
if c.is_whitespace() {
|
||||
CharKind::Whitespace
|
||||
return CharKind::Whitespace;
|
||||
} else if c.is_alphanumeric() || c == '_' {
|
||||
CharKind::Word
|
||||
} else {
|
||||
CharKind::Punctuation
|
||||
return CharKind::Word;
|
||||
}
|
||||
if let Some(language) = language {
|
||||
if language.config.word_characters.contains(&c) {
|
||||
return CharKind::Word;
|
||||
}
|
||||
}
|
||||
CharKind::Punctuation
|
||||
}
|
||||
|
||||
/// Find all of the ranges of whitespace that occur at the ends of lines
|
||||
|
|
|
@ -11,7 +11,7 @@ mod buffer_tests;
|
|||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use collections::HashMap;
|
||||
use collections::{HashMap, HashSet};
|
||||
use futures::{
|
||||
channel::oneshot,
|
||||
future::{BoxFuture, Shared},
|
||||
|
@ -344,6 +344,8 @@ pub struct LanguageConfig {
|
|||
pub block_comment: Option<(Arc<str>, Arc<str>)>,
|
||||
#[serde(default)]
|
||||
pub overrides: HashMap<String, LanguageConfigOverride>,
|
||||
#[serde(default)]
|
||||
pub word_characters: HashSet<char>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
|
@ -411,6 +413,7 @@ impl Default for LanguageConfig {
|
|||
block_comment: Default::default(),
|
||||
overrides: Default::default(),
|
||||
collapsed_placeholder: Default::default(),
|
||||
word_characters: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5193,7 +5193,7 @@ impl Project {
|
|||
snapshot.file().map(|file| file.path().as_ref()),
|
||||
) {
|
||||
query
|
||||
.search(snapshot.as_rope())
|
||||
.search(&snapshot, None)
|
||||
.await
|
||||
.iter()
|
||||
.map(|range| {
|
||||
|
|
|
@ -3,7 +3,7 @@ use anyhow::{Context, Result};
|
|||
use client::proto;
|
||||
use globset::{Glob, GlobMatcher};
|
||||
use itertools::Itertools;
|
||||
use language::{char_kind, Rope};
|
||||
use language::{char_kind, BufferSnapshot};
|
||||
use regex::{Regex, RegexBuilder};
|
||||
use smol::future::yield_now;
|
||||
use std::{
|
||||
|
@ -23,6 +23,7 @@ pub enum SearchQuery {
|
|||
files_to_include: Vec<PathMatcher>,
|
||||
files_to_exclude: Vec<PathMatcher>,
|
||||
},
|
||||
|
||||
Regex {
|
||||
regex: Regex,
|
||||
query: Arc<str>,
|
||||
|
@ -193,12 +194,24 @@ impl SearchQuery {
|
|||
}
|
||||
}
|
||||
|
||||
pub async fn search(&self, rope: &Rope) -> Vec<Range<usize>> {
|
||||
pub async fn search(
|
||||
&self,
|
||||
buffer: &BufferSnapshot,
|
||||
subrange: Option<Range<usize>>,
|
||||
) -> Vec<Range<usize>> {
|
||||
const YIELD_INTERVAL: usize = 20000;
|
||||
|
||||
if self.as_str().is_empty() {
|
||||
return Default::default();
|
||||
}
|
||||
let language = buffer.language_at(0);
|
||||
let rope = if let Some(range) = subrange {
|
||||
buffer.as_rope().slice(range)
|
||||
} else {
|
||||
buffer.as_rope().clone()
|
||||
};
|
||||
|
||||
let kind = |c| char_kind(language, c);
|
||||
|
||||
let mut matches = Vec::new();
|
||||
match self {
|
||||
|
@ -215,10 +228,10 @@ impl SearchQuery {
|
|||
|
||||
let mat = mat.unwrap();
|
||||
if *whole_word {
|
||||
let prev_kind = rope.reversed_chars_at(mat.start()).next().map(char_kind);
|
||||
let start_kind = char_kind(rope.chars_at(mat.start()).next().unwrap());
|
||||
let end_kind = char_kind(rope.reversed_chars_at(mat.end()).next().unwrap());
|
||||
let next_kind = rope.chars_at(mat.end()).next().map(char_kind);
|
||||
let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
|
||||
let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
|
||||
let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
|
||||
let next_kind = rope.chars_at(mat.end()).next().map(kind);
|
||||
if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
|
||||
continue;
|
||||
}
|
||||
|
@ -226,6 +239,7 @@ impl SearchQuery {
|
|||
matches.push(mat.start()..mat.end())
|
||||
}
|
||||
}
|
||||
|
||||
Self::Regex {
|
||||
regex, multiline, ..
|
||||
} => {
|
||||
|
@ -263,6 +277,7 @@ impl SearchQuery {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
matches
|
||||
}
|
||||
|
||||
|
|
|
@ -439,11 +439,12 @@ pub(crate) fn next_word_start(
|
|||
ignore_punctuation: bool,
|
||||
times: usize,
|
||||
) -> DisplayPoint {
|
||||
let language = map.buffer_snapshot.language_at(point.to_point(map));
|
||||
for _ in 0..times {
|
||||
let mut crossed_newline = false;
|
||||
point = movement::find_boundary(map, point, |left, right| {
|
||||
let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation);
|
||||
let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation);
|
||||
let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
|
||||
let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
|
||||
let at_newline = right == '\n';
|
||||
|
||||
let found = (left_kind != right_kind && right_kind != CharKind::Whitespace)
|
||||
|
@ -463,11 +464,12 @@ fn next_word_end(
|
|||
ignore_punctuation: bool,
|
||||
times: usize,
|
||||
) -> DisplayPoint {
|
||||
let language = map.buffer_snapshot.language_at(point.to_point(map));
|
||||
for _ in 0..times {
|
||||
*point.column_mut() += 1;
|
||||
point = movement::find_boundary(map, point, |left, right| {
|
||||
let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation);
|
||||
let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation);
|
||||
let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
|
||||
let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
|
||||
|
||||
left_kind != right_kind && left_kind != CharKind::Whitespace
|
||||
});
|
||||
|
@ -493,12 +495,13 @@ fn previous_word_start(
|
|||
ignore_punctuation: bool,
|
||||
times: usize,
|
||||
) -> DisplayPoint {
|
||||
let language = map.buffer_snapshot.language_at(point.to_point(map));
|
||||
for _ in 0..times {
|
||||
// This works even though find_preceding_boundary is called for every character in the line containing
|
||||
// cursor because the newline is checked only once.
|
||||
point = movement::find_preceding_boundary(map, point, |left, right| {
|
||||
let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation);
|
||||
let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation);
|
||||
let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
|
||||
let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
|
||||
|
||||
(left_kind != right_kind && !right.is_whitespace()) || left == '\n'
|
||||
});
|
||||
|
@ -508,6 +511,7 @@ fn previous_word_start(
|
|||
|
||||
fn first_non_whitespace(map: &DisplaySnapshot, from: DisplayPoint) -> DisplayPoint {
|
||||
let mut last_point = DisplayPoint::new(from.row(), 0);
|
||||
let language = map.buffer_snapshot.language_at(from.to_point(map));
|
||||
for (ch, point) in map.chars_at(last_point) {
|
||||
if ch == '\n' {
|
||||
return from;
|
||||
|
@ -515,7 +519,7 @@ fn first_non_whitespace(map: &DisplaySnapshot, from: DisplayPoint) -> DisplayPoi
|
|||
|
||||
last_point = point;
|
||||
|
||||
if char_kind(ch) != CharKind::Whitespace {
|
||||
if char_kind(language, ch) != CharKind::Whitespace {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -82,16 +82,19 @@ fn expand_changed_word_selection(
|
|||
ignore_punctuation: bool,
|
||||
) -> bool {
|
||||
if times.is_none() || times.unwrap() == 1 {
|
||||
let language = map
|
||||
.buffer_snapshot
|
||||
.language_at(selection.start.to_point(map));
|
||||
let in_word = map
|
||||
.chars_at(selection.head())
|
||||
.next()
|
||||
.map(|(c, _)| char_kind(c) != CharKind::Whitespace)
|
||||
.map(|(c, _)| char_kind(language, c) != CharKind::Whitespace)
|
||||
.unwrap_or_default();
|
||||
|
||||
if in_word {
|
||||
selection.end = movement::find_boundary(map, selection.end, |left, right| {
|
||||
let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation);
|
||||
let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation);
|
||||
let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
|
||||
let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
|
||||
|
||||
left_kind != right_kind && left_kind != CharKind::Whitespace
|
||||
});
|
||||
|
|
|
@ -122,17 +122,18 @@ fn in_word(
|
|||
ignore_punctuation: bool,
|
||||
) -> Option<Range<DisplayPoint>> {
|
||||
// Use motion::right so that we consider the character under the cursor when looking for the start
|
||||
let language = map.buffer_snapshot.language_at(relative_to.to_point(map));
|
||||
let start = movement::find_preceding_boundary_in_line(
|
||||
map,
|
||||
right(map, relative_to, 1),
|
||||
|left, right| {
|
||||
char_kind(left).coerce_punctuation(ignore_punctuation)
|
||||
!= char_kind(right).coerce_punctuation(ignore_punctuation)
|
||||
char_kind(language, left).coerce_punctuation(ignore_punctuation)
|
||||
!= char_kind(language, right).coerce_punctuation(ignore_punctuation)
|
||||
},
|
||||
);
|
||||
let end = movement::find_boundary_in_line(map, relative_to, |left, right| {
|
||||
char_kind(left).coerce_punctuation(ignore_punctuation)
|
||||
!= char_kind(right).coerce_punctuation(ignore_punctuation)
|
||||
char_kind(language, left).coerce_punctuation(ignore_punctuation)
|
||||
!= char_kind(language, right).coerce_punctuation(ignore_punctuation)
|
||||
});
|
||||
|
||||
Some(start..end)
|
||||
|
@ -155,10 +156,11 @@ fn around_word(
|
|||
relative_to: DisplayPoint,
|
||||
ignore_punctuation: bool,
|
||||
) -> Option<Range<DisplayPoint>> {
|
||||
let language = map.buffer_snapshot.language_at(relative_to.to_point(map));
|
||||
let in_word = map
|
||||
.chars_at(relative_to)
|
||||
.next()
|
||||
.map(|(c, _)| char_kind(c) != CharKind::Whitespace)
|
||||
.map(|(c, _)| char_kind(language, c) != CharKind::Whitespace)
|
||||
.unwrap_or(false);
|
||||
|
||||
if in_word {
|
||||
|
@ -182,20 +184,21 @@ fn around_next_word(
|
|||
relative_to: DisplayPoint,
|
||||
ignore_punctuation: bool,
|
||||
) -> Option<Range<DisplayPoint>> {
|
||||
let language = map.buffer_snapshot.language_at(relative_to.to_point(map));
|
||||
// Get the start of the word
|
||||
let start = movement::find_preceding_boundary_in_line(
|
||||
map,
|
||||
right(map, relative_to, 1),
|
||||
|left, right| {
|
||||
char_kind(left).coerce_punctuation(ignore_punctuation)
|
||||
!= char_kind(right).coerce_punctuation(ignore_punctuation)
|
||||
char_kind(language, left).coerce_punctuation(ignore_punctuation)
|
||||
!= char_kind(language, right).coerce_punctuation(ignore_punctuation)
|
||||
},
|
||||
);
|
||||
|
||||
let mut word_found = false;
|
||||
let end = movement::find_boundary(map, relative_to, |left, right| {
|
||||
let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation);
|
||||
let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation);
|
||||
let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
|
||||
let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
|
||||
|
||||
let found = (word_found && left_kind != right_kind) || right == '\n' && left == '\n';
|
||||
|
||||
|
|
|
@ -10,3 +10,4 @@ brackets = [
|
|||
{ start = "\"", end = "\"", close = true, newline = false, not_in = ["string"] },
|
||||
]
|
||||
collapsed_placeholder = "/* ... */"
|
||||
word_characters = ["$"]
|
||||
|
|
Loading…
Reference in a new issue