From e83d1fc9fcfc6c80fb14b55e38fe7c1a7a4b7235 Mon Sep 17 00:00:00 2001
From: Antonio Scandurra
Date: Thu, 24 Feb 2022 16:33:31 +0100
Subject: [PATCH] Start on a regex implementation of `SearchQuery`

---
 crates/project/src/project.rs |  15 +++--
 crates/project/src/search.rs  | 135 ++++++++++++++++++++++++++++++++++
 2 files changed, 142 insertions(+), 8 deletions(-)
 create mode 100644 crates/project/src/search.rs

diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs
index 74890a2f51..aa9e47fcb7 100644
--- a/crates/project/src/project.rs
+++ b/crates/project/src/project.rs
@@ -2108,10 +2108,7 @@ impl Project {
                                     let matches = if let Some(file) =
                                         fs.open_sync(&path).await.log_err()
                                     {
-                                        query
-                                            .search(file)
-                                            .next()
-                                            .map_or(false, |range| range.is_ok())
+                                        query.is_contained_in_stream(file).unwrap_or(false)
                                     } else {
                                         false
                                     };
@@ -2176,10 +2173,12 @@ impl Project {
                             let mut buffers_rx = buffers_rx.clone();
                             scope.spawn(async move {
                                 while let Some((buffer, snapshot)) = buffers_rx.next().await {
-                                    for range in query.search(
-                                        snapshot.as_rope().bytes_in_range(0..snapshot.len()),
-                                    ) {
-                                        let range = range.unwrap();
+                                    for range in query
+                                        .search(
+                                            snapshot.as_rope().bytes_in_range(0..snapshot.len()),
+                                        )
+                                        .unwrap()
+                                    {
                                         let range = snapshot.anchor_before(range.start)
                                             ..snapshot.anchor_after(range.end);
                                         worker_matched_buffers
diff --git a/crates/project/src/search.rs b/crates/project/src/search.rs
new file mode 100644
index 0000000000..69be605c93
--- /dev/null
+++ b/crates/project/src/search.rs
@@ -0,0 +1,135 @@
+use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
+use anyhow::Result;
+use regex::{Regex, RegexBuilder};
+use std::{
+    borrow::Cow,
+    io::{BufRead, BufReader, Read},
+    ops::Range,
+    sync::Arc,
+};
+
+/// A compiled search query that can be run against any byte stream.
+#[derive(Clone)]
+pub enum SearchQuery {
+    /// Literal text search driven by an Aho-Corasick automaton.
+    Text { search: Arc<AhoCorasick<usize>> },
+    /// Regular-expression search. `multiline` is set when the pattern contains
+    /// a newline (literal or the `\n` escape), in which case the input is
+    /// matched as a whole instead of line by line.
+    Regex { multiline: bool, regex: Regex },
+}
+
+impl SearchQuery {
+    /// Builds a literal text query for `query`.
+    pub fn text(query: &str) -> Self {
+        let search = AhoCorasickBuilder::new()
+            .auto_configure(&[query])
+            .build(&[query]);
+        Self::Text {
+            search: Arc::new(search),
+        }
+    }
+
+    /// Builds a regex query from the pattern `query`.
+    ///
+    /// With `whole_word`, the pattern only matches between word boundaries.
+    /// Matching ignores case unless `case_sensitive` is set.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the resulting pattern fails to compile.
+    pub fn regex(query: &str, whole_word: bool, case_sensitive: bool) -> Result<Self> {
+        let query = if whole_word {
+            // Group the pattern so the boundaries apply to every alternative:
+            // a bare `\bfoo|bar\b` would leave `foo` and `bar` half-anchored.
+            Cow::Owned(format!("\\b(?:{})\\b", query))
+        } else {
+            Cow::Borrowed(query)
+        };
+
+        let multiline = query.contains('\n') || query.contains("\\n");
+        let regex = RegexBuilder::new(&query)
+            .case_insensitive(!case_sensitive)
+            .multi_line(multiline)
+            .build()?;
+        Ok(Self::Regex { multiline, regex })
+    }
+
+    /// Returns whether `stream` contains at least one match for this query.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if reading from `stream` fails. Regex queries decode
+    /// the stream as UTF-8, so invalid UTF-8 is reported as an error too.
+    pub fn is_contained_in_stream<T: Read>(&self, stream: T) -> Result<bool> {
+        match self {
+            SearchQuery::Text { search } => {
+                Ok(search.stream_find_iter(stream).next().transpose()?.is_some())
+            }
+            SearchQuery::Regex { multiline, regex } => {
+                let mut reader = BufReader::new(stream);
+                if *multiline {
+                    // The pattern may span lines, so the whole input is needed.
+                    let mut text = String::new();
+                    reader.read_to_string(&mut text)?;
+                    Ok(regex.is_match(&text))
+                } else {
+                    for line in reader.lines() {
+                        if regex.is_match(&line?) {
+                            return Ok(true);
+                        }
+                    }
+                    Ok(false)
+                }
+            }
+        }
+    }
+
+    /// Returns the byte range of every match for this query in `stream`, in
+    /// the order the matches were found.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if reading from `stream` fails. Regex queries decode
+    /// the stream as UTF-8, so invalid UTF-8 is reported as an error too.
+    pub fn search<'a, T: 'a + Read>(&'a self, stream: T) -> Result<Vec<Range<usize>>> {
+        let mut matches = Vec::new();
+        match self {
+            SearchQuery::Text { search } => {
+                for mat in search.stream_find_iter(stream) {
+                    let mat = mat?;
+                    matches.push(mat.start()..mat.end())
+                }
+            }
+            SearchQuery::Regex { multiline, regex } => {
+                let mut reader = BufReader::new(stream);
+                if *multiline {
+                    let mut text = String::new();
+                    reader.read_to_string(&mut text)?;
+                    matches.extend(regex.find_iter(&text).map(|mat| mat.start()..mat.end()));
+                } else {
+                    // `read_line` keeps the line terminator, so `line.len()` is
+                    // the full distance to the next line. Advancing by the
+                    // terminator-stripped length would shift every match after
+                    // the first line towards the start of the stream.
+                    let mut line = String::new();
+                    let mut line_start = 0;
+                    while reader.read_line(&mut line)? > 0 {
+                        let text = match line.strip_suffix('\n') {
+                            Some(rest) => rest.strip_suffix('\r').unwrap_or(rest),
+                            None => line.as_str(),
+                        };
+                        matches.extend(
+                            regex
+                                .find_iter(text)
+                                .map(|mat| (line_start + mat.start())..(line_start + mat.end())),
+                        );
+                        line_start += line.len();
+                        line.clear();
+                    }
+                }
+            }
+        }
+        Ok(matches)
+    }
+}