2022-02-27 14:47:46 +00:00
use aho_corasick ::{ AhoCorasick , AhoCorasickBuilder } ;
2023-07-28 09:56:44 +00:00
use anyhow ::{ Context , Result } ;
2022-02-27 14:47:46 +00:00
use client ::proto ;
2023-05-07 19:17:26 +00:00
use itertools ::Itertools ;
2023-08-22 08:35:20 +00:00
use language ::{ char_kind , BufferSnapshot } ;
2024-03-25 11:21:04 +00:00
use regex ::{ Captures , Regex , RegexBuilder } ;
2022-02-27 14:47:46 +00:00
use smol ::future ::yield_now ;
use std ::{
2023-09-12 16:46:54 +00:00
borrow ::Cow ,
2022-02-27 14:47:46 +00:00
io ::{ BufRead , BufReader , Read } ,
ops ::Range ,
2023-11-02 19:21:41 +00:00
path ::Path ,
2024-03-25 11:21:04 +00:00
sync ::{ Arc , OnceLock } ,
2022-02-27 14:47:46 +00:00
} ;
2023-11-01 09:53:00 +00:00
use util ::paths ::PathMatcher ;
2022-02-27 14:47:46 +00:00
2024-03-25 11:21:04 +00:00
/// Lazily-initialized pattern used by `SearchQuery::replacement_for` to locate the
/// escape sequences `\\`, `\n` and `\t` inside a regex replacement string.
/// It is compiled on first use via `OnceLock::get_or_init`.
static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX : OnceLock < Regex > = OnceLock ::new ( ) ;
2023-08-16 14:50:54 +00:00
/// The raw inputs a `SearchQuery` was built from: the query text and the
/// include/exclude path filters.
#[ derive(Clone, Debug) ]
pub struct SearchInputs {
/// Query text as handed to the constructor (for regex queries this is the text
/// before any `\b` whole-word wrapping is applied).
query : Arc < str > ,
/// Path matchers a file must satisfy to be searched; `SearchQuery::file_matches`
/// treats an empty list as "include everything".
files_to_include : Vec < PathMatcher > ,
/// Path matchers that reject a file; checked before the include list in
/// `SearchQuery::file_matches`.
files_to_exclude : Vec < PathMatcher > ,
}
impl SearchInputs {
    /// Borrows the stored query text.
    pub fn as_str(&self) -> &str {
        &self.query
    }

    /// Borrows the matchers that select which paths are searched.
    pub fn files_to_include(&self) -> &[PathMatcher] {
        self.files_to_include.as_slice()
    }

    /// Borrows the matchers that remove paths from the search.
    pub fn files_to_exclude(&self) -> &[PathMatcher] {
        self.files_to_exclude.as_slice()
    }
}
2023-05-07 19:50:54 +00:00
/// A compiled search query: either a literal-text search or a regular-expression
/// search, together with its options and an optional replacement string.
#[ derive(Clone, Debug) ]
2022-02-27 14:47:46 +00:00
pub enum SearchQuery {
/// Literal text search driven by an Aho-Corasick automaton.
Text {
2023-09-18 15:01:08 +00:00
/// Automaton built from the single query string in `SearchQuery::text`.
search : Arc < AhoCorasick > ,
2023-09-12 16:46:54 +00:00
/// Replacement text; `None` until `with_replacement` is called.
replacement : Option < String > ,
2022-02-27 14:47:46 +00:00
/// When set, `search` drops hits that are not delimited by a change of character kind.
whole_word : bool ,
/// When false, the automaton is built with ASCII case-insensitivity.
case_sensitive : bool ,
2023-11-10 08:56:28 +00:00
/// Stored and round-tripped through the proto message; not consulted by the
/// matching code in this file. NOTE(review): presumably tells the caller whether
/// ignored files should be searched — confirm at call sites.
include_ignored : bool ,
2023-08-16 14:50:54 +00:00
/// Original query text and path filters.
inner : SearchInputs ,
2022-02-27 14:47:46 +00:00
} ,
2023-08-22 08:35:20 +00:00
2022-02-27 14:47:46 +00:00
/// Regular-expression search.
Regex {
/// Compiled pattern (already wrapped in `\b` when `whole_word` was requested).
regex : Regex ,
2023-09-12 16:46:54 +00:00
/// Replacement template; `None` until `with_replacement` is called.
replacement : Option < String > ,
2022-02-27 14:47:46 +00:00
/// True when the pattern contains a newline or the two-character sequence `\n`;
/// selects whole-text matching instead of line-by-line matching.
multiline : bool ,
/// Whether the pattern was wrapped in word boundaries at construction.
whole_word : bool ,
/// When false, the regex is compiled case-insensitively.
case_sensitive : bool ,
2023-11-10 08:56:28 +00:00
/// Stored and round-tripped through the proto message; not consulted by the
/// matching code in this file. NOTE(review): presumably tells the caller whether
/// ignored files should be searched — confirm at call sites.
include_ignored : bool ,
2023-08-16 14:50:54 +00:00
/// Original (unwrapped) query text and path filters.
inner : SearchInputs ,
2022-02-27 14:47:46 +00:00
} ,
}
impl SearchQuery {
2023-05-07 19:17:26 +00:00
pub fn text (
query : impl ToString ,
whole_word : bool ,
case_sensitive : bool ,
2023-11-10 08:56:28 +00:00
include_ignored : bool ,
2023-07-28 09:56:44 +00:00
files_to_include : Vec < PathMatcher > ,
files_to_exclude : Vec < PathMatcher > ,
2023-09-18 15:01:08 +00:00
) -> Result < Self > {
2022-02-27 14:47:46 +00:00
let query = query . to_string ( ) ;
let search = AhoCorasickBuilder ::new ( )
. ascii_case_insensitive ( ! case_sensitive )
2023-09-18 15:01:08 +00:00
. build ( & [ & query ] ) ? ;
2023-08-16 14:50:54 +00:00
let inner = SearchInputs {
query : query . into ( ) ,
files_to_exclude ,
files_to_include ,
} ;
2023-09-18 15:01:08 +00:00
Ok ( Self ::Text {
2022-02-27 14:47:46 +00:00
search : Arc ::new ( search ) ,
2023-09-12 16:46:54 +00:00
replacement : None ,
2022-02-27 14:47:46 +00:00
whole_word ,
case_sensitive ,
2023-11-10 08:56:28 +00:00
include_ignored ,
2023-08-16 14:50:54 +00:00
inner ,
2023-09-18 15:01:08 +00:00
} )
2022-02-27 14:47:46 +00:00
}
2023-05-07 19:17:26 +00:00
pub fn regex (
query : impl ToString ,
whole_word : bool ,
case_sensitive : bool ,
2023-11-10 08:56:28 +00:00
include_ignored : bool ,
2023-07-28 09:56:44 +00:00
files_to_include : Vec < PathMatcher > ,
files_to_exclude : Vec < PathMatcher > ,
2023-05-07 19:17:26 +00:00
) -> Result < Self > {
2022-02-27 14:47:46 +00:00
let mut query = query . to_string ( ) ;
let initial_query = Arc ::from ( query . as_str ( ) ) ;
if whole_word {
let mut word_query = String ::new ( ) ;
word_query . push_str ( " \\ b " ) ;
word_query . push_str ( & query ) ;
word_query . push_str ( " \\ b " ) ;
query = word_query
}
2022-08-10 21:39:24 +00:00
let multiline = query . contains ( '\n' ) | | query . contains ( " \\ n " ) ;
2022-02-27 14:47:46 +00:00
let regex = RegexBuilder ::new ( & query )
. case_insensitive ( ! case_sensitive )
. multi_line ( multiline )
. build ( ) ? ;
2023-08-16 14:50:54 +00:00
let inner = SearchInputs {
query : initial_query ,
files_to_exclude ,
files_to_include ,
} ;
2022-02-27 14:47:46 +00:00
Ok ( Self ::Regex {
regex ,
2023-09-12 16:46:54 +00:00
replacement : None ,
2022-02-27 14:47:46 +00:00
multiline ,
whole_word ,
case_sensitive ,
2023-11-10 08:56:28 +00:00
include_ignored ,
2023-08-16 14:50:54 +00:00
inner ,
2022-02-27 14:47:46 +00:00
} )
}
pub fn from_proto ( message : proto ::SearchProject ) -> Result < Self > {
if message . regex {
2023-05-07 19:17:26 +00:00
Self ::regex (
message . query ,
message . whole_word ,
message . case_sensitive ,
2023-11-10 08:56:28 +00:00
message . include_ignored ,
2023-07-28 09:56:44 +00:00
deserialize_path_matches ( & message . files_to_include ) ? ,
deserialize_path_matches ( & message . files_to_exclude ) ? ,
2023-05-07 19:17:26 +00:00
)
2022-02-27 14:47:46 +00:00
} else {
2023-09-18 15:01:08 +00:00
Self ::text (
2022-02-27 14:47:46 +00:00
message . query ,
message . whole_word ,
message . case_sensitive ,
2023-11-10 08:56:28 +00:00
message . include_ignored ,
2023-07-28 09:56:44 +00:00
deserialize_path_matches ( & message . files_to_include ) ? ,
deserialize_path_matches ( & message . files_to_exclude ) ? ,
2023-09-18 15:01:08 +00:00
)
2022-02-27 14:47:46 +00:00
}
}
2023-09-20 02:47:11 +00:00
pub fn with_replacement ( mut self , new_replacement : String ) -> Self {
2023-09-12 16:46:54 +00:00
match self {
Self ::Text {
ref mut replacement ,
..
}
| Self ::Regex {
ref mut replacement ,
..
} = > {
2023-09-20 02:47:11 +00:00
* replacement = Some ( new_replacement ) ;
2023-09-12 16:46:54 +00:00
self
}
}
}
2022-02-27 14:47:46 +00:00
pub fn to_proto ( & self , project_id : u64 ) -> proto ::SearchProject {
proto ::SearchProject {
project_id ,
query : self . as_str ( ) . to_string ( ) ,
regex : self . is_regex ( ) ,
whole_word : self . whole_word ( ) ,
case_sensitive : self . case_sensitive ( ) ,
2023-11-10 08:56:28 +00:00
include_ignored : self . include_ignored ( ) ,
2023-05-07 19:17:26 +00:00
files_to_include : self
. files_to_include ( )
. iter ( )
2023-07-28 09:56:44 +00:00
. map ( | matcher | matcher . to_string ( ) )
2023-05-07 19:17:26 +00:00
. join ( " , " ) ,
files_to_exclude : self
. files_to_exclude ( )
. iter ( )
2023-07-28 09:56:44 +00:00
. map ( | matcher | matcher . to_string ( ) )
2023-05-07 19:17:26 +00:00
. join ( " , " ) ,
2022-02-27 14:47:46 +00:00
}
}
pub fn detect < T : Read > ( & self , stream : T ) -> Result < bool > {
if self . as_str ( ) . is_empty ( ) {
return Ok ( false ) ;
}
match self {
Self ::Text { search , .. } = > {
let mat = search . stream_find_iter ( stream ) . next ( ) ;
match mat {
Some ( Ok ( _ ) ) = > Ok ( true ) ,
Some ( Err ( err ) ) = > Err ( err . into ( ) ) ,
None = > Ok ( false ) ,
}
}
Self ::Regex {
regex , multiline , ..
} = > {
let mut reader = BufReader ::new ( stream ) ;
if * multiline {
let mut text = String ::new ( ) ;
if let Err ( err ) = reader . read_to_string ( & mut text ) {
Err ( err . into ( ) )
} else {
Ok ( regex . find ( & text ) . is_some ( ) )
}
} else {
for line in reader . lines ( ) {
let line = line ? ;
if regex . find ( & line ) . is_some ( ) {
return Ok ( true ) ;
}
}
Ok ( false )
}
}
}
}
2023-09-21 14:27:58 +00:00
/// Returns the replacement text for this `SearchQuery`.
pub fn replacement ( & self ) -> Option < & str > {
match self {
SearchQuery ::Text { replacement , .. } | SearchQuery ::Regex { replacement , .. } = > {
replacement . as_deref ( )
}
}
}
/// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
pub fn replacement_for < ' a > ( & self , text : & ' a str ) -> Option < Cow < ' a , str > > {
2023-09-12 16:46:54 +00:00
match self {
SearchQuery ::Text { replacement , .. } = > replacement . clone ( ) . map ( Cow ::from ) ,
SearchQuery ::Regex {
regex , replacement , ..
} = > {
if let Some ( replacement ) = replacement {
2024-03-25 11:21:04 +00:00
let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
. get_or_init ( | | Regex ::new ( r "\\\\|\\n|\\t" ) . unwrap ( ) )
. replace_all ( replacement , | c : & Captures | {
match c . get ( 0 ) . unwrap ( ) . as_str ( ) {
r "\\" = > " \\ " ,
r "\n" = > " \n " ,
r "\t" = > " \t " ,
x = > unreachable! ( " Unexpected escape sequence: {} " , x ) ,
}
} ) ;
2023-09-12 16:46:54 +00:00
Some ( regex . replace ( text , replacement ) )
} else {
None
}
}
}
}
2024-05-08 21:52:15 +00:00
2023-08-22 08:35:20 +00:00
pub async fn search (
& self ,
buffer : & BufferSnapshot ,
subrange : Option < Range < usize > > ,
) -> Vec < Range < usize > > {
2022-02-27 14:47:46 +00:00
const YIELD_INTERVAL : usize = 20000 ;
if self . as_str ( ) . is_empty ( ) {
return Default ::default ( ) ;
}
2023-08-25 22:46:30 +00:00
let range_offset = subrange . as_ref ( ) . map ( | r | r . start ) . unwrap_or ( 0 ) ;
2023-08-22 08:35:20 +00:00
let rope = if let Some ( range ) = subrange {
buffer . as_rope ( ) . slice ( range )
} else {
buffer . as_rope ( ) . clone ( )
} ;
2022-02-27 14:47:46 +00:00
let mut matches = Vec ::new ( ) ;
match self {
Self ::Text {
search , whole_word , ..
} = > {
for ( ix , mat ) in search
. stream_find_iter ( rope . bytes_in_range ( 0 .. rope . len ( ) ) )
. enumerate ( )
{
if ( ix + 1 ) % YIELD_INTERVAL = = 0 {
yield_now ( ) . await ;
}
let mat = mat . unwrap ( ) ;
if * whole_word {
2023-08-25 22:46:30 +00:00
let scope = buffer . language_scope_at ( range_offset + mat . start ( ) ) ;
let kind = | c | char_kind ( & scope , c ) ;
2023-08-22 08:35:20 +00:00
let prev_kind = rope . reversed_chars_at ( mat . start ( ) ) . next ( ) . map ( kind ) ;
let start_kind = kind ( rope . chars_at ( mat . start ( ) ) . next ( ) . unwrap ( ) ) ;
let end_kind = kind ( rope . reversed_chars_at ( mat . end ( ) ) . next ( ) . unwrap ( ) ) ;
let next_kind = rope . chars_at ( mat . end ( ) ) . next ( ) . map ( kind ) ;
2022-02-27 14:47:46 +00:00
if Some ( start_kind ) = = prev_kind | | Some ( end_kind ) = = next_kind {
continue ;
}
}
matches . push ( mat . start ( ) .. mat . end ( ) )
}
}
2023-08-22 08:35:20 +00:00
2022-02-27 14:47:46 +00:00
Self ::Regex {
regex , multiline , ..
} = > {
if * multiline {
let text = rope . to_string ( ) ;
for ( ix , mat ) in regex . find_iter ( & text ) . enumerate ( ) {
if ( ix + 1 ) % YIELD_INTERVAL = = 0 {
yield_now ( ) . await ;
}
matches . push ( mat . start ( ) .. mat . end ( ) ) ;
}
} else {
let mut line = String ::new ( ) ;
let mut line_offset = 0 ;
for ( chunk_ix , chunk ) in rope . chunks ( ) . chain ( [ " \n " ] ) . enumerate ( ) {
if ( chunk_ix + 1 ) % YIELD_INTERVAL = = 0 {
yield_now ( ) . await ;
}
for ( newline_ix , text ) in chunk . split ( '\n' ) . enumerate ( ) {
if newline_ix > 0 {
for mat in regex . find_iter ( & line ) {
let start = line_offset + mat . start ( ) ;
let end = line_offset + mat . end ( ) ;
matches . push ( start .. end ) ;
}
line_offset + = line . len ( ) + 1 ;
line . clear ( ) ;
}
line . push_str ( text ) ;
}
}
}
}
}
2023-08-22 08:35:20 +00:00
2022-02-27 14:47:46 +00:00
matches
}
2024-03-14 14:06:00 +00:00
/// Returns true when the query text is the empty string.
pub fn is_empty(&self) -> bool {
    self.as_inner().as_str().is_empty()
}
2022-02-27 14:47:46 +00:00
pub fn as_str ( & self ) -> & str {
2023-08-16 14:50:54 +00:00
self . as_inner ( ) . as_str ( )
2022-02-27 14:47:46 +00:00
}
pub fn whole_word ( & self ) -> bool {
match self {
Self ::Text { whole_word , .. } = > * whole_word ,
Self ::Regex { whole_word , .. } = > * whole_word ,
}
}
pub fn case_sensitive ( & self ) -> bool {
match self {
Self ::Text { case_sensitive , .. } = > * case_sensitive ,
Self ::Regex { case_sensitive , .. } = > * case_sensitive ,
}
}
2023-11-10 08:56:28 +00:00
pub fn include_ignored ( & self ) -> bool {
match self {
Self ::Text {
include_ignored , ..
} = > * include_ignored ,
Self ::Regex {
include_ignored , ..
} = > * include_ignored ,
}
}
2022-02-27 14:47:46 +00:00
/// Returns true for the `Regex` variant and false for the `Text` variant.
pub fn is_regex(&self) -> bool {
    match self {
        Self::Regex { .. } => true,
        Self::Text { .. } => false,
    }
}
2023-05-07 19:17:26 +00:00
2023-07-28 09:56:44 +00:00
/// Returns the include matchers held in the underlying `SearchInputs`.
pub fn files_to_include(&self) -> &[PathMatcher] {
    let inputs = self.as_inner();
    inputs.files_to_include()
}
2023-05-07 19:50:54 +00:00
2023-07-28 09:56:44 +00:00
/// Returns the exclude matchers held in the underlying `SearchInputs`.
pub fn files_to_exclude(&self) -> &[PathMatcher] {
    let inputs = self.as_inner();
    inputs.files_to_exclude()
}
2023-05-07 19:50:54 +00:00
pub fn file_matches ( & self , file_path : Option < & Path > ) -> bool {
match file_path {
Some ( file_path ) = > {
2023-12-04 21:00:01 +00:00
let mut path = file_path . to_path_buf ( ) ;
loop {
2023-12-05 10:28:26 +00:00
if self
2023-12-04 21:00:01 +00:00
. files_to_exclude ( )
. iter ( )
. any ( | exclude_glob | exclude_glob . is_match ( & path ) )
2023-12-05 10:28:26 +00:00
{
return false ;
} else if self . files_to_include ( ) . is_empty ( )
| | self
. files_to_include ( )
. iter ( )
. any ( | include_glob | include_glob . is_match ( & path ) )
{
return true ;
} else if ! path . pop ( ) {
return false ;
2023-12-04 21:00:01 +00:00
}
}
2023-05-07 19:50:54 +00:00
}
None = > self . files_to_include ( ) . is_empty ( ) ,
}
}
2023-08-16 14:50:54 +00:00
pub fn as_inner ( & self ) -> & SearchInputs {
match self {
Self ::Regex { inner , .. } | Self ::Text { inner , .. } = > inner ,
}
}
2022-02-27 14:47:46 +00:00
}
2023-05-19 16:13:31 +00:00
2023-07-28 09:56:44 +00:00
fn deserialize_path_matches ( glob_set : & str ) -> anyhow ::Result < Vec < PathMatcher > > {
2023-05-19 16:13:31 +00:00
glob_set
. split ( ',' )
. map ( str ::trim )
. filter ( | glob_str | ! glob_str . is_empty ( ) )
2023-07-28 09:56:44 +00:00
. map ( | glob_str | {
PathMatcher ::new ( glob_str )
. with_context ( | | format! ( " deserializing path match glob {glob_str} " ) )
} )
2023-05-19 16:13:31 +00:00
. collect ( )
}
2023-07-28 09:56:44 +00:00
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as matchers and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];
        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            );
        }
    }

    /// Malformed globs must be rejected and well-formed globs accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] {
            assert!(
                PathMatcher::new(invalid_glob).is_err(),
                "Invalid glob {invalid_glob} should not be accepted"
            );
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}