From 6d4fe8098b8fc17f7cd92fb5c9f8fb3ee0c29da7 Mon Sep 17 00:00:00 2001 From: Thorsten Ball Date: Tue, 30 Jan 2024 16:10:35 +0100 Subject: [PATCH] Fix panic in fuzzy-finder for unicode characters (#7080) This fixes a panic in the fuzzy finder which someone ran into when typing in a query that contained the lower-case version of a unicode character that has more chars than its upper-case version. It also fixes another problem which was that we didn't find a match if both candidates and query contained upper-case characters whose lower-case version had more chars. Release Notes: - Fixed a panic in fuzzy-finder that could occur when matching with queries containing upper-case unicode characters whose lower-case version has more chars. Co-authored-by: bennetbo --- crates/fuzzy/src/matcher.rs | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/crates/fuzzy/src/matcher.rs b/crates/fuzzy/src/matcher.rs index e808a4886f..9b0d62893b 100644 --- a/crates/fuzzy/src/matcher.rs +++ b/crates/fuzzy/src/matcher.rs @@ -45,7 +45,7 @@ impl<'a> Matcher<'a> { lowercase_query, query_char_bag, min_score: 0.0, - last_positions: vec![0; query.len()], + last_positions: vec![0; lowercase_query.len()], match_positions: vec![0; query.len()], score_matrix: Vec::new(), best_position_matrix: Vec::new(), @@ -82,7 +82,7 @@ impl<'a> Matcher<'a> { lowercase_candidate_chars.clear(); for c in candidate.to_string().chars() { candidate_chars.push(c); - lowercase_candidate_chars.push(c.to_ascii_lowercase()); + lowercase_candidate_chars.append(&mut c.to_lowercase().collect::<Vec<_>>()); } if !self.find_last_positions(lowercase_prefix, &lowercase_candidate_chars) { @@ -383,6 +383,25 @@ mod tests { ); } + #[test] + fn test_lowercase_longer_than_uppercase() { + // This character has more chars in lower-case than in upper-case. 
+ let paths = vec!["\u{0130}"]; + let query = "\u{0130}"; + assert_eq!( + match_single_path_query(query, false, &paths), + vec![("\u{0130}", vec![0])] + ); + + // Path is the lower-case version of the query + let paths = vec!["i\u{307}"]; + let query = "\u{0130}"; + assert_eq!( + match_single_path_query(query, false, &paths), + vec![("i\u{307}", vec![0])] + ); + } + #[test] fn test_match_multibyte_path_entries() { let paths = vec!["aαbβ/cγdδ", "αβγδ/bcde", "c1️⃣2️⃣3️⃣/d4️⃣5️⃣6️⃣/e7️⃣8️⃣9️⃣/f", "/d/🆒/h"];