Fix panic in fuzzy-finder for unicode characters (#7080)

This fixes a panic in the fuzzy finder that someone ran into when
typing a query containing a Unicode character whose lower-case version
has more chars than its upper-case version.

It also fixes another problem which was that we didn't find a match if
both candidates and query contained upper-case characters whose
lower-case version had more chars.


Release Notes:

- Fixed a panic in the fuzzy finder that could occur when a query
contained upper-case Unicode characters whose lower-case version has
more chars.

Co-authored-by: bennetbo <bennetbo@gmx.de>
This commit is contained in:
Thorsten Ball 2024-01-30 16:10:35 +01:00 committed by GitHub
parent 6c7893db35
commit 6d4fe8098b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -45,7 +45,7 @@ impl<'a> Matcher<'a> {
lowercase_query,
query_char_bag,
min_score: 0.0,
last_positions: vec![0; query.len()],
last_positions: vec![0; lowercase_query.len()],
match_positions: vec![0; query.len()],
score_matrix: Vec::new(),
best_position_matrix: Vec::new(),
@ -82,7 +82,7 @@ impl<'a> Matcher<'a> {
lowercase_candidate_chars.clear();
for c in candidate.to_string().chars() {
candidate_chars.push(c);
lowercase_candidate_chars.push(c.to_ascii_lowercase());
lowercase_candidate_chars.append(&mut c.to_lowercase().collect::<Vec<_>>());
}
if !self.find_last_positions(lowercase_prefix, &lowercase_candidate_chars) {
@ -383,6 +383,25 @@ mod tests {
);
}
#[test]
fn test_lowercase_longer_than_uppercase() {
    // U+0130 is a single char in upper-case, but its lower-case version
    // ("i\u{307}") consists of more chars.
    let dotted_capital_i = "\u{0130}";

    // First the candidate is the query itself, then it is the lower-case
    // version of the query. Both are expected to match at position 0.
    for candidate in ["\u{0130}", "i\u{307}"] {
        let candidates = vec![candidate];
        assert_eq!(
            match_single_path_query(dotted_capital_i, false, &candidates),
            vec![(candidate, vec![0])]
        );
    }
}
#[test]
fn test_match_multibyte_path_entries() {
let paths = vec!["aαbβ/cγ", "αβγδ/bcde", "c1⃣2⃣3⃣/d4⃣5⃣6⃣/e7⃣8⃣9⃣/f", "/d/🆒/h"];