From c46be992e07e286699e5f2e8247120c46148f6a9 Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Mon, 25 Jul 2022 15:02:45 +0200 Subject: [PATCH] Introduce `Rope::clip_offset_utf16` --- crates/editor/src/multi_buffer.rs | 4 +-- crates/text/src/rope.rs | 59 +++++++++++++++++++++++++++---- crates/text/src/tests.rs | 10 +++--- 3 files changed, 59 insertions(+), 14 deletions(-) diff --git a/crates/editor/src/multi_buffer.rs b/crates/editor/src/multi_buffer.rs index abf1689343..71d0053e56 100644 --- a/crates/editor/src/multi_buffer.rs +++ b/crates/editor/src/multi_buffer.rs @@ -1888,7 +1888,7 @@ impl MultiBufferSnapshot { .offset_to_offset_utf16(excerpt_start_offset + overshoot); *start_offset_utf16 + (buffer_offset_utf16 - excerpt_start_offset_utf16) } else { - OffsetUtf16(self.excerpts.summary().text.len_utf16) + self.excerpts.summary().text.len_utf16 } } @@ -2935,7 +2935,7 @@ impl<'a> sum_tree::SeekTarget<'a, ExcerptSummary, ExcerptSummary> for Option<&'a impl<'a> sum_tree::Dimension<'a, ExcerptSummary> for OffsetUtf16 { fn add_summary(&mut self, summary: &'a ExcerptSummary, _: &()) { - self.0 += summary.text.len_utf16; + *self += summary.text.len_utf16; } } diff --git a/crates/text/src/rope.rs b/crates/text/src/rope.rs index 8d278bb8f7..0cdb3b299e 100644 --- a/crates/text/src/rope.rs +++ b/crates/text/src/rope.rs @@ -166,7 +166,7 @@ impl Rope { pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 { if offset >= self.summary().len { - return OffsetUtf16(self.summary().len_utf16); + return self.summary().len_utf16; } let mut cursor = self.chunks.cursor::<(usize, OffsetUtf16)>(); cursor.seek(&offset, Bias::Left, &()); @@ -178,7 +178,7 @@ impl Rope { } pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize { - if offset.0 >= self.summary().len_utf16 { + if offset >= self.summary().len_utf16 { return self.summary().len; } let mut cursor = self.chunks.cursor::<(OffsetUtf16, usize)>(); @@ -291,6 +291,17 @@ impl Rope { } } + pub fn 
clip_offset_utf16(&self, offset: OffsetUtf16, bias: Bias) -> OffsetUtf16 { + let mut cursor = self.chunks.cursor::<OffsetUtf16>(); + cursor.seek(&offset, Bias::Right, &()); + if let Some(chunk) = cursor.item() { + let overshoot = offset - cursor.start(); + *cursor.start() + chunk.clip_offset_utf16(overshoot, bias) + } else { + self.summary().len_utf16 + } + } + pub fn clip_point(&self, point: Point, bias: Bias) -> Point { let mut cursor = self.chunks.cursor::<Point>(); cursor.seek(&point, Bias::Right, &()); @@ -765,6 +776,18 @@ impl Chunk { } unreachable!() } + + fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 { + let mut code_units = self.0.encode_utf16(); + let mut offset = code_units.by_ref().take(target.0 as usize).count(); + if char::decode_utf16(code_units).next().transpose().is_err() { + match bias { + Bias::Left => offset -= 1, + Bias::Right => offset += 1, + } + } + OffsetUtf16(offset) + } } impl sum_tree::Item for Chunk { @@ -802,7 +825,7 @@ impl sum_tree::Summary for ChunkSummary { #[derive(Clone, Debug, Default, Eq, PartialEq)] pub struct TextSummary { pub len: usize, - pub len_utf16: usize, + pub len_utf16: OffsetUtf16, pub lines: Point, pub lines_utf16: PointUtf16, pub first_line_chars: u32, @@ -813,7 +836,7 @@ pub struct TextSummary { impl<'a> From<&'a str> for TextSummary { fn from(text: &'a str) -> Self { - let mut len_utf16 = 0; + let mut len_utf16 = OffsetUtf16(0); let mut lines = Point::new(0, 0); let mut lines_utf16 = PointUtf16::new(0, 0); let mut first_line_chars = 0; @@ -821,7 +844,7 @@ impl<'a> From<&'a str> for TextSummary { let mut longest_row = 0; let mut longest_row_chars = 0; for c in text.chars() { - len_utf16 += c.len_utf16(); + len_utf16.0 += c.len_utf16(); if c == '\n' { lines += Point::new(1, 0); @@ -961,13 +984,13 @@ impl TextDimension for usize { impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 { fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) { - self.0 += summary.text.len_utf16; + *self += 
summary.text.len_utf16; } } impl TextDimension for OffsetUtf16 { fn from_text_summary(summary: &TextSummary) -> Self { - Self(summary.len_utf16) + summary.len_utf16 } fn add_assign(&mut self, other: &Self) { @@ -1075,6 +1098,19 @@ mod tests { rope.clip_point_utf16(PointUtf16::new(0, 3), Bias::Right), PointUtf16::new(0, 2) ); + + assert_eq!( + rope.clip_offset_utf16(OffsetUtf16(1), Bias::Left), + OffsetUtf16(0) + ); + assert_eq!( + rope.clip_offset_utf16(OffsetUtf16(1), Bias::Right), + OffsetUtf16(2) + ); + assert_eq!( + rope.clip_offset_utf16(OffsetUtf16(3), Bias::Right), + OffsetUtf16(2) + ); } #[gpui::test(iterations = 100)] @@ -1174,8 +1210,16 @@ mod tests { offset_utf16.0 += ch.len_utf16(); } + let mut offset_utf16 = OffsetUtf16(0); let mut point_utf16 = PointUtf16::zero(); for unit in expected.encode_utf16() { + let left_offset = actual.clip_offset_utf16(offset_utf16, Bias::Left); + let right_offset = actual.clip_offset_utf16(offset_utf16, Bias::Right); + assert!(right_offset >= left_offset); + // Ensure translating UTF-16 offsets to UTF-8 offsets doesn't panic. 
+ actual.offset_utf16_to_offset(left_offset); + actual.offset_utf16_to_offset(right_offset); + let left_point = actual.clip_point_utf16(point_utf16, Bias::Left); let right_point = actual.clip_point_utf16(point_utf16, Bias::Right); assert!(right_point >= left_point); @@ -1183,6 +1227,7 @@ mod tests { actual.point_utf16_to_offset(left_point); actual.point_utf16_to_offset(right_point); + offset_utf16.0 += 1; if unit == b'\n' as u16 { point_utf16 += PointUtf16::new(1, 0); } else { diff --git a/crates/text/src/tests.rs b/crates/text/src/tests.rs index 5d5fba2be0..d9f7440e8d 100644 --- a/crates/text/src/tests.rs +++ b/crates/text/src/tests.rs @@ -248,7 +248,7 @@ fn test_text_summary_for_range() { buffer.text_summary_for_range::(1..3), TextSummary { len: 2, - len_utf16: 2, + len_utf16: OffsetUtf16(2), lines: Point::new(1, 0), lines_utf16: PointUtf16::new(1, 0), first_line_chars: 1, @@ -261,7 +261,7 @@ fn test_text_summary_for_range() { buffer.text_summary_for_range::(1..12), TextSummary { len: 11, - len_utf16: 11, + len_utf16: OffsetUtf16(11), lines: Point::new(3, 0), lines_utf16: PointUtf16::new(3, 0), first_line_chars: 1, @@ -274,7 +274,7 @@ fn test_text_summary_for_range() { buffer.text_summary_for_range::(0..20), TextSummary { len: 20, - len_utf16: 20, + len_utf16: OffsetUtf16(20), lines: Point::new(4, 1), lines_utf16: PointUtf16::new(4, 1), first_line_chars: 2, @@ -287,7 +287,7 @@ fn test_text_summary_for_range() { buffer.text_summary_for_range::(0..22), TextSummary { len: 22, - len_utf16: 22, + len_utf16: OffsetUtf16(22), lines: Point::new(4, 3), lines_utf16: PointUtf16::new(4, 3), first_line_chars: 2, @@ -300,7 +300,7 @@ fn test_text_summary_for_range() { buffer.text_summary_for_range::(7..22), TextSummary { len: 15, - len_utf16: 15, + len_utf16: OffsetUtf16(15), lines: Point::new(2, 3), lines_utf16: PointUtf16::new(2, 3), first_line_chars: 4,