Introduce Rope::clip_offset_utf16

This commit is contained in:
Antonio Scandurra 2022-07-25 15:02:45 +02:00
parent bb55d654ce
commit c46be992e0
3 changed files with 59 additions and 14 deletions

View file

@ -1888,7 +1888,7 @@ impl MultiBufferSnapshot {
.offset_to_offset_utf16(excerpt_start_offset + overshoot);
*start_offset_utf16 + (buffer_offset_utf16 - excerpt_start_offset_utf16)
} else {
OffsetUtf16(self.excerpts.summary().text.len_utf16)
self.excerpts.summary().text.len_utf16
}
}
@ -2935,7 +2935,7 @@ impl<'a> sum_tree::SeekTarget<'a, ExcerptSummary, ExcerptSummary> for Option<&'a
// Implements `sum_tree::Dimension` for `OffsetUtf16` over `ExcerptSummary`, so a
// UTF-16 offset can be accumulated from excerpt summaries.
impl<'a> sum_tree::Dimension<'a, ExcerptSummary> for OffsetUtf16 {
// Advances this offset by the UTF-16 length stored in the summary's text.
// The second argument is ignored.
fn add_summary(&mut self, summary: &'a ExcerptSummary, _: &()) {
// NOTE(review): both the `self.0 +=` and the `*self +=` form appear below. This
// looks like a rendered diff showing the old and the new line — confirm only one
// of them remains in the real file, otherwise the length is added twice.
self.0 += summary.text.len_utf16;
*self += summary.text.len_utf16;
}
}

View file

@ -166,7 +166,7 @@ impl Rope {
pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
if offset >= self.summary().len {
return OffsetUtf16(self.summary().len_utf16);
return self.summary().len_utf16;
}
let mut cursor = self.chunks.cursor::<(usize, OffsetUtf16)>();
cursor.seek(&offset, Bias::Left, &());
@ -178,7 +178,7 @@ impl Rope {
}
pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
if offset.0 >= self.summary().len_utf16 {
if offset >= self.summary().len_utf16 {
return self.summary().len;
}
let mut cursor = self.chunks.cursor::<(OffsetUtf16, usize)>();
@ -291,6 +291,17 @@ impl Rope {
}
}
/// Clips a UTF-16 `offset` into this rope according to `bias`.
///
/// The chunk cursor is sought to `offset`; when it rests on a chunk, the part of
/// `offset` that lies beyond the chunk's start is clipped by the chunk itself and
/// added back onto the chunk's start. When the cursor has no item, the rope's
/// total UTF-16 length is returned.
pub fn clip_offset_utf16(&self, offset: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
    let mut cursor = self.chunks.cursor::<OffsetUtf16>();
    cursor.seek(&offset, Bias::Right, &());
    match cursor.item() {
        Some(chunk) => {
            // Distance from the start of the chunk to the requested offset.
            let overshoot = offset - cursor.start();
            let clipped_in_chunk = chunk.clip_offset_utf16(overshoot, bias);
            *cursor.start() + clipped_in_chunk
        }
        // No chunk under the cursor: fall back to the end of the rope.
        None => self.summary().len_utf16,
    }
}
pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
let mut cursor = self.chunks.cursor::<Point>();
cursor.seek(&point, Bias::Right, &());
@ -765,6 +776,18 @@ impl Chunk {
}
unreachable!()
}
/// Clips a chunk-relative UTF-16 offset so that it does not split a surrogate pair.
///
/// At most `target` code units of the chunk's UTF-16 encoding are consumed. If
/// decoding the very next unit fails, the cut landed on an unpaired surrogate
/// (presumably the trailing half of a pair, given well-formed text), and the
/// result moves one unit back for `Bias::Left` or one unit forward for
/// `Bias::Right`. Otherwise the number of consumed units is returned unchanged,
/// which is less than `target` when the chunk is shorter than `target`.
fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
    let mut units = self.0.encode_utf16();
    // `by_ref` keeps `units` usable afterwards, positioned right after the cut.
    let consumed = units.by_ref().take(target.0 as usize).count();

    let splits_pair = matches!(char::decode_utf16(units).next(), Some(Err(_)));
    if !splits_pair {
        return OffsetUtf16(consumed);
    }

    match bias {
        Bias::Left => OffsetUtf16(consumed - 1),
        Bias::Right => OffsetUtf16(consumed + 1),
    }
}
}
impl sum_tree::Item for Chunk {
@ -802,7 +825,7 @@ impl sum_tree::Summary for ChunkSummary {
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct TextSummary {
pub len: usize,
pub len_utf16: usize,
pub len_utf16: OffsetUtf16,
pub lines: Point,
pub lines_utf16: PointUtf16,
pub first_line_chars: u32,
@ -813,7 +836,7 @@ pub struct TextSummary {
impl<'a> From<&'a str> for TextSummary {
fn from(text: &'a str) -> Self {
let mut len_utf16 = 0;
let mut len_utf16 = OffsetUtf16(0);
let mut lines = Point::new(0, 0);
let mut lines_utf16 = PointUtf16::new(0, 0);
let mut first_line_chars = 0;
@ -821,7 +844,7 @@ impl<'a> From<&'a str> for TextSummary {
let mut longest_row = 0;
let mut longest_row_chars = 0;
for c in text.chars() {
len_utf16 += c.len_utf16();
len_utf16.0 += c.len_utf16();
if c == '\n' {
lines += Point::new(1, 0);
@ -961,13 +984,13 @@ impl TextDimension for usize {
// Implements `sum_tree::Dimension` for `OffsetUtf16` over `ChunkSummary`, so a
// UTF-16 offset can be accumulated from chunk summaries.
impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 {
// Advances this offset by the UTF-16 length stored in the summary's text.
// The second argument is ignored.
fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
// NOTE(review): both the `self.0 +=` and the `*self +=` form appear below. This
// looks like a rendered diff showing the old and the new line — confirm only one
// of them remains in the real file, otherwise the length is added twice.
self.0 += summary.text.len_utf16;
*self += summary.text.len_utf16;
}
}
impl TextDimension for OffsetUtf16 {
fn from_text_summary(summary: &TextSummary) -> Self {
Self(summary.len_utf16)
summary.len_utf16
}
fn add_assign(&mut self, other: &Self) {
@ -1075,6 +1098,19 @@ mod tests {
rope.clip_point_utf16(PointUtf16::new(0, 3), Bias::Right),
PointUtf16::new(0, 2)
);
assert_eq!(
rope.clip_offset_utf16(OffsetUtf16(1), Bias::Left),
OffsetUtf16(0)
);
assert_eq!(
rope.clip_offset_utf16(OffsetUtf16(1), Bias::Right),
OffsetUtf16(2)
);
assert_eq!(
rope.clip_offset_utf16(OffsetUtf16(3), Bias::Right),
OffsetUtf16(2)
);
}
#[gpui::test(iterations = 100)]
@ -1174,8 +1210,16 @@ mod tests {
offset_utf16.0 += ch.len_utf16();
}
let mut offset_utf16 = OffsetUtf16(0);
let mut point_utf16 = PointUtf16::zero();
for unit in expected.encode_utf16() {
let left_offset = actual.clip_offset_utf16(offset_utf16, Bias::Left);
let right_offset = actual.clip_offset_utf16(offset_utf16, Bias::Right);
assert!(right_offset >= left_offset);
// Ensure translating UTF-16 offsets to UTF-8 offsets doesn't panic.
actual.offset_utf16_to_offset(left_offset);
actual.offset_utf16_to_offset(right_offset);
let left_point = actual.clip_point_utf16(point_utf16, Bias::Left);
let right_point = actual.clip_point_utf16(point_utf16, Bias::Right);
assert!(right_point >= left_point);
@ -1183,6 +1227,7 @@ mod tests {
actual.point_utf16_to_offset(left_point);
actual.point_utf16_to_offset(right_point);
offset_utf16.0 += 1;
if unit == b'\n' as u16 {
point_utf16 += PointUtf16::new(1, 0);
} else {

View file

@ -248,7 +248,7 @@ fn test_text_summary_for_range() {
buffer.text_summary_for_range::<TextSummary, _>(1..3),
TextSummary {
len: 2,
len_utf16: 2,
len_utf16: OffsetUtf16(2),
lines: Point::new(1, 0),
lines_utf16: PointUtf16::new(1, 0),
first_line_chars: 1,
@ -261,7 +261,7 @@ fn test_text_summary_for_range() {
buffer.text_summary_for_range::<TextSummary, _>(1..12),
TextSummary {
len: 11,
len_utf16: 11,
len_utf16: OffsetUtf16(11),
lines: Point::new(3, 0),
lines_utf16: PointUtf16::new(3, 0),
first_line_chars: 1,
@ -274,7 +274,7 @@ fn test_text_summary_for_range() {
buffer.text_summary_for_range::<TextSummary, _>(0..20),
TextSummary {
len: 20,
len_utf16: 20,
len_utf16: OffsetUtf16(20),
lines: Point::new(4, 1),
lines_utf16: PointUtf16::new(4, 1),
first_line_chars: 2,
@ -287,7 +287,7 @@ fn test_text_summary_for_range() {
buffer.text_summary_for_range::<TextSummary, _>(0..22),
TextSummary {
len: 22,
len_utf16: 22,
len_utf16: OffsetUtf16(22),
lines: Point::new(4, 3),
lines_utf16: PointUtf16::new(4, 3),
first_line_chars: 2,
@ -300,7 +300,7 @@ fn test_text_summary_for_range() {
buffer.text_summary_for_range::<TextSummary, _>(7..22),
TextSummary {
len: 15,
len_utf16: 15,
len_utf16: OffsetUtf16(15),
lines: Point::new(2, 3),
lines_utf16: PointUtf16::new(2, 3),
first_line_chars: 4,