From 1190a87a052130f9ef6d8dd030c920fe4d5c9ab7 Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Sat, 15 May 2021 10:08:01 +0200 Subject: [PATCH 1/6] Avoid heap allocation when splitting an existing chunk --- zed/src/editor/buffer/rope.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/zed/src/editor/buffer/rope.rs b/zed/src/editor/buffer/rope.rs index 3b85c04c7e..9d902e6eee 100644 --- a/zed/src/editor/buffer/rope.rs +++ b/zed/src/editor/buffer/rope.rs @@ -56,7 +56,9 @@ impl Rope { if last_chunk.0.len() + first_new_chunk_ref.0.len() <= 2 * CHUNK_BASE { last_chunk.0.push_str(&first_new_chunk.take().unwrap().0); } else { - let text = [last_chunk.0, first_new_chunk_ref.0].concat(); + let mut text = ArrayString::<[_; 4 * CHUNK_BASE]>::new(); + text.push_str(&last_chunk.0); + text.push_str(&first_new_chunk_ref.0); let mut midpoint = text.len() / 2; while !text.is_char_boundary(midpoint) { midpoint += 1; From def0aa98b204bccebc1333a7e61ad244fd05fa45 Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Sat, 15 May 2021 10:08:37 +0200 Subject: [PATCH 2/6] Maximize chunks occupation by splitting chunks appropriately --- zed/src/editor/buffer/rope.rs | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/zed/src/editor/buffer/rope.rs b/zed/src/editor/buffer/rope.rs index 9d902e6eee..cadf994c5d 100644 --- a/zed/src/editor/buffer/rope.rs +++ b/zed/src/editor/buffer/rope.rs @@ -59,11 +59,7 @@ impl Rope { let mut text = ArrayString::<[_; 4 * CHUNK_BASE]>::new(); text.push_str(&last_chunk.0); text.push_str(&first_new_chunk_ref.0); - let mut midpoint = text.len() / 2; - while !text.is_char_boundary(midpoint) { - midpoint += 1; - } - let (left, right) = text.split_at(midpoint); + let (left, right) = text.split_at(find_split_ix(&text)); last_chunk.0.clear(); last_chunk.0.push_str(left); first_new_chunk_ref.0.clear(); @@ -221,7 +217,7 @@ impl<'a> Cursor<'a> { } #[derive(Clone, Debug, Default)] -struct Chunk(ArrayString<[u8; 4 * CHUNK_BASE]>); +struct Chunk(ArrayString<[u8; 2 * CHUNK_BASE]>); impl Chunk { fn to_point(&self, target: usize) -> Point { @@ -397,6 +393,25 @@ impl<'a> Iterator for Chars<'a> { } } +fn find_split_ix(text: &str) -> usize { + let mut ix = text.len() / 2; + while !text.is_char_boundary(ix) { + if ix < 2 * CHUNK_BASE { + ix += 1; + } else { + ix = (text.len() / 2) - 1; + break; + } + } + while !text.is_char_boundary(ix) { + ix -= 1; + } + + debug_assert!(ix <= 2 * CHUNK_BASE); + debug_assert!(text.len() - ix <= 2 * CHUNK_BASE); + ix +} + #[cfg(test)] mod tests { use crate::util::RandomCharIter; From 76a74e431eb66d1ceea1abc23964c96bfa9bb5b4 Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Sat, 15 May 2021 11:29:48 +0200 Subject: [PATCH 3/6] Introduce `rope::Cursor::summary` to avoid slicing only to get a summary This also deletes `Rope::slice`, as it's just a convenience method that can be easily re-implemented by using the cursor. --- zed/src/editor/buffer/mod.rs | 3 +- zed/src/editor/buffer/rope.rs | 70 ++++++++++++++++++++++++++--------- 2 files changed, 54 insertions(+), 19 deletions(-) diff --git a/zed/src/editor/buffer/mod.rs b/zed/src/editor/buffer/mod.rs index 87ebf272cb..6bbebbbf26 100644 --- a/zed/src/editor/buffer/mod.rs +++ b/zed/src/editor/buffer/mod.rs @@ -600,8 +600,7 @@ impl Buffer { } pub fn text_summary_for_range(&self, range: Range) -> TextSummary { - // TODO: Use a dedicated ::summarize method in Rope. - self.visible_text.slice(range).summary() + self.visible_text.cursor(range.start).summary(range.end) } pub fn len(&self) -> usize { diff --git a/zed/src/editor/buffer/rope.rs b/zed/src/editor/buffer/rope.rs index cadf994c5d..994022deee 100644 --- a/zed/src/editor/buffer/rope.rs +++ b/zed/src/editor/buffer/rope.rs @@ -4,7 +4,7 @@ use crate::util::byte_range_for_char_range; use anyhow::{anyhow, Result}; use arrayvec::ArrayString; use smallvec::SmallVec; -use std::{cmp, iter::Skip, ops::Range, str}; +use std::{cmp, iter::Skip, str}; #[cfg(test)] const CHUNK_BASE: usize = 6; @@ -89,10 +89,6 @@ impl Rope { } } - pub fn slice(&self, range: Range) -> Rope { - self.cursor(range.start).slice(range.end) - } - pub fn summary(&self) -> TextSummary { self.chunks.summary() } @@ -207,6 +203,30 @@ impl<'a> Cursor<'a> { slice } + pub fn summary(&mut self, end_offset: usize) -> TextSummary { + debug_assert!(end_offset >= self.offset); + + let mut summary = TextSummary::default(); + if let Some(start_chunk) = self.chunks.item() { + let start_ix = self.offset - self.chunks.start(); + let end_ix = cmp::min(end_offset, self.chunks.end()) - self.chunks.start(); + let byte_range = byte_range_for_char_range(start_chunk.0, start_ix..end_ix); + summary = TextSummary::from(&start_chunk.0[byte_range]); + } + + if end_offset > self.chunks.end() { + self.chunks.next(); + summary += &self.chunks.summary(&end_offset, SeekBias::Right, &()); + if let Some(end_chunk) = self.chunks.item() { + let end_ix = end_offset - self.chunks.start(); + let byte_range = byte_range_for_char_range(end_chunk.0, 0..end_ix); + summary += TextSummary::from(&end_chunk.0[byte_range]); + } + } + + summary + } + pub fn suffix(mut self) -> Rope { self.slice(self.rope.chunks.extent()) } @@ -263,12 +283,27 @@ impl sum_tree::Item for Chunk { type Summary = TextSummary; fn summary(&self) -> Self::Summary { + TextSummary::from(self.0.as_str()) + } +} + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct TextSummary { + pub chars: usize, + pub bytes: usize, + pub lines: Point, + pub first_line_len: u32, + pub rightmost_point: Point, +} + +impl<'a> From<&'a str> for TextSummary { + fn from(text: &'a str) -> Self { let mut chars = 0; let mut bytes = 0; let mut lines = Point::new(0, 0); let mut first_line_len = 0; let mut rightmost_point = Point::new(0, 0); - for c in self.0.chars() { + for c in text.chars() { chars += 1; bytes += c.len_utf8(); if c == '\n' { @@ -295,15 +330,6 @@ impl sum_tree::Item for Chunk { } } -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct TextSummary { - pub chars: usize, - pub bytes: usize, - pub lines: Point, - pub first_line_len: u32, - pub rightmost_point: Point, -} - impl sum_tree::Summary for TextSummary { type Context = (); @@ -364,7 +390,7 @@ pub struct Chars<'a> { impl<'a> Chars<'a> { pub fn new(rope: &'a Rope, start: usize) -> Self { let mut cursor = rope.chunks.cursor::(); - cursor.slice(&start, SeekBias::Left, &()); + cursor.seek(&start, SeekBias::Left, &()); let chars = if let Some(chunk) = cursor.item() { let ix = start - cursor.start(); cursor.next(); @@ -472,7 +498,7 @@ mod tests { log::info!("text: {:?}", expected); for _ in 0..5 { - let ix = rng.gen_range(0..=expected.len()); + let ix = rng.gen_range(0..=expected.chars().count()); assert_eq!( actual.chars_at(ix).collect::(), expected.chars().skip(ix).collect::() @@ -492,6 +518,16 @@ mod tests { } offset += 1; } + + for _ in 0..5 { + let end_ix = rng.gen_range(0..=expected.chars().count()); + let start_ix = rng.gen_range(0..=end_ix); + let byte_range = byte_range_for_char_range(&expected, start_ix..end_ix); + assert_eq!( + actual.cursor(start_ix).summary(end_ix), + TextSummary::from(&expected[byte_range.start..byte_range.end]) + ); + } } } } From c9987f9488fde97c5f919cd1d1d9453793411e23 Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Sat, 15 May 2021 11:32:34 +0200 Subject: [PATCH 4/6] Optimize `Rope::append` by merging chunks only when they're underflowing --- zed/src/editor/buffer/rope.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/zed/src/editor/buffer/rope.rs b/zed/src/editor/buffer/rope.rs index 994022deee..78ed156308 100644 --- a/zed/src/editor/buffer/rope.rs +++ b/zed/src/editor/buffer/rope.rs @@ -26,8 +26,12 @@ impl Rope { let mut chunks = rope.chunks.cursor::<(), ()>(); chunks.next(); if let Some(chunk) = chunks.item() { - self.push(&chunk.0); - chunks.next(); + if self.chunks.last().map_or(false, |c| c.0.len() < CHUNK_BASE) + || chunk.0.len() < CHUNK_BASE + { + self.push(&chunk.0); + chunks.next(); + } } self.chunks.push_tree(chunks.suffix(&()), &()); From 5f93d7f755dbf6a86b1c735ce3343f15d7fd2939 Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Sat, 15 May 2021 11:43:29 +0200 Subject: [PATCH 5/6] Return error in `Rope::to_offset(point)` when the point doesn't exist --- zed/src/editor/buffer/rope.rs | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/zed/src/editor/buffer/rope.rs b/zed/src/editor/buffer/rope.rs index 78ed156308..0dec8fbc14 100644 --- a/zed/src/editor/buffer/rope.rs +++ b/zed/src/editor/buffer/rope.rs @@ -136,12 +136,14 @@ impl Rope { } pub fn to_offset(&self, point: Point) -> Result { - // TODO: Verify the point actually exists. if point <= self.summary().lines { let mut cursor = self.chunks.cursor::(); cursor.seek(&point, SeekBias::Left, &()); let overshoot = point - cursor.start().lines; - Ok(cursor.start().chars + cursor.item().map_or(0, |chunk| chunk.to_offset(overshoot))) + Ok(cursor.start().chars + + cursor + .item() + .map_or(Ok(0), |chunk| chunk.to_offset(overshoot))?) } else { Err(anyhow!("offset out of bounds")) } @@ -263,7 +265,7 @@ impl Chunk { point } - fn to_offset(&self, target: Point) -> usize { + fn to_offset(&self, target: Point) -> Result { let mut offset = 0; let mut point = Point::new(0, 0); for ch in self.0.chars() { @@ -279,7 +281,12 @@ impl Chunk { } offset += 1; } - offset + + if point == target { + Ok(offset) + } else { + Err(anyhow!("point out of bounds")) + } } } @@ -515,6 +522,10 @@ mod tests { assert_eq!(actual.to_point(offset).unwrap(), point); assert_eq!(actual.to_offset(point).unwrap(), offset); if ch == '\n' { + assert!(actual + .to_offset(Point::new(point.row, point.column + 1)) + .is_err()); + point.row += 1; point.column = 0 } else { @@ -522,6 +533,10 @@ mod tests { } offset += 1; } + assert_eq!(actual.to_point(offset).unwrap(), point); + assert!(actual.to_point(offset + 1).is_err()); + assert_eq!(actual.to_offset(point).unwrap(), offset); + assert!(actual.to_offset(Point::new(point.row + 1, 0)).is_err()); for _ in 0..5 { let end_ix = rng.gen_range(0..=expected.chars().count()); From 81e162318f120f40ff865763d8f0ed98de075f52 Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Sat, 15 May 2021 11:52:49 +0200 Subject: [PATCH 6/6] :lipstick: --- zed/src/editor/buffer/rope.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zed/src/editor/buffer/rope.rs b/zed/src/editor/buffer/rope.rs index 0dec8fbc14..9924120cc0 100644 --- a/zed/src/editor/buffer/rope.rs +++ b/zed/src/editor/buffer/rope.rs @@ -544,7 +544,7 @@ mod tests { let byte_range = byte_range_for_char_range(&expected, start_ix..end_ix); assert_eq!( actual.cursor(start_ix).summary(end_ix), - TextSummary::from(&expected[byte_range.start..byte_range.end]) + TextSummary::from(&expected[byte_range]) ); } }