Merge pull request #1287 from zed-industries/handle-carriage-returns-in-edits

Fix crash when pasting text with CRLF line endings
This commit is contained in:
Max Brunsfeld 2022-07-05 17:35:38 -07:00 committed by GitHub
commit 77d688b4fb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 155 additions and 126 deletions

View file

@ -53,7 +53,6 @@ pub struct Buffer {
saved_version: clock::Global,
saved_version_fingerprint: String,
saved_mtime: SystemTime,
line_ending: LineEnding,
transaction_depth: usize,
was_dirty_before_starting_transaction: Option<bool>,
language: Option<Arc<Language>>,
@ -98,12 +97,6 @@ pub enum IndentKind {
Tab,
}
#[derive(Copy, Debug, Clone, PartialEq, Eq)]
pub enum LineEnding {
Unix,
Windows,
}
#[derive(Clone, Debug)]
struct SelectionSet {
line_mode: bool,
@ -314,32 +307,26 @@ impl CharKind {
}
impl Buffer {
pub fn new<T: Into<Arc<str>>>(
pub fn new<T: Into<String>>(
replica_id: ReplicaId,
base_text: T,
cx: &mut ModelContext<Self>,
) -> Self {
let history = History::new(base_text.into());
let line_ending = LineEnding::detect(&history.base_text);
Self::build(
TextBuffer::new(replica_id, cx.model_id() as u64, history),
TextBuffer::new(replica_id, cx.model_id() as u64, base_text.into()),
None,
line_ending,
)
}
pub fn from_file<T: Into<Arc<str>>>(
pub fn from_file<T: Into<String>>(
replica_id: ReplicaId,
base_text: T,
file: Arc<dyn File>,
cx: &mut ModelContext<Self>,
) -> Self {
let history = History::new(base_text.into());
let line_ending = LineEnding::detect(&history.base_text);
Self::build(
TextBuffer::new(replica_id, cx.model_id() as u64, history),
TextBuffer::new(replica_id, cx.model_id() as u64, base_text.into()),
Some(file),
line_ending,
)
}
@ -349,14 +336,12 @@ impl Buffer {
file: Option<Arc<dyn File>>,
cx: &mut ModelContext<Self>,
) -> Result<Self> {
let buffer = TextBuffer::new(
replica_id,
message.id,
History::new(Arc::from(message.base_text)),
);
let line_ending = proto::LineEnding::from_i32(message.line_ending)
.ok_or_else(|| anyhow!("missing line_ending"))?;
let mut this = Self::build(buffer, file, LineEnding::from_proto(line_ending));
let buffer = TextBuffer::new(replica_id, message.id, message.base_text);
let mut this = Self::build(buffer, file);
this.text.set_line_ending(proto::deserialize_line_ending(
proto::LineEnding::from_i32(message.line_ending)
.ok_or_else(|| anyhow!("missing line_ending"))?,
));
let ops = message
.operations
.into_iter()
@ -421,7 +406,7 @@ impl Buffer {
diagnostics: proto::serialize_diagnostics(self.diagnostics.iter()),
diagnostics_timestamp: self.diagnostics_timestamp.value,
completion_triggers: self.completion_triggers.clone(),
line_ending: self.line_ending.to_proto() as i32,
line_ending: proto::serialize_line_ending(self.line_ending()) as i32,
}
}
@ -430,7 +415,7 @@ impl Buffer {
self
}
fn build(buffer: TextBuffer, file: Option<Arc<dyn File>>, line_ending: LineEnding) -> Self {
fn build(buffer: TextBuffer, file: Option<Arc<dyn File>>) -> Self {
let saved_mtime;
if let Some(file) = file.as_ref() {
saved_mtime = file.mtime();
@ -446,7 +431,6 @@ impl Buffer {
was_dirty_before_starting_transaction: None,
text: buffer,
file,
line_ending,
syntax_tree: Mutex::new(None),
parsing_in_background: false,
parse_count: 0,
@ -507,7 +491,7 @@ impl Buffer {
self.remote_id(),
text,
version,
self.line_ending,
self.line_ending(),
cx.as_mut(),
);
cx.spawn(|this, mut cx| async move {
@ -563,7 +547,7 @@ impl Buffer {
this.did_reload(
this.version(),
this.as_rope().fingerprint(),
this.line_ending,
this.line_ending(),
new_mtime,
cx,
);
@ -588,14 +572,14 @@ impl Buffer {
) {
self.saved_version = version;
self.saved_version_fingerprint = fingerprint;
self.line_ending = line_ending;
self.text.set_line_ending(line_ending);
self.saved_mtime = mtime;
if let Some(file) = self.file.as_ref().and_then(|f| f.as_local()) {
file.buffer_reloaded(
self.remote_id(),
&self.saved_version,
self.saved_version_fingerprint.clone(),
self.line_ending,
self.line_ending(),
self.saved_mtime,
cx,
);
@ -974,13 +958,13 @@ impl Buffer {
}
}
pub(crate) fn diff(&self, new_text: String, cx: &AppContext) -> Task<Diff> {
pub(crate) fn diff(&self, mut new_text: String, cx: &AppContext) -> Task<Diff> {
let old_text = self.as_rope().clone();
let base_version = self.version();
cx.background().spawn(async move {
let old_text = old_text.to_string();
let line_ending = LineEnding::detect(&new_text);
let new_text = new_text.replace("\r\n", "\n").replace('\r', "\n");
LineEnding::strip_carriage_returns(&mut new_text);
let changes = TextDiff::from_lines(old_text.as_str(), new_text.as_str())
.iter_all_changes()
.map(|c| (c.tag(), c.value().len()))
@ -1003,7 +987,7 @@ impl Buffer {
if self.version == diff.base_version {
self.finalize_last_transaction();
self.start_transaction();
self.line_ending = diff.line_ending;
self.text.set_line_ending(diff.line_ending);
let mut offset = diff.start_offset;
for (tag, len) in diff.changes {
let range = offset..(offset + len);
@ -1518,10 +1502,6 @@ impl Buffer {
pub fn completion_triggers(&self) -> &[String] {
&self.completion_triggers
}
pub fn line_ending(&self) -> LineEnding {
self.line_ending
}
}
#[cfg(any(test, feature = "test-support"))]
@ -2542,52 +2522,6 @@ impl std::ops::SubAssign for IndentSize {
}
}
impl LineEnding {
pub fn from_proto(style: proto::LineEnding) -> Self {
match style {
proto::LineEnding::Unix => Self::Unix,
proto::LineEnding::Windows => Self::Windows,
}
}
fn detect(text: &str) -> Self {
let text = &text[..cmp::min(text.len(), 1000)];
if let Some(ix) = text.find('\n') {
if ix == 0 || text.as_bytes()[ix - 1] != b'\r' {
Self::Unix
} else {
Self::Windows
}
} else {
Default::default()
}
}
pub fn as_str(self) -> &'static str {
match self {
LineEnding::Unix => "\n",
LineEnding::Windows => "\r\n",
}
}
pub fn to_proto(self) -> proto::LineEnding {
match self {
LineEnding::Unix => proto::LineEnding::Unix,
LineEnding::Windows => proto::LineEnding::Windows,
}
}
}
impl Default for LineEnding {
fn default() -> Self {
#[cfg(unix)]
return Self::Unix;
#[cfg(not(unix))]
return Self::Windows;
}
}
impl Completion {
pub fn sort_key(&self) -> (usize, &str) {
let kind_key = match self.lsp_completion.kind {

View file

@ -11,6 +11,20 @@ use text::*;
pub use proto::{Buffer, BufferState, LineEnding, SelectionSet};
pub fn deserialize_line_ending(message: proto::LineEnding) -> text::LineEnding {
match message {
LineEnding::Unix => text::LineEnding::Unix,
LineEnding::Windows => text::LineEnding::Windows,
}
}
pub fn serialize_line_ending(message: text::LineEnding) -> proto::LineEnding {
match message {
text::LineEnding::Unix => proto::LineEnding::Unix,
text::LineEnding::Windows => proto::LineEnding::Windows,
}
}
pub fn serialize_operation(operation: &Operation) -> proto::Operation {
proto::Operation {
variant: Some(match operation {

View file

@ -421,7 +421,7 @@ async fn test_outline(cx: &mut gpui::TestAppContext) {
async fn search<'a>(
outline: &'a Outline<Anchor>,
query: &str,
cx: &gpui::TestAppContext,
cx: &'a gpui::TestAppContext,
) -> Vec<(&'a str, Vec<usize>)> {
let matches = cx
.read(|cx| outline.search(query, cx.background().clone()))

View file

@ -20,12 +20,14 @@ use gpui::{
};
use language::{
point_to_lsp,
proto::{deserialize_anchor, deserialize_version, serialize_anchor, serialize_version},
proto::{
deserialize_anchor, deserialize_line_ending, deserialize_version, serialize_anchor,
serialize_version,
},
range_from_lsp, range_to_lsp, Anchor, Bias, Buffer, CharKind, CodeAction, CodeLabel,
Completion, Diagnostic, DiagnosticEntry, DiagnosticSet, Event as BufferEvent, File as _,
Language, LanguageRegistry, LanguageServerName, LineEnding, LocalFile, LspAdapter,
OffsetRangeExt, Operation, Patch, PointUtf16, TextBufferSnapshot, ToOffset, ToPointUtf16,
Transaction,
Language, LanguageRegistry, LanguageServerName, LocalFile, LspAdapter, OffsetRangeExt,
Operation, Patch, PointUtf16, TextBufferSnapshot, ToOffset, ToPointUtf16, Transaction,
};
use lsp::{
DiagnosticSeverity, DiagnosticTag, DocumentHighlightKind, LanguageServer, LanguageString,
@ -5542,7 +5544,7 @@ impl Project {
) -> Result<()> {
let payload = envelope.payload;
let version = deserialize_version(payload.version);
let line_ending = LineEnding::from_proto(
let line_ending = deserialize_line_ending(
proto::LineEnding::from_i32(payload.line_ending)
.ok_or_else(|| anyhow!("missing line ending"))?,
);

View file

@ -23,7 +23,7 @@ use gpui::{
Task,
};
use language::{
proto::{deserialize_version, serialize_version},
proto::{deserialize_version, serialize_line_ending, serialize_version},
Buffer, DiagnosticEntry, LineEnding, PointUtf16, Rope,
};
use lazy_static::lazy_static;
@ -1750,7 +1750,7 @@ impl language::LocalFile for File {
version: serialize_version(&version),
mtime: Some(mtime.into()),
fingerprint,
line_ending: line_ending.to_proto() as i32,
line_ending: serialize_line_ending(line_ending) as i32,
})
.log_err();
}

View file

@ -22,7 +22,7 @@ impl<T: Rng> Iterator for RandomCharIter<T> {
match self.0.gen_range(0..100) {
// whitespace
0..=19 => [' ', '\n', '\t'].choose(&mut self.0).copied(),
0..=19 => [' ', '\n', '\r', '\t'].choose(&mut self.0).copied(),
// two-byte greek letters
20..=32 => char::from_u32(self.0.gen_range(('α' as u32)..('ω' as u32 + 1))),
// // three-byte characters

View file

@ -58,19 +58,12 @@ impl Rope {
pub fn push(&mut self, text: &str) {
let mut new_chunks = SmallVec::<[_; 16]>::new();
let mut new_chunk = ArrayString::new();
let mut chars = text.chars().peekable();
while let Some(mut ch) = chars.next() {
for ch in text.chars() {
if new_chunk.len() + ch.len_utf8() > 2 * CHUNK_BASE {
new_chunks.push(Chunk(new_chunk));
new_chunk = ArrayString::new();
}
if ch == '\r' {
ch = '\n';
if chars.peek().copied() == Some('\n') {
chars.next();
}
}
new_chunk.push(ch);
}
if !new_chunk.is_empty() {

View file

@ -18,7 +18,7 @@ fn init_logger() {
#[test]
fn test_edit() {
let mut buffer = Buffer::new(0, 0, History::new("abc".into()));
let mut buffer = Buffer::new(0, 0, "abc".into());
assert_eq!(buffer.text(), "abc");
buffer.edit([(3..3, "def")]);
assert_eq!(buffer.text(), "abcdef");
@ -42,7 +42,9 @@ fn test_random_edits(mut rng: StdRng) {
let mut reference_string = RandomCharIter::new(&mut rng)
.take(reference_string_len)
.collect::<String>();
let mut buffer = Buffer::new(0, 0, History::new(reference_string.clone().into()));
let mut buffer = Buffer::new(0, 0, reference_string.clone().into());
reference_string = reference_string.replace("\r", "");
buffer.history.group_interval = Duration::from_millis(rng.gen_range(0..=200));
let mut buffer_versions = Vec::new();
log::info!(
@ -56,6 +58,8 @@ fn test_random_edits(mut rng: StdRng) {
for (old_range, new_text) in edits.iter().rev() {
reference_string.replace_range(old_range.clone(), &new_text);
}
reference_string = reference_string.replace("\r", "");
assert_eq!(buffer.text(), reference_string);
log::info!(
"buffer text {:?}, version: {:?}",
@ -148,9 +152,23 @@ fn test_random_edits(mut rng: StdRng) {
}
}
#[test]
fn test_line_endings() {
let mut buffer = Buffer::new(0, 0, "one\r\ntwo".into());
assert_eq!(buffer.text(), "one\ntwo");
assert_eq!(buffer.line_ending(), LineEnding::Windows);
buffer.check_invariants();
buffer.edit([(buffer.len()..buffer.len(), "\r\nthree")]);
buffer.edit([(0..0, "zero\r\n")]);
assert_eq!(buffer.text(), "zero\none\ntwo\nthree");
assert_eq!(buffer.line_ending(), LineEnding::Windows);
buffer.check_invariants();
}
#[test]
fn test_line_len() {
let mut buffer = Buffer::new(0, 0, History::new("".into()));
let mut buffer = Buffer::new(0, 0, "".into());
buffer.edit([(0..0, "abcd\nefg\nhij")]);
buffer.edit([(12..12, "kl\nmno")]);
buffer.edit([(18..18, "\npqrs\n")]);
@ -167,7 +185,7 @@ fn test_line_len() {
#[test]
fn test_common_prefix_at_positionn() {
let text = "a = str; b = δα";
let buffer = Buffer::new(0, 0, History::new(text.into()));
let buffer = Buffer::new(0, 0, text.into());
let offset1 = offset_after(text, "str");
let offset2 = offset_after(text, "δα");
@ -215,7 +233,7 @@ fn test_common_prefix_at_positionn() {
#[test]
fn test_text_summary_for_range() {
let buffer = Buffer::new(0, 0, History::new("ab\nefg\nhklm\nnopqrs\ntuvwxyz".into()));
let buffer = Buffer::new(0, 0, "ab\nefg\nhklm\nnopqrs\ntuvwxyz".into());
assert_eq!(
buffer.text_summary_for_range::<TextSummary, _>(1..3),
TextSummary {
@ -280,7 +298,7 @@ fn test_text_summary_for_range() {
#[test]
fn test_chars_at() {
let mut buffer = Buffer::new(0, 0, History::new("".into()));
let mut buffer = Buffer::new(0, 0, "".into());
buffer.edit([(0..0, "abcd\nefgh\nij")]);
buffer.edit([(12..12, "kl\nmno")]);
buffer.edit([(18..18, "\npqrs")]);
@ -302,7 +320,7 @@ fn test_chars_at() {
assert_eq!(chars.collect::<String>(), "PQrs");
// Regression test:
let mut buffer = Buffer::new(0, 0, History::new("".into()));
let mut buffer = Buffer::new(0, 0, "".into());
buffer.edit([(0..0, "[workspace]\nmembers = [\n \"xray_core\",\n \"xray_server\",\n \"xray_cli\",\n \"xray_wasm\",\n]\n")]);
buffer.edit([(60..60, "\n")]);
@ -312,7 +330,7 @@ fn test_chars_at() {
#[test]
fn test_anchors() {
let mut buffer = Buffer::new(0, 0, History::new("".into()));
let mut buffer = Buffer::new(0, 0, "".into());
buffer.edit([(0..0, "abc")]);
let left_anchor = buffer.anchor_before(2);
let right_anchor = buffer.anchor_after(2);
@ -430,7 +448,7 @@ fn test_anchors() {
#[test]
fn test_anchors_at_start_and_end() {
let mut buffer = Buffer::new(0, 0, History::new("".into()));
let mut buffer = Buffer::new(0, 0, "".into());
let before_start_anchor = buffer.anchor_before(0);
let after_end_anchor = buffer.anchor_after(0);
@ -453,7 +471,7 @@ fn test_anchors_at_start_and_end() {
#[test]
fn test_undo_redo() {
let mut buffer = Buffer::new(0, 0, History::new("1234".into()));
let mut buffer = Buffer::new(0, 0, "1234".into());
// Set group interval to zero so as to not group edits in the undo stack.
buffer.history.group_interval = Duration::from_secs(0);
@ -490,7 +508,7 @@ fn test_undo_redo() {
#[test]
fn test_history() {
let mut now = Instant::now();
let mut buffer = Buffer::new(0, 0, History::new("123456".into()));
let mut buffer = Buffer::new(0, 0, "123456".into());
buffer.start_transaction_at(now);
buffer.edit([(2..4, "cd")]);
@ -544,7 +562,7 @@ fn test_history() {
#[test]
fn test_finalize_last_transaction() {
let now = Instant::now();
let mut buffer = Buffer::new(0, 0, History::new("123456".into()));
let mut buffer = Buffer::new(0, 0, "123456".into());
buffer.start_transaction_at(now);
buffer.edit([(2..4, "cd")]);
@ -579,7 +597,7 @@ fn test_finalize_last_transaction() {
#[test]
fn test_edited_ranges_for_transaction() {
let now = Instant::now();
let mut buffer = Buffer::new(0, 0, History::new("1234567".into()));
let mut buffer = Buffer::new(0, 0, "1234567".into());
buffer.start_transaction_at(now);
buffer.edit([(2..4, "cd")]);
@ -618,9 +636,9 @@ fn test_edited_ranges_for_transaction() {
fn test_concurrent_edits() {
let text = "abcdef";
let mut buffer1 = Buffer::new(1, 0, History::new(text.into()));
let mut buffer2 = Buffer::new(2, 0, History::new(text.into()));
let mut buffer3 = Buffer::new(3, 0, History::new(text.into()));
let mut buffer1 = Buffer::new(1, 0, text.into());
let mut buffer2 = Buffer::new(2, 0, text.into());
let mut buffer3 = Buffer::new(3, 0, text.into());
let buf1_op = buffer1.edit([(1..2, "12")]);
assert_eq!(buffer1.text(), "a12cdef");
@ -659,7 +677,7 @@ fn test_random_concurrent_edits(mut rng: StdRng) {
let mut network = Network::new(rng.clone());
for i in 0..peers {
let mut buffer = Buffer::new(i as ReplicaId, 0, History::new(base_text.clone().into()));
let mut buffer = Buffer::new(i as ReplicaId, 0, base_text.clone().into());
buffer.history.group_interval = Duration::from_millis(rng.gen_range(0..=200));
buffers.push(buffer);
replica_ids.push(i as u16);

View file

@ -63,6 +63,7 @@ pub struct BufferSnapshot {
remote_id: u64,
visible_text: Rope,
deleted_text: Rope,
line_ending: LineEnding,
undo_map: UndoMap,
fragments: SumTree<Fragment>,
insertions: SumTree<InsertionFragment>,
@ -86,6 +87,12 @@ pub struct Transaction {
pub ranges: Vec<Range<FullOffset>>,
}
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum LineEnding {
Unix,
Windows,
}
impl HistoryEntry {
pub fn transaction_id(&self) -> TransactionId {
self.transaction.id
@ -148,9 +155,9 @@ impl HistoryEntry {
}
#[derive(Clone)]
pub struct History {
struct History {
// TODO: Turn this into a String or Rope, maybe.
pub base_text: Arc<str>,
base_text: Arc<str>,
operations: HashMap<clock::Local, Operation>,
undo_stack: Vec<HistoryEntry>,
redo_stack: Vec<HistoryEntry>,
@ -539,13 +546,18 @@ pub struct UndoOperation {
}
impl Buffer {
pub fn new(replica_id: u16, remote_id: u64, history: History) -> Buffer {
pub fn new(replica_id: u16, remote_id: u64, mut base_text: String) -> Buffer {
let line_ending = LineEnding::detect(&base_text);
LineEnding::strip_carriage_returns(&mut base_text);
let history = History::new(base_text.into());
let mut fragments = SumTree::new();
let mut insertions = SumTree::new();
let mut local_clock = clock::Local::new(replica_id);
let mut lamport_clock = clock::Lamport::new(replica_id);
let mut version = clock::Global::new();
let visible_text = Rope::from(history.base_text.as_ref());
if visible_text.len() > 0 {
let insertion_timestamp = InsertionTimestamp {
@ -576,6 +588,7 @@ impl Buffer {
remote_id,
visible_text,
deleted_text: Rope::new(),
line_ending,
fragments,
insertions,
version,
@ -658,7 +671,7 @@ impl Buffer {
let mut new_insertions = Vec::new();
let mut insertion_offset = 0;
let mut ranges = edits
let mut edits = edits
.map(|(range, new_text)| (range.to_offset(&*self), new_text))
.peekable();
@ -666,12 +679,12 @@ impl Buffer {
RopeBuilder::new(self.visible_text.cursor(0), self.deleted_text.cursor(0));
let mut old_fragments = self.fragments.cursor::<FragmentTextSummary>();
let mut new_fragments =
old_fragments.slice(&ranges.peek().unwrap().0.start, Bias::Right, &None);
old_fragments.slice(&edits.peek().unwrap().0.start, Bias::Right, &None);
new_ropes.push_tree(new_fragments.summary().text);
let mut fragment_start = old_fragments.start().visible;
for (range, new_text) in ranges {
let new_text = new_text.into();
for (range, new_text) in edits {
let new_text = LineEnding::strip_carriage_returns_from_arc(new_text.into());
let fragment_end = old_fragments.end(&None).visible;
// If the current fragment ends before this range, then jump ahead to the first fragment
@ -714,6 +727,7 @@ impl Buffer {
// Insert the new text before any existing fragments within the range.
if !new_text.is_empty() {
let new_start = new_fragments.summary().text.visible;
edits_patch.push(Edit {
old: fragment_start..fragment_start,
new: new_start..new_start + new_text.len(),
@ -805,6 +819,10 @@ impl Buffer {
edit_op
}
pub fn set_line_ending(&mut self, line_ending: LineEnding) {
self.snapshot.line_ending = line_ending;
}
pub fn apply_ops<I: IntoIterator<Item = Operation>>(&mut self, ops: I) -> Result<()> {
let mut deferred_ops = Vec::new();
for op in ops {
@ -1412,6 +1430,8 @@ impl Buffer {
fragment_summary.text.deleted,
self.snapshot.deleted_text.len()
);
assert!(!self.text().contains("\r\n"));
}
pub fn set_group_interval(&mut self, group_interval: Duration) {
@ -1549,6 +1569,10 @@ impl BufferSnapshot {
self.visible_text.to_string()
}
pub fn line_ending(&self) -> LineEnding {
self.line_ending
}
pub fn deleted_text(&self) -> String {
self.deleted_text.to_string()
}
@ -2310,6 +2334,50 @@ impl operation_queue::Operation for Operation {
}
}
impl Default for LineEnding {
fn default() -> Self {
#[cfg(unix)]
return Self::Unix;
#[cfg(not(unix))]
return Self::CRLF;
}
}
impl LineEnding {
pub fn as_str(&self) -> &'static str {
match self {
LineEnding::Unix => "\n",
LineEnding::Windows => "\r\n",
}
}
pub fn detect(text: &str) -> Self {
if let Some(ix) = text[..cmp::min(text.len(), 1000)].find(&['\n']) {
let text = text.as_bytes();
if ix > 0 && text[ix - 1] == b'\r' {
Self::Windows
} else {
Self::Unix
}
} else {
Self::default()
}
}
pub fn strip_carriage_returns(text: &mut String) {
text.retain(|c| c != '\r')
}
fn strip_carriage_returns_from_arc(text: Arc<str>) -> Arc<str> {
if text.contains('\r') {
text.replace('\r', "").into()
} else {
text
}
}
}
pub trait ToOffset {
fn to_offset<'a>(&self, snapshot: &BufferSnapshot) -> usize;
}