From b693cbfcb7e0fcece6cf278b0bee0af0d6dc7bf5 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Thu, 11 Jul 2024 03:10:19 +0800 Subject: [PATCH] Fix line wrap for CJK characters (#11296) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Release Notes: - Fixed line wrap for CJK characters. ## Demo https://github.com/zed-industries/zed/assets/5518/c6695bb4-b170-4ce0-9a84-c36b051de438 ![diff](https://github.com/zed-industries/zed/assets/5518/318bc815-1018-485c-aa16-49c775a9f402) Fix issues: #4623 #11202 ### Render case ``` ## fr Bien démarrer avec la documentation GitHub Découvrez comment commencer à créer, à livrer et à gérer des logiciels avec GitHub. Explorez nos produits, inscrivez-vous pour obtenir un compte et connectez-vous à la plus grande communauté de développement du monde. ## zh GitHub 入门文档 了解如何开始构建、运输和维护具有 GitHub 的软件。 了解我们的产品,注册一个帐户,与世界上最大的发展社区建立联系。 ## es Documentación sobre la introducción a GitHub Aprende cómo comenzar a crear, enviar y mantener software con GitHub. Explora nuestros productos, regístrate para una cuenta y conéctate con la comunidad de desarrollo más grande del mundo. ## kr GitHub 설명서 시작 GitHub를 사용하여 소프트웨어 빌드, 납품 및 유지 관리를 시작하는 방법을 알아봅니다. 제품을 탐색하고, 계정에 등록하고, 세계 최대의 개발 커뮤니티와 연결합니다. ## ja GitHub の概要に関するドキュメント GitHub を使用してソフトウェアの構築、出荷、および保守を始める方法を学びます。 当社の製品を探索し、アカウントにサインアップして、世界最大の開発コミュニティと繋がりましょう。 ## pt Documentação de introdução ao GitHub Aprenda a começar a criar, enviar e manter um software com a GitHub. Explore nossos produtos, inscreva-se em uma conta e conecte-se com a maior comunidade de desenvolvimento do mundo. ## ru Начало работы с документацией по GitHub Узнайте, как начать создание, доставку и обслуживание программного обеспечения с помощью GitHub. Изучите наши продукты, зарегистрируйте учетную запись и присоединитесь к крупнейшему в мире сообществу разработчиков. 
``` --- crates/gpui/src/text_system/line_wrapper.rs | 92 ++++++++++++++++++++- 1 file changed, 89 insertions(+), 3 deletions(-) diff --git a/crates/gpui/src/text_system/line_wrapper.rs b/crates/gpui/src/text_system/line_wrapper.rs index 34c5ddd773..efb3f315ae 100644 --- a/crates/gpui/src/text_system/line_wrapper.rs +++ b/crates/gpui/src/text_system/line_wrapper.rs @@ -49,9 +49,17 @@ impl LineWrapper { continue; } - if prev_c == ' ' && c != ' ' && first_non_whitespace_ix.is_some() { - last_candidate_ix = ix; - last_candidate_width = width; + if Self::is_word_char(c) { + if prev_c == ' ' && c != ' ' && first_non_whitespace_ix.is_some() { + last_candidate_ix = ix; + last_candidate_width = width; + } + } else { + // CJK text may not be space-separated, e.g. `Hello world你好世界`, so every non-space, non-word character is a wrap candidate once `first_non_whitespace_ix` is set. + if c != ' ' && first_non_whitespace_ix.is_some() { + last_candidate_ix = ix; + last_candidate_width = width; + } } if c != ' ' && first_non_whitespace_ix.is_none() { @@ -90,6 +98,31 @@ impl LineWrapper { }) } + pub(crate) fn is_word_char(c: char) -> bool { + // ASCII alphanumeric characters, for English, numbers: `Hello123`, etc. + c.is_ascii_alphanumeric() || + // Latin script in Unicode for French, German, Spanish, etc. + // Latin-1 Supplement (NOTE(review): this range also covers the signs `×` U+00D7 and `÷` U+00F7) + // https://en.wikipedia.org/wiki/Latin-1_Supplement + matches!(c, '\u{00C0}'..='\u{00FF}') || + // Latin Extended-A + // https://en.wikipedia.org/wiki/Latin_Extended-A + matches!(c, '\u{0100}'..='\u{017F}') || + // Latin Extended-B + // https://en.wikipedia.org/wiki/Latin_Extended-B + matches!(c, '\u{0180}'..='\u{024F}') || + // Cyrillic for Russian, Ukrainian, etc. + // https://en.wikipedia.org/wiki/Cyrillic_script_in_Unicode + matches!(c, '\u{0400}'..='\u{04FF}') || + // Some other known special characters that should be treated as word characters, + // e.g. `a-b`, `var_name`, `I'm`, `@mention`, `#hashtag`, `100%`, `3.1415`, `2^3`, `a~b`, etc. + matches!(c, '-' | '_' | '.' 
| '\'' | '$' | '%' | '@' | '#' | '^' | '~') || + // Characters that are used in URLs, e.g. `https://github.com/zed-industries/zed?a=1&b=2`, so that a long URL wraps better. + matches!(c, '/' | ':' | '?' | '&' | '=') || + // The `⋯` character has a special use in Zed; treat it as a word character to keep it at the end of the line. + matches!(c, '⋯') + } + #[inline(always)] fn width_for_char(&mut self, c: char) -> Pixels { if (c as u32) < 128 { @@ -219,6 +252,59 @@ mod tests { }); } + #[test] + fn test_is_word_char() { + #[track_caller] + fn assert_word(word: &str) { + for c in word.chars() { + assert!(LineWrapper::is_word_char(c), "assertion failed for '{}'", c); + } + } + + #[track_caller] + fn assert_not_word(word: &str) { + let found = word.chars().any(|c| !LineWrapper::is_word_char(c)); + assert!(found, "assertion failed for '{}'", word); + } + + assert_word("Hello123"); + assert_word("non-English"); + assert_word("var_name"); + assert_word("123456"); + assert_word("3.1415"); + assert_word("10^2"); + assert_word("1~2"); + assert_word("100%"); + assert_word("@mention"); + assert_word("#hashtag"); + assert_word("$variable"); + assert_word("more⋯"); + + // Space is not a word character; `assert_not_word` passes when at least one char is a non-word char + assert_not_word("foo bar"); + + // URL case + assert_word("https://github.com/zed-industries/zed/"); + assert_word("github.com"); + assert_word("a=1&b=2"); + + // Latin-1 Supplement + assert_word("ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ"); + // Latin Extended-A + assert_word("ĀāĂ㥹ĆćĈĉĊċČčĎď"); + // Latin Extended-B + assert_word("ƀƁƂƃƄƅƆƇƈƉƊƋƌƍƎƏ"); + // Cyrillic + assert_word("АБВГДЕЖЗИЙКЛМНОП"); + + // Non-word characters: each string contains at least one char that is not a word char + assert_not_word("你好"); + assert_not_word("안녕하세요"); + assert_not_word("こんにちは"); + assert_not_word("😀😁😂"); + assert_not_word("()[]{}<>"); + } + // For compatibility with the test macro #[cfg(target_os = "macos")] use crate as gpui;