Fix line wrap for CJK characters (#11296)

Release Notes:

- Fixed line wrap for CJK characters. 

## Demo


https://github.com/zed-industries/zed/assets/5518/c6695bb4-b170-4ce0-9a84-c36b051de438


![diff](https://github.com/zed-industries/zed/assets/5518/318bc815-1018-485c-aa16-49c775a9f402)

Fixes issues #4623 and #11202.

### Render case

```
## fr

Bien démarrer avec la documentation GitHub Découvrez comment commencer à créer, à livrer et à gérer des logiciels avec GitHub. Explorez nos produits, inscrivez-vous pour obtenir un compte et connectez-vous à la plus grande communauté de développement du monde.

## zh

GitHub 入门文档 了解如何开始构建、运输和维护具有 GitHub 的软件。 了解我们的产品,注册一个帐户,与世界上最大的发展社区建立联系。

## es

Documentación sobre la introducción a GitHub Aprende cómo comenzar a crear, enviar y mantener software con GitHub. Explora nuestros productos, regístrate para una cuenta y conéctate con la comunidad de desarrollo más grande del mundo.

## ko

GitHub 설명서 시작 GitHub를 사용하여 소프트웨어 빌드, 납품 및 유지 관리를 시작하는 방법을 알아봅니다. 제품을 탐색하고, 계정에 등록하고, 세계 최대의 개발 커뮤니티와 연결합니다.

## ja

GitHub の概要に関するドキュメント GitHub を使用してソフトウェアの構築、出荷、および保守を始める方法を学びます。 当社の製品を探索し、アカウントにサインアップして、世界最大の開発コミュニティと繋がりましょう。

## pt

Documentação de introdução ao GitHub Aprenda a começar a criar, enviar e manter um software com a GitHub. Explore nossos produtos, inscreva-se em uma conta e conecte-se com a maior comunidade de desenvolvimento do mundo.

## ru

Начало работы с документацией по GitHub Узнайте, как начать создание, доставку и обслуживание программного обеспечения с помощью GitHub. Изучите наши продукты, зарегистрируйте учетную запись и присоединитесь к крупнейшему в мире сообществу разработчиков.
```
This commit is contained in:
Jason Lee 2024-07-11 03:10:19 +08:00 committed by GitHub
parent 73d7f70ff6
commit b693cbfcb7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -49,9 +49,17 @@ impl LineWrapper {
continue;
}
if prev_c == ' ' && c != ' ' && first_non_whitespace_ix.is_some() {
last_candidate_ix = ix;
last_candidate_width = width;
if Self::is_word_char(c) {
if prev_c == ' ' && c != ' ' && first_non_whitespace_ix.is_some() {
last_candidate_ix = ix;
last_candidate_width = width;
}
} else {
// CJK may not be space separated, e.g.: `Hello world你好世界`
if c != ' ' && first_non_whitespace_ix.is_some() {
last_candidate_ix = ix;
last_candidate_width = width;
}
}
if c != ' ' && first_non_whitespace_ix.is_none() {
@ -90,6 +98,31 @@ impl LineWrapper {
})
}
/// Returns `true` when `c` belongs to the set of characters that make up a
/// space-separated "word": ASCII letters and digits, the Latin and Cyrillic
/// ranges listed below, and a fixed list of punctuation marks.
///
/// Every other character (CJK ideographs, Hangul, kana, emoji, brackets,
/// whitespace, ...) yields `false`.
pub(crate) fn is_word_char(c: char) -> bool {
    matches!(
        c,
        // ASCII letters and digits, e.g. `Hello123`.
        'a'..='z' | 'A'..='Z' | '0'..='9'
        // Latin script for French, German, Spanish, etc. The three blocks are
        // adjacent, so a single range covers all of them:
        //   U+00C0..=U+00FF  Latin-1 Supplement (from U+00C0 onwards)
        //                    https://en.wikipedia.org/wiki/Latin-1_Supplement
        //   U+0100..=U+017F  Latin Extended-A
        //                    https://en.wikipedia.org/wiki/Latin_Extended-A
        //   U+0180..=U+024F  Latin Extended-B
        //                    https://en.wikipedia.org/wiki/Latin_Extended-B
        | '\u{00C0}'..='\u{024F}'
        // Cyrillic for Russian, Ukrainian, etc.
        // https://en.wikipedia.org/wiki/Cyrillic_script_in_Unicode
        | '\u{0400}'..='\u{04FF}'
        // Punctuation that commonly appears inside a single token, e.g. `a-b`,
        // `var_name`, `I'm`, `@mention`, `#hashtag`, `100%`, `3.1415`, `2^3`,
        // `a~b`.
        | '-' | '_' | '.' | '\'' | '$' | '%' | '@' | '#' | '^' | '~'
        // Characters used in URLs such as
        // `https://github.com/zed-industries/zed?a=1&b=2`, so that a long URL
        // wraps better.
        | '/' | ':' | '?' | '&' | '='
        // `⋯` has a special use in Zed; treating it as a word character keeps
        // it at the end of the line.
        | '⋯'
    )
}
#[inline(always)]
fn width_for_char(&mut self, c: char) -> Pixels {
if (c as u32) < 128 {
@ -219,6 +252,59 @@ mod tests {
});
}
// Pins the classification performed by `LineWrapper::is_word_char`: which
// characters count as part of a space-separated word and which do not.
#[test]
fn test_is_word_char() {
    // Asserts that EVERY character of `word` is a word character.
    #[track_caller]
    fn assert_word(word: &str) {
        for c in word.chars() {
            assert!(LineWrapper::is_word_char(c), "assertion failed for '{}'", c);
        }
    }

    // Asserts that EVERY character of `word` is a non-word character.
    // Checking each character (rather than "at least one") means a single
    // misclassified character in the sample cannot hide behind its neighbours.
    #[track_caller]
    fn assert_not_word(word: &str) {
        for c in word.chars() {
            assert!(!LineWrapper::is_word_char(c), "assertion failed for '{}'", c);
        }
    }

    assert_word("Hello123");
    assert_word("non-English");
    assert_word("var_name");
    assert_word("123456");
    assert_word("3.1415");
    assert_word("10^2");
    assert_word("1~2");
    assert_word("100%");
    assert_word("@mention");
    assert_word("#hashtag");
    assert_word("$variable");
    assert_word("more⋯");

    // Space
    assert_not_word(" ");

    // URL case
    assert_word("https://github.com/zed-industries/zed/");
    assert_word("github.com");
    assert_word("a=1&b=2");

    // Latin-1 Supplement
    assert_word("ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ");
    // Latin Extended-A
    assert_word("ĀāĂ㥹ĆćĈĉĊċČčĎď");
    // Latin Extended-B
    assert_word("ƀƁƂƃƄƅƆƇƈƉƊƋƌƍƎƏ");
    // Cyrillic
    assert_word("АБВГДЕЖЗИЙКЛМНОП");

    // non-word characters
    assert_not_word("你好");
    assert_not_word("안녕하세요");
    assert_not_word("こんにちは");
    assert_not_word("😀😁😂");
    assert_not_word("()[]{}<>");
}
// For compatibility with the test macro
#[cfg(target_os = "macos")]
use crate as gpui;