diff --git a/Cargo.lock b/Cargo.lock index 146f0e19741610d3676d7781fa74982ff2e55918..86b551b1895a0fd6747c35c3fcfe3859396665fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2667,9 +2667,9 @@ dependencies = [ [[package]] name = "cap-fs-ext" -version = "3.4.5" +version = "3.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5528f85b1e134ae811704e41ef80930f56e795923f866813255bc342cc20654" +checksum = "e41cc18551193fe8fa6f15c1e3c799bc5ec9e2cfbfaa8ed46f37013e3e6c173c" dependencies = [ "cap-primitives", "cap-std", @@ -2679,9 +2679,9 @@ dependencies = [ [[package]] name = "cap-net-ext" -version = "3.4.5" +version = "3.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20a158160765c6a7d0d8c072a53d772e4cb243f38b04bfcf6b4939cfbe7482e7" +checksum = "9f83833816c66c986e913b22ac887cec216ea09301802054316fc5301809702c" dependencies = [ "cap-primitives", "cap-std", @@ -2691,9 +2691,9 @@ dependencies = [ [[package]] name = "cap-primitives" -version = "3.4.5" +version = "3.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6cf3aea8a5081171859ef57bc1606b1df6999df4f1110f8eef68b30098d1d3a" +checksum = "0a1e394ed14f39f8bc26f59d4c0c010dbe7f0a1b9bafff451b1f98b67c8af62a" dependencies = [ "ambient-authority", "fs-set-times", @@ -2709,9 +2709,9 @@ dependencies = [ [[package]] name = "cap-rand" -version = "3.4.5" +version = "3.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8144c22e24bbcf26ade86cb6501a0916c46b7e4787abdb0045a467eb1645a1d" +checksum = "0acb89ccf798a28683f00089d0630dfaceec087234eae0d308c05ddeaa941b40" dependencies = [ "ambient-authority", "rand 0.8.5", @@ -2719,9 +2719,9 @@ dependencies = [ [[package]] name = "cap-std" -version = "3.4.5" +version = "3.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6dc3090992a735d23219de5c204927163d922f42f575a0189b005c62d37549a" +checksum = 
"07c0355ca583dd58f176c3c12489d684163861ede3c9efa6fd8bba314c984189" dependencies = [ "cap-primitives", "io-extras", @@ -2731,9 +2731,9 @@ dependencies = [ [[package]] name = "cap-time-ext" -version = "3.4.5" +version = "3.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "def102506ce40c11710a9b16e614af0cde8e76ae51b1f48c04b8d79f4b671a80" +checksum = "491af520b8770085daa0466978c75db90368c71896523f2464214e38359b1a5b" dependencies = [ "ambient-authority", "cap-primitives", @@ -2896,6 +2896,17 @@ dependencies = [ "util", ] +[[package]] +name = "chardetng" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14b8f0b65b7b08ae3c8187e8d77174de20cb6777864c6b832d8ad365999cf1ea" +dependencies = [ + "cfg-if", + "encoding_rs", + "memchr", +] + [[package]] name = "chrono" version = "0.4.42" @@ -8797,6 +8808,7 @@ dependencies = [ "ctor", "diffy", "ec4rs", + "encoding_rs", "fs", "futures 0.3.31", "fuzzy", @@ -12465,6 +12477,7 @@ dependencies = [ "dap", "dap_adapters", "db", + "encoding_rs", "extension", "fancy-regex", "fs", @@ -20231,8 +20244,10 @@ version = "0.1.0" dependencies = [ "anyhow", "async-lock 2.8.0", + "chardetng", "clock", "collections", + "encoding_rs", "fs", "futures 0.3.31", "fuzzy", diff --git a/Cargo.toml b/Cargo.toml index 13bb4ceea133e16e8cf89461cd1fe7084d448eae..703a34b63af901886e861dba3177e58b19c223f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -478,6 +478,7 @@ bytes = "1.0" cargo_metadata = "0.19" cargo_toml = "0.21" cfg-if = "1.0.3" +chardetng = "0.1" chrono = { version = "0.4", features = ["serde"] } ciborium = "0.2" circular-buffer = "1.0" @@ -501,6 +502,7 @@ dotenvy = "0.15.0" ec4rs = "1.1" emojis = "0.6.1" env_logger = "0.11" +encoding_rs = "0.8" exec = "0.3.1" fancy-regex = "0.16.0" fork = "0.4.0" diff --git a/crates/editor/src/editor_tests.rs b/crates/editor/src/editor_tests.rs index 1b84197471bd9ad65dc0ac31bf42c6ddc5ee3bf5..48e59f7b7420473054214572a2908215f98ffded 100644 --- 
a/crates/editor/src/editor_tests.rs +++ b/crates/editor/src/editor_tests.rs @@ -69,7 +69,6 @@ use util::{ use workspace::{ CloseActiveItem, CloseAllItems, CloseOtherItems, MoveItemToPaneInDirection, NavigationEntry, OpenOptions, ViewId, - invalid_item_view::InvalidItemView, item::{FollowEvent, FollowableItem, Item, ItemHandle, SaveOptions}, register_project_item, }; @@ -27667,11 +27666,10 @@ async fn test_non_utf_8_opens(cx: &mut TestAppContext) { }) .await .unwrap(); - - assert_eq!( - handle.to_any_view().entity_type(), - TypeId::of::() - ); + // The test file content `vec![0xff, 0xfe, ...]` starts with a UTF-16 LE BOM. + // Previously, this fell back to `InvalidItemView` because it wasn't valid UTF-8. + // With auto-detection enabled, this is now recognized as UTF-16 and opens in the Editor. + assert_eq!(handle.to_any_view().entity_type(), TypeId::of::()); } #[gpui::test] diff --git a/crates/language/Cargo.toml b/crates/language/Cargo.toml index 3ba93476d2a9fa5371b9d146cfc0c5833a748842..06d41e729bfabbf4f7e050409d2675dd909941d6 100644 --- a/crates/language/Cargo.toml +++ b/crates/language/Cargo.toml @@ -32,6 +32,7 @@ async-trait.workspace = true clock.workspace = true collections.workspace = true ec4rs.workspace = true +encoding_rs.workspace = true fs.workspace = true futures.workspace = true fuzzy.workspace = true diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 39003773f83718c6c61d4cfda55b9528f7c6eb2a..abf4d9b10a761b9c0247145e8ddb0664127756d2 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -25,6 +25,7 @@ use anyhow::{Context as _, Result}; use clock::Lamport; pub use clock::ReplicaId; use collections::{HashMap, HashSet}; +use encoding_rs::Encoding; use fs::MTime; use futures::channel::oneshot; use gpui::{ @@ -131,6 +132,8 @@ pub struct Buffer { change_bits: Vec>>, _subscriptions: Vec, tree_sitter_data: Arc, + encoding: &'static Encoding, + has_bom: bool, } #[derive(Debug)] @@ -1100,6 +1103,8 @@ 
impl Buffer { has_conflict: false, change_bits: Default::default(), _subscriptions: Vec::new(), + encoding: encoding_rs::UTF_8, + has_bom: false, } } @@ -1383,6 +1388,26 @@ impl Buffer { self.saved_mtime } + /// Returns the character encoding of the buffer's file. + pub fn encoding(&self) -> &'static Encoding { + self.encoding + } + + /// Sets the character encoding of the buffer. + pub fn set_encoding(&mut self, encoding: &'static Encoding) { + self.encoding = encoding; + } + + /// Returns whether the buffer has a Byte Order Mark. + pub fn has_bom(&self) -> bool { + self.has_bom + } + + /// Sets whether the buffer has a Byte Order Mark. + pub fn set_has_bom(&mut self, has_bom: bool) { + self.has_bom = has_bom; + } + /// Assign a language to the buffer. pub fn set_language_async(&mut self, language: Option>, cx: &mut Context) { self.set_language_(language, cfg!(any(test, feature = "test-support")), cx); diff --git a/crates/project/Cargo.toml b/crates/project/Cargo.toml index f39c368218511b6ddf560dda1198ef5c06bd0a2e..0d264f9e58363f5e8d8e23dff565d512f118a8d1 100644 --- a/crates/project/Cargo.toml +++ b/crates/project/Cargo.toml @@ -40,6 +40,7 @@ clock.workspace = true collections.workspace = true context_server.workspace = true dap.workspace = true +encoding_rs.workspace = true extension.workspace = true fancy-regex.workspace = true fs.workspace = true diff --git a/crates/project/src/buffer_store.rs b/crates/project/src/buffer_store.rs index aea2482c83edb952f3b0dba03a510085c7c4d3f6..22106fa368904d91a5c3da4338e1a79cef7f0fd0 100644 --- a/crates/project/src/buffer_store.rs +++ b/crates/project/src/buffer_store.rs @@ -376,6 +376,8 @@ impl LocalBufferStore { let text = buffer.as_rope().clone(); let line_ending = buffer.line_ending(); + let encoding = buffer.encoding(); + let has_bom = buffer.has_bom(); let version = buffer.version(); let buffer_id = buffer.remote_id(); let file = buffer.file().cloned(); @@ -387,7 +389,7 @@ impl LocalBufferStore { } let save = 
worktree.update(cx, |worktree, cx| { - worktree.write_file(path, text, line_ending, cx) + worktree.write_file(path, text, line_ending, encoding, has_bom, cx) }); cx.spawn(async move |this, cx| { @@ -630,7 +632,11 @@ impl LocalBufferStore { }) .await; cx.insert_entity(reservation, |_| { - Buffer::build(text_buffer, Some(loaded.file), Capability::ReadWrite) + let mut buffer = + Buffer::build(text_buffer, Some(loaded.file), Capability::ReadWrite); + buffer.set_encoding(loaded.encoding); + buffer.set_has_bom(loaded.has_bom); + buffer })? } Err(error) if is_not_found_error(&error) => cx.new(|cx| { diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs index 8b57413b22ac95a16e35a95d70a04b3ae49d4b31..5e31f2a90cf137f1e4d788952832e1eb2ee0ec35 100644 --- a/crates/project/src/project.rs +++ b/crates/project/src/project.rs @@ -65,6 +65,7 @@ use debugger::{ dap_store::{DapStore, DapStoreEvent}, session::Session, }; +use encoding_rs; pub use environment::ProjectEnvironment; #[cfg(test)] use futures::future::join_all; @@ -5461,13 +5462,22 @@ impl Project { .await .context("Failed to load settings file")?; + let has_bom = file.has_bom; + let new_text = cx.read_global::(|store, cx| { store.new_text_for_update(file.text, move |settings| update(settings, cx)) })?; worktree .update(cx, |worktree, cx| { let line_ending = text::LineEnding::detect(&new_text); - worktree.write_file(rel_path.clone(), new_text.into(), line_ending, cx) + worktree.write_file( + rel_path.clone(), + new_text.into(), + line_ending, + encoding_rs::UTF_8, + has_bom, + cx, + ) })? 
.await .context("Failed to write settings file")?; diff --git a/crates/vim/src/command.rs b/crates/vim/src/command.rs index 5bf0fca041cf274f38c84031e35903c9e339cc24..205097130d152fe255feb02a449956124586d8e6 100644 --- a/crates/vim/src/command.rs +++ b/crates/vim/src/command.rs @@ -330,10 +330,12 @@ pub fn register(editor: &mut Editor, cx: &mut Context) { let Some(range) = range.buffer_range(vim, editor, window, cx).ok() else { return; }; - let Some((line_ending, text, whole_buffer)) = editor.buffer().update(cx, |multi, cx| { + let Some((line_ending, encoding, has_bom, text, whole_buffer)) = editor.buffer().update(cx, |multi, cx| { Some(multi.as_singleton()?.update(cx, |buffer, _| { ( buffer.line_ending(), + buffer.encoding(), + buffer.has_bom(), buffer.as_rope().slice_rows(range.start.0..range.end.0 + 1), range.start.0 == 0 && range.end.0 + 1 >= buffer.row_count(), ) @@ -429,7 +431,7 @@ pub fn register(editor: &mut Editor, cx: &mut Context) { return; }; worktree - .write_file(path.into_arc(), text.clone(), line_ending, cx) + .write_file(path.into_arc(), text.clone(), line_ending, encoding, has_bom, cx) .detach_and_prompt_err("Failed to write lines", window, cx, |_, _, _| None); }); }) diff --git a/crates/worktree/Cargo.toml b/crates/worktree/Cargo.toml index 6d132fbd2cb8c7a1282bffcea6577260a15c4572..e7d3ac34e1886bd76e0a0f5d23ea981b6626909a 100644 --- a/crates/worktree/Cargo.toml +++ b/crates/worktree/Cargo.toml @@ -25,8 +25,10 @@ test-support = [ [dependencies] anyhow.workspace = true async-lock.workspace = true +chardetng.workspace = true clock.workspace = true collections.workspace = true +encoding_rs.workspace = true fs.workspace = true futures.workspace = true fuzzy.workspace = true diff --git a/crates/worktree/src/worktree.rs b/crates/worktree/src/worktree.rs index 6ec19493840da0b9de3eb55ac483488339ec5e8d..7145bccd514fbb5d6093efda765a826162c91260 100644 --- a/crates/worktree/src/worktree.rs +++ b/crates/worktree/src/worktree.rs @@ -5,8 +5,10 @@ mod 
worktree_tests; use ::ignore::gitignore::{Gitignore, GitignoreBuilder}; use anyhow::{Context as _, Result, anyhow}; +use chardetng::EncodingDetector; use clock::ReplicaId; use collections::{HashMap, HashSet, VecDeque}; +use encoding_rs::Encoding; use fs::{Fs, MTime, PathEvent, RemoveOptions, Watcher, copy_recursive, read_dir_items}; use futures::{ FutureExt as _, Stream, StreamExt, @@ -105,6 +107,8 @@ pub enum CreatedEntry { pub struct LoadedFile { pub file: Arc, pub text: String, + pub encoding: &'static Encoding, + pub has_bom: bool, } pub struct LoadedBinaryFile { @@ -741,10 +745,14 @@ impl Worktree { path: Arc, text: Rope, line_ending: LineEnding, + encoding: &'static Encoding, + has_bom: bool, cx: &Context, ) -> Task>> { match self { - Worktree::Local(this) => this.write_file(path, text, line_ending, cx), + Worktree::Local(this) => { + this.write_file(path, text, line_ending, encoding, has_bom, cx) + } Worktree::Remote(_) => { Task::ready(Err(anyhow!("remote worktree can't yet write files"))) } @@ -1351,7 +1359,9 @@ impl LocalWorktree { anyhow::bail!("File is too large to load"); } } - let text = fs.load(&abs_path).await?; + + let content = fs.load_bytes(&abs_path).await?; + let (text, encoding, has_bom) = decode_byte(content); let worktree = this.upgrade().context("worktree was dropped")?; let file = match entry.await? 
{
@@ -1379,7 +1389,12 @@ impl LocalWorktree {
                 }
             };
 
-            Ok(LoadedFile { file, text })
+            Ok(LoadedFile {
+                file,
+                text,
+                encoding,
+                has_bom,
+            })
         })
     }
 
@@ -1462,6 +1477,8 @@ impl LocalWorktree {
         path: Arc,
         text: Rope,
         line_ending: LineEnding,
+        encoding: &'static Encoding,
+        has_bom: bool,
         cx: &Context,
     ) -> Task>> {
         let fs = self.fs.clone();
@@ -1471,7 +1488,63 @@ impl LocalWorktree {
         let write = cx.background_spawn({
             let fs = fs.clone();
             let abs_path = abs_path.clone();
-            async move { fs.save(&abs_path, &text, line_ending).await }
+            async move {
+                // Plain UTF-8 without a BOM is the common case: `fs.save` writes the Rope
+                // chunks directly to disk without allocating a contiguous string.
+                if encoding == encoding_rs::UTF_8 && !has_bom {
+                    return fs.save(&abs_path, &text, line_ending).await;
+                }
+
+                // Every other combination is encoded in memory before writing. This is
+                // inefficient for very large files compared to the streaming path above, but
+                // streaming writes for arbitrary encodings would require a significant
+                // refactor of the `fs` crate to expose a Writer interface.
+                let text_string = text.to_string();
+                let normalized_text = match line_ending {
+                    LineEnding::Unix => text_string,
+                    LineEnding::Windows => text_string.replace('\n', "\r\n"),
+                };
+
+                // Only the Unicode encodings have a BOM; `has_bom` is ignored for the rest.
+                let bom: &[u8] = if !has_bom {
+                    &[]
+                } else if encoding == encoding_rs::UTF_16LE {
+                    &[0xFF, 0xFE]
+                } else if encoding == encoding_rs::UTF_16BE {
+                    &[0xFE, 0xFF]
+                } else if encoding == encoding_rs::UTF_8 {
+                    &[0xEF, 0xBB, 0xBF]
+                } else {
+                    &[]
+                };
+
+                let mut bytes = Vec::with_capacity(bom.len() + normalized_text.len());
+                bytes.extend_from_slice(bom);
+                if encoding == encoding_rs::UTF_16LE {
+                    // `Encoding::encode` never produces UTF-16: the output encoding of
+                    // UTF-16LE/BE is UTF-8, so using it here would write UTF-8 bytes behind
+                    // a UTF-16 BOM. Serialize the UTF-16 code units by hand instead.
+                    bytes.extend(
+                        normalized_text
+                            .encode_utf16()
+                            .flat_map(|unit| unit.to_le_bytes()),
+                    );
+                } else if encoding == encoding_rs::UTF_16BE {
+                    bytes.extend(
+                        normalized_text
+                            .encode_utf16()
+                            .flat_map(|unit| unit.to_be_bytes()),
+                    );
+                } else {
+                    // NOTE(review): characters the target encoding cannot represent are
+                    // replaced with numeric character references (`&#NNNN;`) by
+                    // `Encoding::encode`; the "had unmappable" flag is still ignored here.
+                    let (encoded, _, _) = encoding.encode(&normalized_text);
+                    bytes.extend_from_slice(&encoded);
+                }
+
+                fs.write(&abs_path, &bytes).await
+            }
         });
 
         cx.spawn(async move |this, cx| {
@@ -5782,3 +5855,40 @@ impl fs::Watcher for NullWatcher {
         Ok(())
     }
 }
+
+fn decode_byte(bytes: Vec) -> (String, &'static Encoding, bool) {
+    // check BOM
+    if let Some((encoding, _bom_len)) = Encoding::for_bom(&bytes) {
+        let (cow, _) = encoding.decode_with_bom_removal(&bytes);
+        return (cow.into_owned(), encoding, true);
+    }
+
+    fn detect_encoding(bytes: Vec) -> (String, &'static Encoding) {
+        let mut detector = EncodingDetector::new();
+        detector.feed(&bytes, true);
+
+        let encoding = detector.guess(None, true); // Use None for TLD hint to ensure neutral detection logic.
+
+        let (cow, _, _) = encoding.decode(&bytes);
+        (cow.into_owned(), encoding)
+    }
+
+    match String::from_utf8(bytes) {
+        Ok(text) => {
+            // ISO-2022-JP (and other ISO-2022 variants) consists entirely of 7-bit ASCII bytes,
+            // so it is valid UTF-8. However, it contains escape sequences starting with '\x1b'.
+            // If we find an escape character, we double-check the encoding to prevent
+            // displaying raw escape sequences instead of the correct characters.
+ if text.contains('\x1b') { + let (s, enc) = detect_encoding(text.into_bytes()); + (s, enc, false) + } else { + (text, encoding_rs::UTF_8, false) + } + } + Err(e) => { + let (s, enc) = detect_encoding(e.into_bytes()); + (s, enc, false) + } + } +} diff --git a/crates/worktree/src/worktree_tests.rs b/crates/worktree/src/worktree_tests.rs index 12f2863aab6c4b4376157f3499fa332051a4822f..094a6d52ea4168752578eab06cea511a57e65c10 100644 --- a/crates/worktree/src/worktree_tests.rs +++ b/crates/worktree/src/worktree_tests.rs @@ -1,5 +1,6 @@ use crate::{Entry, EntryKind, Event, PathChange, Worktree, WorktreeModelHandle}; -use anyhow::Result; +use anyhow::{Context as _, Result}; +use encoding_rs; use fs::{FakeFs, Fs, RealFs, RemoveOptions}; use git::{DOT_GIT, GITIGNORE, REPO_EXCLUDE}; use gpui::{AppContext as _, BackgroundExecutor, BorrowAppContext, Context, Task, TestAppContext}; @@ -19,6 +20,7 @@ use std::{ }; use util::{ ResultExt, path, + paths::PathStyle, rel_path::{RelPath, rel_path}, test::TempTree, }; @@ -723,6 +725,8 @@ async fn test_write_file(cx: &mut TestAppContext) { rel_path("tracked-dir/file.txt").into(), "hello".into(), Default::default(), + encoding_rs::UTF_8, + false, cx, ) }) @@ -734,6 +738,8 @@ async fn test_write_file(cx: &mut TestAppContext) { rel_path("ignored-dir/file.txt").into(), "world".into(), Default::default(), + encoding_rs::UTF_8, + false, cx, ) }) @@ -2035,8 +2041,14 @@ fn randomly_mutate_worktree( }) } else { log::info!("overwriting file {:?} ({})", &entry.path, entry.id.0); - let task = - worktree.write_file(entry.path.clone(), "".into(), Default::default(), cx); + let task = worktree.write_file( + entry.path.clone(), + "".into(), + Default::default(), + encoding_rs::UTF_8, + false, + cx, + ); cx.background_spawn(async move { task.await?; Ok(()) @@ -2552,3 +2564,176 @@ fn init_test(cx: &mut gpui::TestAppContext) { cx.set_global(settings_store); }); } + +#[gpui::test] +async fn test_load_file_encoding(cx: &mut TestAppContext) { + 
init_test(cx); + let test_cases: Vec<(&str, &[u8], &str)> = vec![ + ("utf8.txt", "こんにちは".as_bytes(), "こんにちは"), // "こんにちは" is Japanese "Hello" + ( + "sjis.txt", + &[0x82, 0xb1, 0x82, 0xf1, 0x82, 0xc9, 0x82, 0xbf, 0x82, 0xcd], + "こんにちは", + ), + ( + "eucjp.txt", + &[0xa4, 0xb3, 0xa4, 0xf3, 0xa4, 0xcb, 0xa4, 0xc1, 0xa4, 0xcf], + "こんにちは", + ), + ( + "iso2022jp.txt", + &[ + 0x1b, 0x24, 0x42, 0x24, 0x33, 0x24, 0x73, 0x24, 0x4b, 0x24, 0x41, 0x24, 0x4f, 0x1b, + 0x28, 0x42, + ], + "こんにちは", + ), + // Western Europe (Windows-1252) + // "Café" -> 0xE9 is 'é' in Windows-1252 (it is typically 0xC3 0xA9 in UTF-8) + ("win1252.txt", &[0x43, 0x61, 0x66, 0xe9], "Café"), + // Chinese Simplified (GBK) + // Note: We use a slightly longer string here because short byte sequences can be ambiguous + // in multi-byte encodings. Providing more context helps the heuristic detector guess correctly. + // Text: "今天天气不错" (Today's weather is not bad / nice) + // Bytes: + // 今: BD F1 + // 天: CC EC + // 天: CC EC + // 气: C6 F8 + // 不: B2 BB + // 错: B4 ED + ( + "gbk.txt", + &[ + 0xbd, 0xf1, 0xcc, 0xec, 0xcc, 0xec, 0xc6, 0xf8, 0xb2, 0xbb, 0xb4, 0xed, + ], + "今天天气不错", + ), + ( + "utf16le_bom.txt", + &[ + 0xFF, 0xFE, // BOM + 0x53, 0x30, // こ + 0x93, 0x30, // ん + 0x6B, 0x30, // に + 0x61, 0x30, // ち + 0x6F, 0x30, // は + ], + "こんにちは", + ), + ( + "utf8_bom.txt", + &[ + 0xEF, 0xBB, 0xBF, // UTF-8 BOM + 0xE3, 0x81, 0x93, // こ + 0xE3, 0x82, 0x93, // ん + 0xE3, 0x81, 0xAB, // に + 0xE3, 0x81, 0xA1, // ち + 0xE3, 0x81, 0xAF, // は + ], + "こんにちは", + ), + ]; + + let root_path = if cfg!(windows) { + Path::new("C:\\root") + } else { + Path::new("/root") + }; + + let fs = FakeFs::new(cx.background_executor.clone()); + + let mut files_json = serde_json::Map::new(); + for (name, _, _) in &test_cases { + files_json.insert(name.to_string(), serde_json::Value::String("".to_string())); + } + + for (name, bytes, _) in &test_cases { + let path = root_path.join(name); + fs.write(&path, bytes).await.unwrap(); + } + + let tree = 
Worktree::local( + root_path, + true, + fs, + Default::default(), + true, + &mut cx.to_async(), + ) + .await + .unwrap(); + + cx.read(|cx| tree.read(cx).as_local().unwrap().scan_complete()) + .await; + + for (name, _, expected) in test_cases { + let loaded = tree + .update(cx, |tree, cx| tree.load_file(rel_path(name), cx)) + .await + .with_context(|| format!("Failed to load {}", name)) + .unwrap(); + + assert_eq!( + loaded.text, expected, + "Encoding mismatch for file: {}", + name + ); + } +} + +#[gpui::test] +async fn test_write_file_encoding(cx: &mut gpui::TestAppContext) { + init_test(cx); + let fs = FakeFs::new(cx.executor()); + let root_path = if cfg!(windows) { + Path::new("C:\\root") + } else { + Path::new("/root") + }; + fs.create_dir(root_path).await.unwrap(); + let file_path = root_path.join("test.txt"); + + fs.insert_file(&file_path, "initial".into()).await; + + let worktree = Worktree::local( + root_path, + true, + fs.clone(), + Default::default(), + true, + &mut cx.to_async(), + ) + .await + .unwrap(); + + let path: Arc = Path::new("test.txt").into(); + let rel_path = RelPath::new(&path, PathStyle::local()).unwrap().into_arc(); + + let text = text::Rope::from("こんにちは"); + + let task = worktree.update(cx, |wt, cx| { + wt.write_file( + rel_path, + text, + text::LineEnding::Unix, + encoding_rs::SHIFT_JIS, + false, + cx, + ) + }); + + task.await.unwrap(); + + let bytes = fs.load_bytes(&file_path).await.unwrap(); + + let expected_bytes = vec![ + 0x82, 0xb1, // こ + 0x82, 0xf1, // ん + 0x82, 0xc9, // に + 0x82, 0xbf, // ち + 0x82, 0xcd, // は + ]; + + assert_eq!(bytes, expected_bytes, "Should be saved as Shift-JIS"); +}