Support opening and saving files with legacy encodings (#44819)

Ichimura Tomoo , CrazyboyQCD , and Conrad Irwin created

## Summary

Addresses #16965

This PR adds support for **opening and saving** files with legacy
encodings (non-UTF-8).
Previously, Zed failed to open files encoded in Shift-JIS, EUC-JP, Big5,
etc., displaying a "Could not open file" error screen. This PR
implements automatic encoding detection upon opening and ensures the
original encoding is preserved when saving.

## Implementation Details

1.  **Worktree (Loading)**:
* Updated `load_file` to use `chardetng` for automatic encoding
detection.
* Files are decoded to UTF-8 internal strings for editing, while
preserving the detected `Encoding` metadata.
2.  **Language / Buffer**:
* Added an `encoding` field to the `Buffer` struct to store the detected
encoding.
3.  **Worktree (Saving)**:
    * Updated `write_file` to accept the stored encoding.
    * **Performance Optimization**:
* **UTF-8 Path**: Uses the existing optimized `fs.save` (streaming
chunks directly from Rope), ensuring no performance regression for the
vast majority of files.
* **Legacy Encoding Path**: Implemented a fallback that converts the
Rope to a contiguous `String/Bytes` in memory, re-encodes it to the
target format (e.g., Shift-JIS), and writes it to disk.
* *Note*: This fallback involves memory allocation, but it is necessary
to support legacy encodings without refactoring the `fs` crate's
streaming interfaces.

## Changes

- `crates/worktree`:
    - Add dependencies: `encoding_rs`, `chardetng`.
    - Update `load_file` to detect encoding and decode content.
    - Update `write_file` to handle re-encoding on save.
- `crates/language`: Add `encoding` field and accessors to `Buffer`.
- `crates/project`: Pass encoding information between Worktree and
Buffer.
- `crates/vim`: Update `:w` command to use the new `write_file`
signature.

## Verification

I validated this manually using a Rust script to generate test files
with various encodings.

**Results:**

* βœ… **Success (Opened & Saved correctly):**
    * **Japanese:** `Shift-JIS` (CP932), `EUC-JP`, `ISO-2022-JP`
    * **Chinese:** `Big5` (Traditional), `GBK/GB2312` (Simplified)
* **Western/Unicode:** `Windows-1252` (CP1252), `UTF-16LE`, `UTF-16BE`
* ⚠️ **limitations (Detection accuracy):**
* Some specific encodings like `KOI8-R` or generic `Latin1` (ISO-8859-1)
may partially display replacement characters (`?`) depending on the file
content length. This is a known limitation of the heuristic detection
library (`chardetng`) rather than the saving logic.


Release Notes:

- Added support for opening and saving files with legacy encodings
(Shift-JIS, Big5, etc.)

---------

Co-authored-by: CrazyboyQCD <53971641+CrazyboyQCD@users.noreply.github.com>
Co-authored-by: Conrad Irwin <conrad.irwin@gmail.com>

Change summary

Cargo.lock                            |  39 ++++-
Cargo.toml                            |   2 
crates/editor/src/editor_tests.rs     |  10 
crates/language/Cargo.toml            |   1 
crates/language/src/buffer.rs         |  25 +++
crates/project/Cargo.toml             |   1 
crates/project/src/buffer_store.rs    |  10 +
crates/project/src/project.rs         |  12 +
crates/vim/src/command.rs             |   6 
crates/worktree/Cargo.toml            |   2 
crates/worktree/src/worktree.rs       | 104 +++++++++++++++
crates/worktree/src/worktree_tests.rs | 191 ++++++++++++++++++++++++++++
12 files changed, 373 insertions(+), 30 deletions(-)

Detailed changes

Cargo.lock πŸ”—

@@ -2667,9 +2667,9 @@ dependencies = [
 
 [[package]]
 name = "cap-fs-ext"
-version = "3.4.5"
+version = "3.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d5528f85b1e134ae811704e41ef80930f56e795923f866813255bc342cc20654"
+checksum = "e41cc18551193fe8fa6f15c1e3c799bc5ec9e2cfbfaa8ed46f37013e3e6c173c"
 dependencies = [
  "cap-primitives",
  "cap-std",
@@ -2679,9 +2679,9 @@ dependencies = [
 
 [[package]]
 name = "cap-net-ext"
-version = "3.4.5"
+version = "3.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "20a158160765c6a7d0d8c072a53d772e4cb243f38b04bfcf6b4939cfbe7482e7"
+checksum = "9f83833816c66c986e913b22ac887cec216ea09301802054316fc5301809702c"
 dependencies = [
  "cap-primitives",
  "cap-std",
@@ -2691,9 +2691,9 @@ dependencies = [
 
 [[package]]
 name = "cap-primitives"
-version = "3.4.5"
+version = "3.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6cf3aea8a5081171859ef57bc1606b1df6999df4f1110f8eef68b30098d1d3a"
+checksum = "0a1e394ed14f39f8bc26f59d4c0c010dbe7f0a1b9bafff451b1f98b67c8af62a"
 dependencies = [
  "ambient-authority",
  "fs-set-times",
@@ -2709,9 +2709,9 @@ dependencies = [
 
 [[package]]
 name = "cap-rand"
-version = "3.4.5"
+version = "3.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d8144c22e24bbcf26ade86cb6501a0916c46b7e4787abdb0045a467eb1645a1d"
+checksum = "0acb89ccf798a28683f00089d0630dfaceec087234eae0d308c05ddeaa941b40"
 dependencies = [
  "ambient-authority",
  "rand 0.8.5",
@@ -2719,9 +2719,9 @@ dependencies = [
 
 [[package]]
 name = "cap-std"
-version = "3.4.5"
+version = "3.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6dc3090992a735d23219de5c204927163d922f42f575a0189b005c62d37549a"
+checksum = "07c0355ca583dd58f176c3c12489d684163861ede3c9efa6fd8bba314c984189"
 dependencies = [
  "cap-primitives",
  "io-extras",
@@ -2731,9 +2731,9 @@ dependencies = [
 
 [[package]]
 name = "cap-time-ext"
-version = "3.4.5"
+version = "3.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "def102506ce40c11710a9b16e614af0cde8e76ae51b1f48c04b8d79f4b671a80"
+checksum = "491af520b8770085daa0466978c75db90368c71896523f2464214e38359b1a5b"
 dependencies = [
  "ambient-authority",
  "cap-primitives",
@@ -2896,6 +2896,17 @@ dependencies = [
  "util",
 ]
 
+[[package]]
+name = "chardetng"
+version = "0.1.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14b8f0b65b7b08ae3c8187e8d77174de20cb6777864c6b832d8ad365999cf1ea"
+dependencies = [
+ "cfg-if",
+ "encoding_rs",
+ "memchr",
+]
+
 [[package]]
 name = "chrono"
 version = "0.4.42"
@@ -8797,6 +8808,7 @@ dependencies = [
  "ctor",
  "diffy",
  "ec4rs",
+ "encoding_rs",
  "fs",
  "futures 0.3.31",
  "fuzzy",
@@ -12465,6 +12477,7 @@ dependencies = [
  "dap",
  "dap_adapters",
  "db",
+ "encoding_rs",
  "extension",
  "fancy-regex",
  "fs",
@@ -20231,8 +20244,10 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "async-lock 2.8.0",
+ "chardetng",
  "clock",
  "collections",
+ "encoding_rs",
  "fs",
  "futures 0.3.31",
  "fuzzy",

Cargo.toml πŸ”—

@@ -478,6 +478,7 @@ bytes = "1.0"
 cargo_metadata = "0.19"
 cargo_toml = "0.21"
 cfg-if = "1.0.3"
+chardetng = "0.1"
 chrono = { version = "0.4", features = ["serde"] }
 ciborium = "0.2"
 circular-buffer = "1.0"
@@ -501,6 +502,7 @@ dotenvy = "0.15.0"
 ec4rs = "1.1"
 emojis = "0.6.1"
 env_logger = "0.11"
+encoding_rs = "0.8"
 exec = "0.3.1"
 fancy-regex = "0.16.0"
 fork = "0.4.0"

crates/editor/src/editor_tests.rs πŸ”—

@@ -69,7 +69,6 @@ use util::{
 use workspace::{
     CloseActiveItem, CloseAllItems, CloseOtherItems, MoveItemToPaneInDirection, NavigationEntry,
     OpenOptions, ViewId,
-    invalid_item_view::InvalidItemView,
     item::{FollowEvent, FollowableItem, Item, ItemHandle, SaveOptions},
     register_project_item,
 };
@@ -27667,11 +27666,10 @@ async fn test_non_utf_8_opens(cx: &mut TestAppContext) {
         })
         .await
         .unwrap();
-
-    assert_eq!(
-        handle.to_any_view().entity_type(),
-        TypeId::of::<InvalidItemView>()
-    );
+    // The test file content `vec![0xff, 0xfe, ...]` starts with a UTF-16 LE BOM.
+    // Previously, this fell back to `InvalidItemView` because it wasn't valid UTF-8.
+    // With auto-detection enabled, this is now recognized as UTF-16 and opens in the Editor.
+    assert_eq!(handle.to_any_view().entity_type(), TypeId::of::<Editor>());
 }
 
 #[gpui::test]

crates/language/Cargo.toml πŸ”—

@@ -32,6 +32,7 @@ async-trait.workspace = true
 clock.workspace = true
 collections.workspace = true
 ec4rs.workspace = true
+encoding_rs.workspace = true
 fs.workspace = true
 futures.workspace = true
 fuzzy.workspace = true

crates/language/src/buffer.rs πŸ”—

@@ -25,6 +25,7 @@ use anyhow::{Context as _, Result};
 use clock::Lamport;
 pub use clock::ReplicaId;
 use collections::{HashMap, HashSet};
+use encoding_rs::Encoding;
 use fs::MTime;
 use futures::channel::oneshot;
 use gpui::{
@@ -131,6 +132,8 @@ pub struct Buffer {
     change_bits: Vec<rc::Weak<Cell<bool>>>,
     _subscriptions: Vec<gpui::Subscription>,
     tree_sitter_data: Arc<TreeSitterData>,
+    encoding: &'static Encoding,
+    has_bom: bool,
 }
 
 #[derive(Debug)]
@@ -1100,6 +1103,8 @@ impl Buffer {
             has_conflict: false,
             change_bits: Default::default(),
             _subscriptions: Vec::new(),
+            encoding: encoding_rs::UTF_8,
+            has_bom: false,
         }
     }
 
@@ -1383,6 +1388,26 @@ impl Buffer {
         self.saved_mtime
     }
 
+    /// Returns the character encoding of the buffer's file.
+    pub fn encoding(&self) -> &'static Encoding {
+        self.encoding
+    }
+
+    /// Sets the character encoding of the buffer.
+    pub fn set_encoding(&mut self, encoding: &'static Encoding) {
+        self.encoding = encoding;
+    }
+
+    /// Returns whether the buffer has a Byte Order Mark.
+    pub fn has_bom(&self) -> bool {
+        self.has_bom
+    }
+
+    /// Sets whether the buffer has a Byte Order Mark.
+    pub fn set_has_bom(&mut self, has_bom: bool) {
+        self.has_bom = has_bom;
+    }
+
     /// Assign a language to the buffer.
     pub fn set_language_async(&mut self, language: Option<Arc<Language>>, cx: &mut Context<Self>) {
         self.set_language_(language, cfg!(any(test, feature = "test-support")), cx);

crates/project/Cargo.toml πŸ”—

@@ -40,6 +40,7 @@ clock.workspace = true
 collections.workspace = true
 context_server.workspace = true
 dap.workspace = true
+encoding_rs.workspace = true
 extension.workspace = true
 fancy-regex.workspace = true
 fs.workspace = true

crates/project/src/buffer_store.rs πŸ”—

@@ -376,6 +376,8 @@ impl LocalBufferStore {
 
         let text = buffer.as_rope().clone();
         let line_ending = buffer.line_ending();
+        let encoding = buffer.encoding();
+        let has_bom = buffer.has_bom();
         let version = buffer.version();
         let buffer_id = buffer.remote_id();
         let file = buffer.file().cloned();
@@ -387,7 +389,7 @@ impl LocalBufferStore {
         }
 
         let save = worktree.update(cx, |worktree, cx| {
-            worktree.write_file(path, text, line_ending, cx)
+            worktree.write_file(path, text, line_ending, encoding, has_bom, cx)
         });
 
         cx.spawn(async move |this, cx| {
@@ -630,7 +632,11 @@ impl LocalBufferStore {
                         })
                         .await;
                     cx.insert_entity(reservation, |_| {
-                        Buffer::build(text_buffer, Some(loaded.file), Capability::ReadWrite)
+                        let mut buffer =
+                            Buffer::build(text_buffer, Some(loaded.file), Capability::ReadWrite);
+                        buffer.set_encoding(loaded.encoding);
+                        buffer.set_has_bom(loaded.has_bom);
+                        buffer
                     })?
                 }
                 Err(error) if is_not_found_error(&error) => cx.new(|cx| {

crates/project/src/project.rs πŸ”—

@@ -65,6 +65,7 @@ use debugger::{
     dap_store::{DapStore, DapStoreEvent},
     session::Session,
 };
+use encoding_rs;
 pub use environment::ProjectEnvironment;
 #[cfg(test)]
 use futures::future::join_all;
@@ -5461,13 +5462,22 @@ impl Project {
                 .await
                 .context("Failed to load settings file")?;
 
+            let has_bom = file.has_bom;
+
             let new_text = cx.read_global::<SettingsStore, _>(|store, cx| {
                 store.new_text_for_update(file.text, move |settings| update(settings, cx))
             })?;
             worktree
                 .update(cx, |worktree, cx| {
                     let line_ending = text::LineEnding::detect(&new_text);
-                    worktree.write_file(rel_path.clone(), new_text.into(), line_ending, cx)
+                    worktree.write_file(
+                        rel_path.clone(),
+                        new_text.into(),
+                        line_ending,
+                        encoding_rs::UTF_8,
+                        has_bom,
+                        cx,
+                    )
                 })?
                 .await
                 .context("Failed to write settings file")?;

crates/vim/src/command.rs πŸ”—

@@ -330,10 +330,12 @@ pub fn register(editor: &mut Editor, cx: &mut Context<Vim>) {
                 let Some(range) = range.buffer_range(vim, editor, window, cx).ok() else {
                     return;
                 };
-                let Some((line_ending, text, whole_buffer)) = editor.buffer().update(cx, |multi, cx| {
+                let Some((line_ending, encoding, has_bom, text, whole_buffer)) = editor.buffer().update(cx, |multi, cx| {
                     Some(multi.as_singleton()?.update(cx, |buffer, _| {
                         (
                             buffer.line_ending(),
+                            buffer.encoding(),
+                            buffer.has_bom(),
                             buffer.as_rope().slice_rows(range.start.0..range.end.0 + 1),
                             range.start.0 == 0 && range.end.0 + 1 >= buffer.row_count(),
                         )
@@ -429,7 +431,7 @@ pub fn register(editor: &mut Editor, cx: &mut Context<Vim>) {
                                     return;
                                 };
                                 worktree
-                                    .write_file(path.into_arc(), text.clone(), line_ending, cx)
+                                    .write_file(path.into_arc(), text.clone(), line_ending, encoding, has_bom, cx)
                                     .detach_and_prompt_err("Failed to write lines", window, cx, |_, _, _| None);
                             });
                         })

crates/worktree/Cargo.toml πŸ”—

@@ -25,8 +25,10 @@ test-support = [
 [dependencies]
 anyhow.workspace = true
 async-lock.workspace = true
+chardetng.workspace = true
 clock.workspace = true
 collections.workspace = true
+encoding_rs.workspace = true
 fs.workspace = true
 futures.workspace = true
 fuzzy.workspace = true

crates/worktree/src/worktree.rs πŸ”—

@@ -5,8 +5,10 @@ mod worktree_tests;
 
 use ::ignore::gitignore::{Gitignore, GitignoreBuilder};
 use anyhow::{Context as _, Result, anyhow};
+use chardetng::EncodingDetector;
 use clock::ReplicaId;
 use collections::{HashMap, HashSet, VecDeque};
+use encoding_rs::Encoding;
 use fs::{Fs, MTime, PathEvent, RemoveOptions, Watcher, copy_recursive, read_dir_items};
 use futures::{
     FutureExt as _, Stream, StreamExt,
@@ -105,6 +107,8 @@ pub enum CreatedEntry {
 pub struct LoadedFile {
     pub file: Arc<File>,
     pub text: String,
+    pub encoding: &'static Encoding,
+    pub has_bom: bool,
 }
 
 pub struct LoadedBinaryFile {
@@ -741,10 +745,14 @@ impl Worktree {
         path: Arc<RelPath>,
         text: Rope,
         line_ending: LineEnding,
+        encoding: &'static Encoding,
+        has_bom: bool,
         cx: &Context<Worktree>,
     ) -> Task<Result<Arc<File>>> {
         match self {
-            Worktree::Local(this) => this.write_file(path, text, line_ending, cx),
+            Worktree::Local(this) => {
+                this.write_file(path, text, line_ending, encoding, has_bom, cx)
+            }
             Worktree::Remote(_) => {
                 Task::ready(Err(anyhow!("remote worktree can't yet write files")))
             }
@@ -1351,7 +1359,9 @@ impl LocalWorktree {
                     anyhow::bail!("File is too large to load");
                 }
             }
-            let text = fs.load(&abs_path).await?;
+
+            let content = fs.load_bytes(&abs_path).await?;
+            let (text, encoding, has_bom) = decode_byte(content);
 
             let worktree = this.upgrade().context("worktree was dropped")?;
             let file = match entry.await? {
@@ -1379,7 +1389,12 @@ impl LocalWorktree {
                 }
             };
 
-            Ok(LoadedFile { file, text })
+            Ok(LoadedFile {
+                file,
+                text,
+                encoding,
+                has_bom,
+            })
         })
     }
 
@@ -1462,6 +1477,8 @@ impl LocalWorktree {
         path: Arc<RelPath>,
         text: Rope,
         line_ending: LineEnding,
+        encoding: &'static Encoding,
+        has_bom: bool,
         cx: &Context<Worktree>,
     ) -> Task<Result<Arc<File>>> {
         let fs = self.fs.clone();
@@ -1471,7 +1488,49 @@ impl LocalWorktree {
         let write = cx.background_spawn({
             let fs = fs.clone();
             let abs_path = abs_path.clone();
-            async move { fs.save(&abs_path, &text, line_ending).await }
+            async move {
+                let bom_bytes = if has_bom {
+                    if encoding == encoding_rs::UTF_16LE {
+                        vec![0xFF, 0xFE]
+                    } else if encoding == encoding_rs::UTF_16BE {
+                        vec![0xFE, 0xFF]
+                    } else if encoding == encoding_rs::UTF_8 {
+                        vec![0xEF, 0xBB, 0xBF]
+                    } else {
+                        vec![]
+                    }
+                } else {
+                    vec![]
+                };
+
+                // For UTF-8, use the optimized `fs.save` which writes Rope chunks directly to disk
+                // without allocating a contiguous string.
+                if encoding == encoding_rs::UTF_8 && !has_bom {
+                    return fs.save(&abs_path, &text, line_ending).await;
+                }
+                // For legacy encodings (e.g. Shift-JIS), we fall back to converting the entire Rope
+                // to a String/Bytes in memory before writing.
+                //
+                // Note: This is inefficient for very large files compared to the streaming approach above,
+                // but supporting streaming writes for arbitrary encodings would require a significant
+                // refactor of the `fs` crate to expose a Writer interface.
+                let text_string = text.to_string();
+                let normalized_text = match line_ending {
+                    LineEnding::Unix => text_string,
+                    LineEnding::Windows => text_string.replace('\n', "\r\n"),
+                };
+
+                let (cow, _, _) = encoding.encode(&normalized_text);
+                let bytes = if !bom_bytes.is_empty() {
+                    let mut bytes = bom_bytes;
+                    bytes.extend_from_slice(&cow);
+                    bytes.into()
+                } else {
+                    cow
+                };
+
+                fs.write(&abs_path, &bytes).await
+            }
         });
 
         cx.spawn(async move |this, cx| {
@@ -5782,3 +5841,40 @@ impl fs::Watcher for NullWatcher {
         Ok(())
     }
 }
+
+fn decode_byte(bytes: Vec<u8>) -> (String, &'static Encoding, bool) {
+    // check BOM
+    if let Some((encoding, _bom_len)) = Encoding::for_bom(&bytes) {
+        let (cow, _) = encoding.decode_with_bom_removal(&bytes);
+        return (cow.into_owned(), encoding, true);
+    }
+
+    fn detect_encoding(bytes: Vec<u8>) -> (String, &'static Encoding) {
+        let mut detector = EncodingDetector::new();
+        detector.feed(&bytes, true);
+
+        let encoding = detector.guess(None, true); // Use None for TLD hint to ensure neutral detection logic.
+
+        let (cow, _, _) = encoding.decode(&bytes);
+        (cow.into_owned(), encoding)
+    }
+
+    match String::from_utf8(bytes) {
+        Ok(text) => {
+            // ISO-2022-JP (and other ISO-2022 variants) consists entirely of 7-bit ASCII bytes,
+            // so it is valid UTF-8. However, it contains escape sequences starting with '\x1b'.
+            // If we find an escape character, we double-check the encoding to prevent
+            // displaying raw escape sequences instead of the correct characters.
+            if text.contains('\x1b') {
+                let (s, enc) = detect_encoding(text.into_bytes());
+                (s, enc, false)
+            } else {
+                (text, encoding_rs::UTF_8, false)
+            }
+        }
+        Err(e) => {
+            let (s, enc) = detect_encoding(e.into_bytes());
+            (s, enc, false)
+        }
+    }
+}

crates/worktree/src/worktree_tests.rs πŸ”—

@@ -1,5 +1,6 @@
 use crate::{Entry, EntryKind, Event, PathChange, Worktree, WorktreeModelHandle};
-use anyhow::Result;
+use anyhow::{Context as _, Result};
+use encoding_rs;
 use fs::{FakeFs, Fs, RealFs, RemoveOptions};
 use git::{DOT_GIT, GITIGNORE, REPO_EXCLUDE};
 use gpui::{AppContext as _, BackgroundExecutor, BorrowAppContext, Context, Task, TestAppContext};
@@ -19,6 +20,7 @@ use std::{
 };
 use util::{
     ResultExt, path,
+    paths::PathStyle,
     rel_path::{RelPath, rel_path},
     test::TempTree,
 };
@@ -723,6 +725,8 @@ async fn test_write_file(cx: &mut TestAppContext) {
                 rel_path("tracked-dir/file.txt").into(),
                 "hello".into(),
                 Default::default(),
+                encoding_rs::UTF_8,
+                false,
                 cx,
             )
         })
@@ -734,6 +738,8 @@ async fn test_write_file(cx: &mut TestAppContext) {
                 rel_path("ignored-dir/file.txt").into(),
                 "world".into(),
                 Default::default(),
+                encoding_rs::UTF_8,
+                false,
                 cx,
             )
         })
@@ -2035,8 +2041,14 @@ fn randomly_mutate_worktree(
                 })
             } else {
                 log::info!("overwriting file {:?} ({})", &entry.path, entry.id.0);
-                let task =
-                    worktree.write_file(entry.path.clone(), "".into(), Default::default(), cx);
+                let task = worktree.write_file(
+                    entry.path.clone(),
+                    "".into(),
+                    Default::default(),
+                    encoding_rs::UTF_8,
+                    false,
+                    cx,
+                );
                 cx.background_spawn(async move {
                     task.await?;
                     Ok(())
@@ -2552,3 +2564,176 @@ fn init_test(cx: &mut gpui::TestAppContext) {
         cx.set_global(settings_store);
     });
 }
+
+#[gpui::test]
+async fn test_load_file_encoding(cx: &mut TestAppContext) {
+    init_test(cx);
+    let test_cases: Vec<(&str, &[u8], &str)> = vec![
+        ("utf8.txt", "こんにけは".as_bytes(), "こんにけは"), // "こんにけは" is Japanese "Hello"
+        (
+            "sjis.txt",
+            &[0x82, 0xb1, 0x82, 0xf1, 0x82, 0xc9, 0x82, 0xbf, 0x82, 0xcd],
+            "こんにけは",
+        ),
+        (
+            "eucjp.txt",
+            &[0xa4, 0xb3, 0xa4, 0xf3, 0xa4, 0xcb, 0xa4, 0xc1, 0xa4, 0xcf],
+            "こんにけは",
+        ),
+        (
+            "iso2022jp.txt",
+            &[
+                0x1b, 0x24, 0x42, 0x24, 0x33, 0x24, 0x73, 0x24, 0x4b, 0x24, 0x41, 0x24, 0x4f, 0x1b,
+                0x28, 0x42,
+            ],
+            "こんにけは",
+        ),
+        // Western Europe (Windows-1252)
+        // "CafΓ©" -> 0xE9 is 'Γ©' in Windows-1252 (it is typically 0xC3 0xA9 in UTF-8)
+        ("win1252.txt", &[0x43, 0x61, 0x66, 0xe9], "CafΓ©"),
+        // Chinese Simplified (GBK)
+        // Note: We use a slightly longer string here because short byte sequences can be ambiguous
+        // in multi-byte encodings. Providing more context helps the heuristic detector guess correctly.
+        // Text: "δ»Šε€©ε€©ζ°”δΈι”™" (Today's weather is not bad / nice)
+        // Bytes:
+        //   今: BD F1
+        //   倩: CC EC
+        //   倩: CC EC
+        //   ζ°”: C6 F8
+        //   不: B2 BB
+        //   ι”™: B4 ED
+        (
+            "gbk.txt",
+            &[
+                0xbd, 0xf1, 0xcc, 0xec, 0xcc, 0xec, 0xc6, 0xf8, 0xb2, 0xbb, 0xb4, 0xed,
+            ],
+            "δ»Šε€©ε€©ζ°”δΈι”™",
+        ),
+        (
+            "utf16le_bom.txt",
+            &[
+                0xFF, 0xFE, // BOM
+                0x53, 0x30, // こ
+                0x93, 0x30, // γ‚“
+                0x6B, 0x30, // に
+                0x61, 0x30, // け
+                0x6F, 0x30, // は
+            ],
+            "こんにけは",
+        ),
+        (
+            "utf8_bom.txt",
+            &[
+                0xEF, 0xBB, 0xBF, // UTF-8 BOM
+                0xE3, 0x81, 0x93, // こ
+                0xE3, 0x82, 0x93, // γ‚“
+                0xE3, 0x81, 0xAB, // に
+                0xE3, 0x81, 0xA1, // け
+                0xE3, 0x81, 0xAF, // は
+            ],
+            "こんにけは",
+        ),
+    ];
+
+    let root_path = if cfg!(windows) {
+        Path::new("C:\\root")
+    } else {
+        Path::new("/root")
+    };
+
+    let fs = FakeFs::new(cx.background_executor.clone());
+
+    let mut files_json = serde_json::Map::new();
+    for (name, _, _) in &test_cases {
+        files_json.insert(name.to_string(), serde_json::Value::String("".to_string()));
+    }
+
+    for (name, bytes, _) in &test_cases {
+        let path = root_path.join(name);
+        fs.write(&path, bytes).await.unwrap();
+    }
+
+    let tree = Worktree::local(
+        root_path,
+        true,
+        fs,
+        Default::default(),
+        true,
+        &mut cx.to_async(),
+    )
+    .await
+    .unwrap();
+
+    cx.read(|cx| tree.read(cx).as_local().unwrap().scan_complete())
+        .await;
+
+    for (name, _, expected) in test_cases {
+        let loaded = tree
+            .update(cx, |tree, cx| tree.load_file(rel_path(name), cx))
+            .await
+            .with_context(|| format!("Failed to load {}", name))
+            .unwrap();
+
+        assert_eq!(
+            loaded.text, expected,
+            "Encoding mismatch for file: {}",
+            name
+        );
+    }
+}
+
+#[gpui::test]
+async fn test_write_file_encoding(cx: &mut gpui::TestAppContext) {
+    init_test(cx);
+    let fs = FakeFs::new(cx.executor());
+    let root_path = if cfg!(windows) {
+        Path::new("C:\\root")
+    } else {
+        Path::new("/root")
+    };
+    fs.create_dir(root_path).await.unwrap();
+    let file_path = root_path.join("test.txt");
+
+    fs.insert_file(&file_path, "initial".into()).await;
+
+    let worktree = Worktree::local(
+        root_path,
+        true,
+        fs.clone(),
+        Default::default(),
+        true,
+        &mut cx.to_async(),
+    )
+    .await
+    .unwrap();
+
+    let path: Arc<Path> = Path::new("test.txt").into();
+    let rel_path = RelPath::new(&path, PathStyle::local()).unwrap().into_arc();
+
+    let text = text::Rope::from("こんにけは");
+
+    let task = worktree.update(cx, |wt, cx| {
+        wt.write_file(
+            rel_path,
+            text,
+            text::LineEnding::Unix,
+            encoding_rs::SHIFT_JIS,
+            false,
+            cx,
+        )
+    });
+
+    task.await.unwrap();
+
+    let bytes = fs.load_bytes(&file_path).await.unwrap();
+
+    let expected_bytes = vec![
+        0x82, 0xb1, // こ
+        0x82, 0xf1, // γ‚“
+        0x82, 0xc9, // に
+        0x82, 0xbf, // け
+        0x82, 0xcd, // は
+    ];
+
+    assert_eq!(bytes, expected_bytes, "Should be saved as Shift-JIS");
+}