1//! Encoding and decoding utilities using the `encoding_rs` crate.
2use std::fmt::Debug;
3
4use anyhow::{Error, Result};
5use encoding_rs::Encoding;
6use serde::{Deserialize, de::Visitor};
7
/// A thin newtype around a `&'static encoding_rs::Encoding`.
///
/// The wrapper gives this module a local type on which to hang trait
/// implementations (`Debug`, `Clone`, `PartialEq`, serde `Deserialize`,
/// `Send` and `Sync`). The wrapped reference is `'static`, so a value never
/// borrows from shorter-lived data and can be handed to other threads.
pub struct EncodingWrapper(&'static Encoding);
11
12impl Debug for EncodingWrapper {
13 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
14 f.debug_tuple("EncodingWrapper")
15 .field(&self.0.name())
16 .finish()
17 }
18}
19
20pub struct EncodingWrapperVisitor;
21
22impl<'vi> Visitor<'vi> for EncodingWrapperVisitor {
23 type Value = EncodingWrapper;
24
25 fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
26 formatter.write_str("a valid encoding name")
27 }
28
29 fn visit_str<E: serde::de::Error>(self, encoding: &str) -> Result<EncodingWrapper, E> {
30 Ok(EncodingWrapper(
31 Encoding::for_label(encoding.as_bytes())
32 .ok_or_else(|| serde::de::Error::custom("Invalid Encoding"))?,
33 ))
34 }
35
36 fn visit_string<E: serde::de::Error>(self, encoding: String) -> Result<EncodingWrapper, E> {
37 Ok(EncodingWrapper(
38 Encoding::for_label(encoding.as_bytes())
39 .ok_or_else(|| serde::de::Error::custom("Invalid Encoding"))?,
40 ))
41 }
42}
43
44impl<'de> Deserialize<'de> for EncodingWrapper {
45 fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
46 where
47 D: serde::Deserializer<'de>,
48 {
49 deserializer.deserialize_str(EncodingWrapperVisitor)
50 }
51}
52
53impl PartialEq for EncodingWrapper {
54 fn eq(&self, other: &Self) -> bool {
55 self.0.name() == other.0.name()
56 }
57}
58
59unsafe impl Send for EncodingWrapper {}
60unsafe impl Sync for EncodingWrapper {}
61
62impl Clone for EncodingWrapper {
63 fn clone(&self) -> Self {
64 EncodingWrapper(self.0)
65 }
66}
67
68impl EncodingWrapper {
69 pub fn new(encoding: &'static Encoding) -> EncodingWrapper {
70 EncodingWrapper(encoding)
71 }
72
73 pub async fn decode(&self, input: Vec<u8>) -> Result<String> {
74 let (cow, _encoding_used, _had_errors) = self.0.decode(&input);
75 // encoding_rs handles invalid bytes by replacing them with replacement characters
76 // in the output string, so we return the result even if there were errors.
77 // This preserves the original behavior where files with invalid bytes could still be opened.
78 Ok(cow.into_owned())
79 }
80
81 pub async fn encode(&self, input: String) -> Result<Vec<u8>> {
82 let (cow, _encoding_used, _had_errors) = self.0.encode(&input);
83 // encoding_rs handles unencodable characters by replacing them with
84 // appropriate substitutes in the output, so we return the result even if there were errors.
85 // This maintains consistency with the decode behavior.
86 Ok(cow.into_owned())
87 }
88}
89
90/// Convert a byte vector from a specified encoding to a UTF-8 string.
91pub async fn to_utf8(input: Vec<u8>, encoding: EncodingWrapper) -> Result<String> {
92 encoding.decode(input).await
93}
94
95/// Convert a UTF-8 string to a byte vector in a specified encoding.
96pub async fn from_utf8(input: String, target: EncodingWrapper) -> Result<Vec<u8>> {
97 target.encode(input).await
98}
99
#[cfg(test)]
mod tests {
    use super::*;
    use gpui::BackgroundExecutor;

    /// Regression test: input that is malformed for the selected encoding
    /// must still decode successfully instead of failing to open.
    #[gpui::test]
    async fn test_decode_with_invalid_bytes(_: BackgroundExecutor) {
        // 0xFF can never occur in well-formed UTF-8, and the input does not
        // start with a byte order mark, so this exercises the replacement path.
        let invalid_bytes = vec![0x48, 0xFF, 0x48];
        let encoding = EncodingWrapper::new(encoding_rs::UTF_8);
        let result = encoding.decode(invalid_bytes).await;

        assert!(
            result.is_ok(),
            "Decode should succeed even with invalid bytes"
        );
        assert_eq!(
            result.unwrap(),
            "H\u{FFFD}H",
            "Invalid byte should become a replacement character"
        );

        // These bytes start with 0xFF 0xFE, which encoding_rs documents as a
        // UTF-16LE byte order mark that `Encoding::decode` sniffs. Whatever
        // encoding ends up being used, decoding must still succeed.
        let bom_prefixed_bytes = vec![0xFF, 0xFE, 0x00, 0x48];
        let encoding = EncodingWrapper::new(encoding_rs::UTF_8);
        let result = encoding.decode(bom_prefixed_bytes).await;

        assert!(
            result.is_ok(),
            "Decode should succeed even with invalid bytes"
        );
        assert!(
            !result.unwrap().is_empty(),
            "Decoded string should not be empty"
        );

        // Bytes that are unassigned in Microsoft's Windows-1252 table. The
        // single-byte decoder still yields one character per input byte.
        let maybe_invalid_bytes = vec![0x81, 0x8D, 0x8F, 0x90, 0x9D];
        let encoding = EncodingWrapper::new(encoding_rs::WINDOWS_1252);
        let result = encoding.decode(maybe_invalid_bytes).await;

        assert!(
            result.is_ok(),
            "Decode should succeed with Windows-1252 even with potentially invalid bytes"
        );
        assert_eq!(
            result.unwrap().chars().count(),
            5,
            "Each Windows-1252 byte should decode to exactly one character"
        );
    }

    /// Strings containing characters the target encoding cannot represent
    /// must still encode successfully.
    #[gpui::test]
    async fn test_encode_with_unencodable_chars(_: BackgroundExecutor) {
        // "Hello 世界 🌍" written with escapes so the fixture cannot be
        // damaged by a mis-encoded source file. None of the non-ASCII
        // characters exist in Windows-1252.
        let input = "Hello \u{4E16}\u{754C} \u{1F30D}".to_string();

        let encoding = EncodingWrapper::new(encoding_rs::WINDOWS_1252);
        let result = encoding.encode(input).await;

        assert!(
            result.is_ok(),
            "Encode should succeed even with unencodable characters"
        );

        let encoded = result.unwrap();
        assert!(!encoded.is_empty(), "Encoded bytes should not be empty");
        // encoding_rs replaces unmappable characters with decimal numeric
        // character references.
        assert_eq!(
            encoded,
            b"Hello &#19990;&#30028; &#127757;".to_vec(),
            "Unencodable characters should become numeric character references"
        );
    }
}