encodings.rs

  1//! Encoding and decoding utilities using the `encoding_rs` crate.
  2use std::fmt::Debug;
  3
  4use anyhow::{Error, Result};
  5use encoding_rs::Encoding;
  6use serde::{Deserialize, de::Visitor};
  7
/// A newtype around a `&'static encoding_rs::Encoding`.
///
/// The wrapper is meant to be `Send` and `Sync`: the wrapped reference is
/// `'static`, so it is safe to send it across threads.
pub struct EncodingWrapper(&'static Encoding);
 11
 12impl Debug for EncodingWrapper {
 13    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 14        f.debug_tuple("EncodingWrapper")
 15            .field(&self.0.name())
 16            .finish()
 17    }
 18}
 19
/// Serde visitor that builds an `EncodingWrapper` from an encoding label
/// given as a string.
pub struct EncodingWrapperVisitor;
 21
 22impl<'vi> Visitor<'vi> for EncodingWrapperVisitor {
 23    type Value = EncodingWrapper;
 24
 25    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
 26        formatter.write_str("a valid encoding name")
 27    }
 28
 29    fn visit_str<E: serde::de::Error>(self, encoding: &str) -> Result<EncodingWrapper, E> {
 30        Ok(EncodingWrapper(
 31            Encoding::for_label(encoding.as_bytes())
 32                .ok_or_else(|| serde::de::Error::custom("Invalid Encoding"))?,
 33        ))
 34    }
 35
 36    fn visit_string<E: serde::de::Error>(self, encoding: String) -> Result<EncodingWrapper, E> {
 37        Ok(EncodingWrapper(
 38            Encoding::for_label(encoding.as_bytes())
 39                .ok_or_else(|| serde::de::Error::custom("Invalid Encoding"))?,
 40        ))
 41    }
 42}
 43
 44impl<'de> Deserialize<'de> for EncodingWrapper {
 45    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
 46    where
 47        D: serde::Deserializer<'de>,
 48    {
 49        deserializer.deserialize_str(EncodingWrapperVisitor)
 50    }
 51}
 52
 53impl PartialEq for EncodingWrapper {
 54    fn eq(&self, other: &Self) -> bool {
 55        self.0.name() == other.0.name()
 56    }
 57}
 58
 59unsafe impl Send for EncodingWrapper {}
 60unsafe impl Sync for EncodingWrapper {}
 61
 62impl Clone for EncodingWrapper {
 63    fn clone(&self) -> Self {
 64        EncodingWrapper(self.0)
 65    }
 66}
 67
 68impl EncodingWrapper {
 69    pub fn new(encoding: &'static Encoding) -> EncodingWrapper {
 70        EncodingWrapper(encoding)
 71    }
 72
 73    pub async fn decode(&self, input: Vec<u8>) -> Result<String> {
 74        let (cow, _encoding_used, _had_errors) = self.0.decode(&input);
 75        // encoding_rs handles invalid bytes by replacing them with replacement characters
 76        // in the output string, so we return the result even if there were errors.
 77        // This preserves the original behavior where files with invalid bytes could still be opened.
 78        Ok(cow.into_owned())
 79    }
 80
 81    pub async fn encode(&self, input: String) -> Result<Vec<u8>> {
 82        let (cow, _encoding_used, _had_errors) = self.0.encode(&input);
 83        // encoding_rs handles unencodable characters by replacing them with 
 84        // appropriate substitutes in the output, so we return the result even if there were errors.
 85        // This maintains consistency with the decode behavior.
 86        Ok(cow.into_owned())
 87    }
 88}
 89
 90/// Convert a byte vector from a specified encoding to a UTF-8 string.
 91pub async fn to_utf8(input: Vec<u8>, encoding: EncodingWrapper) -> Result<String> {
 92    encoding.decode(input).await
 93}
 94
 95/// Convert a UTF-8 string to a byte vector in a specified encoding.
 96pub async fn from_utf8(input: String, target: EncodingWrapper) -> Result<Vec<u8>> {
 97    target.encode(input).await
 98}
 99
#[cfg(test)]
mod tests {
    use super::*;
    use gpui::BackgroundExecutor;

    // Regression test: files containing bytes that are invalid for the chosen
    // encoding must still decode instead of failing to open.
    #[gpui::test]
    async fn test_decode_with_invalid_bytes(_: BackgroundExecutor) {
        // 0xFF can never occur in well-formed UTF-8. The input deliberately does
        // NOT start with a byte order mark (e.g. 0xFF 0xFE), because
        // `Encoding::decode` sniffs BOMs and would switch to a different decoder,
        // leaving the invalid-byte path untested.
        let invalid_bytes = vec![0x48, 0xFF, 0x69];

        let encoding = EncodingWrapper::new(encoding_rs::UTF_8);
        let decoded = encoding
            .decode(invalid_bytes)
            .await
            .expect("Decode should succeed even with invalid bytes");

        // The invalid byte is replaced by U+FFFD; the valid bytes are untouched.
        assert_eq!(
            decoded, "H\u{FFFD}i",
            "Invalid bytes should become replacement characters"
        );

        // These bytes are unassigned in Microsoft's windows-1252 table, but the
        // WHATWG definition used by encoding_rs maps them to the C1 control
        // characters of the same value, so decoding is lossless.
        let c1_bytes = vec![0x81, 0x8D, 0x8F, 0x90, 0x9D];
        let encoding = EncodingWrapper::new(encoding_rs::WINDOWS_1252);
        let decoded = encoding
            .decode(c1_bytes)
            .await
            .expect("Decode should succeed with Windows-1252 even with potentially invalid bytes");

        assert_eq!(decoded, "\u{81}\u{8D}\u{8F}\u{90}\u{9D}");
    }

    // Strings containing characters the target encoding cannot represent must
    // still encode instead of failing.
    #[gpui::test]
    async fn test_encode_with_unencodable_chars(_: BackgroundExecutor) {
        // Neither the CJK characters nor the emoji exist in windows-1252.
        let input = "Hello 世界 🌍".to_string();

        let encoding = EncodingWrapper::new(encoding_rs::WINDOWS_1252);
        let encoded = encoding
            .encode(input)
            .await
            .expect("Encode should succeed even with unencodable characters");

        // Unmappable characters are written as decimal numeric character references.
        assert_eq!(encoded, b"Hello &#19990;&#30028; &#127757;".to_vec());
    }
}