//! Text encoding and decoding utilities built on the `encoding_rs` crate.
2use std::{
3 fmt::Debug,
4 sync::{Arc, Mutex},
5};
6
7use std::sync::atomic::AtomicBool;
8
9use anyhow::Result;
10use encoding_rs::Encoding;
11
12/// A wrapper around `encoding_rs::Encoding` to implement `Send` and `Sync`.
13/// Since the reference is static, it is safe to send it across threads.
14#[derive(Copy)]
15pub struct EncodingWrapper(pub &'static Encoding);
16
17impl Debug for EncodingWrapper {
18 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
19 f.debug_tuple(&format!("EncodingWrapper{:?}", self.0))
20 .field(&self.0.name())
21 .finish()
22 }
23}
24
25impl Default for EncodingWrapper {
26 fn default() -> Self {
27 EncodingWrapper(encoding_rs::UTF_8)
28 }
29}
30
31impl PartialEq for EncodingWrapper {
32 fn eq(&self, other: &Self) -> bool {
33 self.0.name() == other.0.name()
34 }
35}
36
37unsafe impl Send for EncodingWrapper {}
38unsafe impl Sync for EncodingWrapper {}
39
40impl Clone for EncodingWrapper {
41 fn clone(&self) -> Self {
42 EncodingWrapper(self.0)
43 }
44}
45
46impl EncodingWrapper {
47 pub fn new(encoding: &'static Encoding) -> EncodingWrapper {
48 EncodingWrapper(encoding)
49 }
50
51 pub fn get_encoding(&self) -> &'static Encoding {
52 self.0
53 }
54
55 pub async fn decode(
56 &mut self,
57 input: Vec<u8>,
58 force: bool,
59 detect_utf16: bool,
60 buffer_encoding: Option<Arc<Mutex<&'static Encoding>>>,
61 ) -> Result<String> {
62 // Check if the input starts with a BOM for UTF-16 encodings only if detect_utf16 is true.
63 if detect_utf16 {
64 if let Some(encoding) = match input.get(..2) {
65 Some([0xFF, 0xFE]) => Some(encoding_rs::UTF_16LE),
66 Some([0xFE, 0xFF]) => Some(encoding_rs::UTF_16BE),
67 _ => None,
68 } {
69 self.0 = encoding;
70
71 if let Some(v) = buffer_encoding
72 && let Ok(mut v) = v.lock()
73 {
74 *v = encoding;
75 }
76 }
77 }
78
79 let (cow, had_errors) = self.0.decode_with_bom_removal(&input);
80
81 if force {
82 return Ok(cow.to_string());
83 }
84
85 if !had_errors {
86 Ok(cow.to_string())
87 } else {
88 Err(anyhow::anyhow!(
89 "The file contains invalid bytes for the specified encoding: {}.\nThis usually means that the file is not a regular text file, or is encoded in a different encoding.\nContinuing to open it may result in data loss if saved.",
90 self.0.name()
91 ))
92 }
93 }
94
95 pub async fn encode(&self, input: String) -> Result<Vec<u8>> {
96 if self.0 == encoding_rs::UTF_16BE {
97 let mut data = Vec::<u8>::with_capacity(input.len() * 2);
98
99 // Convert the input string to UTF-16BE bytes
100 let utf16be_bytes = input.encode_utf16().flat_map(|u| u.to_be_bytes());
101
102 data.extend(utf16be_bytes);
103 return Ok(data);
104 } else if self.0 == encoding_rs::UTF_16LE {
105 let mut data = Vec::<u8>::with_capacity(input.len() * 2);
106
107 // Convert the input string to UTF-16LE bytes
108 let utf16le_bytes = input.encode_utf16().flat_map(|u| u.to_le_bytes());
109
110 data.extend(utf16le_bytes);
111 return Ok(data);
112 } else {
113 let (cow, _encoding_used, _had_errors) = self.0.encode(&input);
114
115 Ok(cow.into_owned())
116 }
117 }
118}
119
120/// Convert a byte vector from a specified encoding to a UTF-8 string.
121pub async fn to_utf8(
122 input: Vec<u8>,
123 mut encoding: EncodingWrapper,
124 force: bool,
125 detect_utf16: bool,
126 buffer_encoding: Option<Arc<Mutex<&'static Encoding>>>,
127) -> Result<String> {
128 encoding
129 .decode(input, force, detect_utf16, buffer_encoding)
130 .await
131}
132
133/// Convert a UTF-8 string to a byte vector in a specified encoding.
134pub async fn from_utf8(input: String, target: EncodingWrapper) -> Result<Vec<u8>> {
135 target.encode(input).await
136}
137
138pub struct EncodingOptions {
139 pub encoding: Arc<Mutex<EncodingWrapper>>,
140 pub force: AtomicBool,
141 pub detect_utf16: AtomicBool,
142}
143
144impl Default for EncodingOptions {
145 fn default() -> Self {
146 EncodingOptions {
147 encoding: Arc::new(Mutex::new(EncodingWrapper::default())),
148 force: AtomicBool::new(false),
149 detect_utf16: AtomicBool::new(true),
150 }
151 }
152}