1use encoding_rs;
2use std::{
3 fmt::Debug,
4 sync::{Arc, Mutex, atomic::AtomicBool},
5};
6
7pub use encoding_rs::{
8 BIG5, EUC_JP, EUC_KR, GB18030, GBK, IBM866, ISO_2022_JP, ISO_8859_2, ISO_8859_3, ISO_8859_4,
9 ISO_8859_5, ISO_8859_6, ISO_8859_7, ISO_8859_8, ISO_8859_8_I, ISO_8859_10, ISO_8859_13,
10 ISO_8859_14, ISO_8859_15, ISO_8859_16, KOI8_R, KOI8_U, MACINTOSH, SHIFT_JIS, UTF_8, UTF_16BE,
11 UTF_16LE, WINDOWS_874, WINDOWS_1250, WINDOWS_1251, WINDOWS_1252, WINDOWS_1253, WINDOWS_1254,
12 WINDOWS_1255, WINDOWS_1256, WINDOWS_1257, WINDOWS_1258, X_MAC_CYRILLIC,
13};
14
15pub struct Encoding(Mutex<&'static encoding_rs::Encoding>);
16
17impl Debug for Encoding {
18 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
19 f.debug_tuple(&format!("Encoding{:?}", self.0))
20 .field(&self.get().name())
21 .finish()
22 }
23}
24
25impl Clone for Encoding {
26 fn clone(&self) -> Self {
27 Encoding(Mutex::new(self.get()))
28 }
29}
30
31impl Default for Encoding {
32 fn default() -> Self {
33 Encoding(Mutex::new(UTF_8))
34 }
35}
36
37unsafe impl Send for Encoding {}
38unsafe impl Sync for Encoding {}
39
40impl Encoding {
41 pub fn new(encoding: &'static encoding_rs::Encoding) -> Self {
42 Self(Mutex::new(encoding))
43 }
44
45 pub fn set(&self, encoding: &'static encoding_rs::Encoding) {
46 *self.0.lock().unwrap() = encoding;
47 }
48
49 pub fn get(&self) -> &'static encoding_rs::Encoding {
50 *self.0.lock().unwrap()
51 }
52
53 pub async fn decode(
54 &self,
55 input: Vec<u8>,
56 force: bool,
57 detect_utf16: bool,
58 buffer_encoding: Option<Arc<Encoding>>,
59 ) -> anyhow::Result<String> {
60 // Check if the input starts with a BOM for UTF-16 encodings only if detect_utf16 is true.
61 if detect_utf16 {
62 if let Some(encoding) = match input.get(..2) {
63 Some([0xFF, 0xFE]) => Some(UTF_16LE),
64 Some([0xFE, 0xFF]) => Some(UTF_16BE),
65 _ => None,
66 } {
67 self.set(encoding);
68
69 if let Some(v) = buffer_encoding {
70 v.set(encoding)
71 }
72 }
73 }
74
75 let (cow, had_errors) = self.get().decode_with_bom_removal(&input);
76
77 if force {
78 return Ok(cow.to_string());
79 }
80
81 if !had_errors {
82 Ok(cow.to_string())
83 } else {
84 Err(anyhow::anyhow!(
85 "The file contains invalid bytes for the specified encoding: {}.\nThis usually means that the file is not a regular text file, or is encoded in a different encoding.\nContinuing to open it may result in data loss if saved.",
86 self.get().name()
87 ))
88 }
89 }
90
91 pub async fn encode(&self, input: String) -> anyhow::Result<Vec<u8>> {
92 if self.get() == UTF_16BE {
93 let mut data = Vec::<u8>::with_capacity(input.len() * 2);
94
95 // Convert the input string to UTF-16BE bytes
96 let utf16be_bytes = input.encode_utf16().flat_map(|u| u.to_be_bytes());
97
98 data.extend(utf16be_bytes);
99 return Ok(data);
100 } else if self.get() == UTF_16LE {
101 let mut data = Vec::<u8>::with_capacity(input.len() * 2);
102
103 // Convert the input string to UTF-16LE bytes
104 let utf16le_bytes = input.encode_utf16().flat_map(|u| u.to_le_bytes());
105
106 data.extend(utf16le_bytes);
107 return Ok(data);
108 } else {
109 let (cow, _encoding_used, _had_errors) = self.get().encode(&input);
110
111 Ok(cow.into_owned())
112 }
113 }
114
115 pub fn reset(&self) {
116 self.set(UTF_8);
117 }
118}
119
120/// Convert a byte vector from a specified encoding to a UTF-8 string.
121pub async fn to_utf8(
122 input: Vec<u8>,
123 encoding: Encoding,
124 force: bool,
125 detect_utf16: bool,
126 buffer_encoding: Option<Arc<Encoding>>,
127) -> anyhow::Result<String> {
128 encoding
129 .decode(input, force, detect_utf16, buffer_encoding)
130 .await
131}
132
133/// Convert a UTF-8 string to a byte vector in a specified encoding.
134pub async fn from_utf8(input: String, target: Encoding) -> anyhow::Result<Vec<u8>> {
135 target.encode(input).await
136}
137
138pub struct EncodingOptions {
139 pub encoding: Arc<Encoding>,
140 pub force: AtomicBool,
141 pub detect_utf16: AtomicBool,
142}
143
144impl EncodingOptions {
145 pub fn reset(&self) {
146 self.encoding.reset();
147
148 self.force
149 .store(false, std::sync::atomic::Ordering::Release);
150
151 self.detect_utf16
152 .store(true, std::sync::atomic::Ordering::Release);
153 }
154}
155
156impl Default for EncodingOptions {
157 fn default() -> Self {
158 EncodingOptions {
159 encoding: Arc::new(Encoding::default()),
160 force: AtomicBool::new(false),
161 detect_utf16: AtomicBool::new(true),
162 }
163 }
164}