1//! Encoding and decoding utilities using the `encoding_rs` crate.
2use std::{
3 fmt::Debug,
4 sync::{Arc, Mutex},
5};
6
7use std::sync::atomic::AtomicBool;
8
9use anyhow::Result;
10use encoding_rs::Encoding;
11
12/// A wrapper around `encoding_rs::Encoding` to implement `Send` and `Sync`.
13/// Since the reference is static, it is safe to send it across threads.
14pub struct EncodingWrapper(pub &'static Encoding);
15
16impl Debug for EncodingWrapper {
17 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
18 f.debug_tuple(&format!("EncodingWrapper{:?}", self.0))
19 .field(&self.0.name())
20 .finish()
21 }
22}
23
24impl Default for EncodingWrapper {
25 fn default() -> Self {
26 EncodingWrapper(encoding_rs::UTF_8)
27 }
28}
29
30impl PartialEq for EncodingWrapper {
31 fn eq(&self, other: &Self) -> bool {
32 self.0.name() == other.0.name()
33 }
34}
35
36unsafe impl Send for EncodingWrapper {}
37unsafe impl Sync for EncodingWrapper {}
38
39impl Clone for EncodingWrapper {
40 fn clone(&self) -> Self {
41 EncodingWrapper(self.0)
42 }
43}
44
45impl EncodingWrapper {
46 pub fn new(encoding: &'static Encoding) -> EncodingWrapper {
47 EncodingWrapper(encoding)
48 }
49
50 pub fn get_encoding(&self) -> &'static Encoding {
51 self.0
52 }
53
54 pub async fn decode(
55 &mut self,
56 input: Vec<u8>,
57 force: bool,
58 detect_utf16: bool,
59 buffer_encoding: Option<Arc<Mutex<&'static Encoding>>>,
60 ) -> Result<String> {
61 // Check if the input starts with a BOM for UTF-16 encodings only if detect_utf16 is true.
62 println!("{}", force);
63 println!("{}", detect_utf16);
64 if detect_utf16 {
65 if let Some(encoding) = match input.get(..2) {
66 Some([0xFF, 0xFE]) => Some(encoding_rs::UTF_16LE),
67 Some([0xFE, 0xFF]) => Some(encoding_rs::UTF_16BE),
68 _ => None,
69 } {
70 self.0 = encoding;
71
72 if let Some(v) = buffer_encoding
73 && let Ok(mut v) = v.lock()
74 {
75 *v = encoding;
76 }
77 }
78 }
79
80 let (cow, had_errors) = self.0.decode_with_bom_removal(&input);
81
82 if force {
83 return Ok(cow.to_string());
84 }
85
86 if !had_errors {
87 Ok(cow.to_string())
88 } else {
89 Err(anyhow::anyhow!(
90 "The file contains invalid bytes for the specified encoding: {}.\nThis usually means that the file is not a regular text file, or is encoded in a different encoding.\nContinuing to open it may result in data loss if saved.",
91 self.0.name()
92 ))
93 }
94 }
95
96 pub async fn encode(&self, input: String) -> Result<Vec<u8>> {
97 if self.0 == encoding_rs::UTF_16BE {
98 let mut data = Vec::<u8>::with_capacity(input.len() * 2);
99
100 // Convert the input string to UTF-16BE bytes
101 let utf16be_bytes = input.encode_utf16().flat_map(|u| u.to_be_bytes());
102
103 data.extend(utf16be_bytes);
104 return Ok(data);
105 } else if self.0 == encoding_rs::UTF_16LE {
106 let mut data = Vec::<u8>::with_capacity(input.len() * 2);
107
108 // Convert the input string to UTF-16LE bytes
109 let utf16le_bytes = input.encode_utf16().flat_map(|u| u.to_le_bytes());
110
111 data.extend(utf16le_bytes);
112 return Ok(data);
113 } else {
114 let (cow, _encoding_used, _had_errors) = self.0.encode(&input);
115
116 Ok(cow.into_owned())
117 }
118 }
119}
120
121/// Convert a byte vector from a specified encoding to a UTF-8 string.
122pub async fn to_utf8(
123 input: Vec<u8>,
124 mut encoding: EncodingWrapper,
125 force: bool,
126 detect_utf16: bool,
127 buffer_encoding: Option<Arc<Mutex<&'static Encoding>>>,
128) -> Result<String> {
129 encoding
130 .decode(input, force, detect_utf16, buffer_encoding)
131 .await
132}
133
134/// Convert a UTF-8 string to a byte vector in a specified encoding.
135pub async fn from_utf8(input: String, target: EncodingWrapper) -> Result<Vec<u8>> {
136 target.encode(input).await
137}
138
139pub struct EncodingOptions {
140 pub encoding: Arc<Mutex<EncodingWrapper>>,
141 pub force: AtomicBool,
142 pub detect_utf16: AtomicBool,
143}
144
145impl Default for EncodingOptions {
146 fn default() -> Self {
147 EncodingOptions {
148 encoding: Arc::new(Mutex::new(EncodingWrapper::default())),
149 force: AtomicBool::new(false),
150 detect_utf16: AtomicBool::new(true),
151 }
152 }
153}