1//! Encoding and decoding utilities using the `encoding_rs` crate.
2use std::fmt::Debug;
3
4use anyhow::Result;
5use encoding_rs::Encoding;
6
/// A thin newtype around a `&'static` [`Encoding`] from the `encoding_rs` crate.
///
/// The wrapper lets this module attach its own trait implementations
/// (`Debug`, `PartialEq`, `Clone`, `Send` and `Sync`) and its `decode` /
/// `encode` helpers to an encoding handle.
///
/// Since the wrapped reference is `'static`, it is safe to send it across threads.
/// The field is private; build a value with [`EncodingWrapper::new`].
pub struct EncodingWrapper(&'static Encoding);
10
11impl Debug for EncodingWrapper {
12 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
13 f.debug_tuple(&format!("EncodingWrapper{:?}", self.0))
14 .field(&self.0.name())
15 .finish()
16 }
17}
18
/// Zero-sized marker type with no fields.
///
/// Nothing in this part of the file implements a trait for it or uses it.
/// NOTE(review): the name suggests a deserialization visitor for
/// `EncodingWrapper` — confirm against the rest of the crate.
pub struct EncodingWrapperVisitor;
20
21impl PartialEq for EncodingWrapper {
22 fn eq(&self, other: &Self) -> bool {
23 self.0.name() == other.0.name()
24 }
25}
26
27unsafe impl Send for EncodingWrapper {}
28unsafe impl Sync for EncodingWrapper {}
29
30impl Clone for EncodingWrapper {
31 fn clone(&self) -> Self {
32 EncodingWrapper(self.0)
33 }
34}
35
36impl EncodingWrapper {
37 pub fn new(encoding: &'static Encoding) -> EncodingWrapper {
38 EncodingWrapper(encoding)
39 }
40
41 pub async fn decode(&self, input: Vec<u8>) -> Result<String> {
42 let (cow, _had_errors) = self.0.decode_with_bom_removal(&input);
43 // `encoding_rs` handles invalid bytes by replacing them with replacement characters
44 // in the output string, so we return the result even if there were errors.
45 // This preserves the original behaviour where files with invalid bytes could still be opened.
46 Ok(cow.into_owned())
47 }
48
49 pub async fn encode(&self, input: String) -> Result<Vec<u8>> {
50 if self.0 == encoding_rs::UTF_16BE {
51 let mut data: Vec<u8> = vec![];
52 let utf = input.encode_utf16().collect::<Vec<u16>>();
53
54 for i in utf {
55 let byte = i.to_be_bytes();
56 for b in byte {
57 data.push(b);
58 }
59 }
60 return Ok(data);
61 } else if self.0 == encoding_rs::UTF_16LE {
62 let mut data: Vec<u8> = vec![];
63 let utf = input.encode_utf16().collect::<Vec<u16>>();
64
65 for i in utf {
66 let byte = i.to_le_bytes();
67 for b in byte {
68 data.push(b);
69 }
70 }
71 return Ok(data);
72 } else {
73 let (cow, _encoding_used, _had_errors) = self.0.encode(&input);
74 println!("Encoding: {:?}", self);
75 // `encoding_rs` handles unencodable characters by replacing them with
76 // appropriate substitutes in the output, so we return the result even if there were errors.
77 // This maintains consistency with the decode behaviour.
78 Ok(cow.into_owned())
79 }
80 }
81}
82
83/// Convert a byte vector from a specified encoding to a UTF-8 string.
84pub async fn to_utf8(input: Vec<u8>, encoding: EncodingWrapper) -> Result<String> {
85 encoding.decode(input).await
86}
87
88/// Convert a UTF-8 string to a byte vector in a specified encoding.
89pub async fn from_utf8(input: String, target: EncodingWrapper) -> Result<Vec<u8>> {
90 target.encode(input).await
91}