@@ -38,8 +38,13 @@ impl EncodingWrapper {
EncodingWrapper(encoding)
}
+ /// Returns the `&'static Encoding` held in this wrapper's single tuple field.
+ pub fn get_encoding(&self) -> &'static Encoding {
+ self.0
+ }
+
pub async fn decode(&self, input: Vec<u8>) -> Result<String> {
let (cow, _had_errors) = self.0.decode_with_bom_removal(&input);
+
// `encoding_rs` handles invalid bytes by replacing them with replacement characters
// in the output string, so we return the result even if there were errors.
// This preserves the original behaviour where files with invalid bytes could still be opened.
@@ -48,30 +53,27 @@ impl EncodingWrapper {
pub async fn encode(&self, input: String) -> Result<Vec<u8>> {
if self.0 == encoding_rs::UTF_16BE {
- let mut data: Vec<u8> = vec![];
- let utf = input.encode_utf16().collect::<Vec<u16>>();
-
- for i in utf {
- let byte = i.to_be_bytes();
- for b in byte {
- data.push(b);
- }
- }
+ let mut data = Vec::<u8>::new();
+ data.reserve(input.len() * 2); // Reserve space for UTF-16BE bytes
+
+ // Convert the input string to UTF-16BE bytes
+ let utf16be_bytes: Vec<u8> =
+ input.encode_utf16().flat_map(|u| u.to_be_bytes()).collect();
+
+ data.extend(utf16be_bytes);
return Ok(data);
} else if self.0 == encoding_rs::UTF_16LE {
- let mut data: Vec<u8> = vec![];
- let utf = input.encode_utf16().collect::<Vec<u16>>();
-
- for i in utf {
- let byte = i.to_le_bytes();
- for b in byte {
- data.push(b);
- }
- }
+ let mut data = Vec::<u8>::new();
+ data.reserve(input.len() * 2); // Reserve space for UTF-16LE bytes
+
+ // Convert the input string to UTF-16LE bytes
+ let utf16le_bytes: Vec<u8> =
+ input.encode_utf16().flat_map(|u| u.to_le_bytes()).collect();
+
+ data.extend(utf16le_bytes);
return Ok(data);
} else {
let (cow, _encoding_used, _had_errors) = self.0.encode(&input);
- println!("Encoding: {:?}", self);
// `encoding_rs` handles unencodable characters by replacing them with
// appropriate substitutes in the output, so we return the result even if there were errors.
// This maintains consistency with the decode behaviour.
@@ -694,11 +694,24 @@ impl Fs for RealFs {
}
let file = smol::fs::File::create(path).await?;
let mut writer = smol::io::BufWriter::with_capacity(buffer_size, file);
+
+ // BOM for UTF-16 is written at the start of the file here because
+ // if BOM is written in the `encode` function of `fs::encodings`, it would be written
+ // for every chunk, resulting in multiple BOMs in the file.
+ if encoding.get_encoding() == encoding_rs::UTF_16BE {
+ // Write BOM for UTF-16BE
+ writer.write_all(&[0xFE, 0xFF]).await?;
+ } else if encoding.get_encoding() == encoding_rs::UTF_16LE {
+ // Write BOM for UTF-16LE
+ writer.write_all(&[0xFF, 0xFE]).await?;
+ }
+
for chunk in chunks(text, line_ending) {
writer
.write_all(&from_utf8(chunk.to_string(), encoding.clone()).await?)
- .await?;
+ .await?
}
+
writer.flush().await?;
Ok(())
}