1// Copyright (c) 2017 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7use base64::{engine::general_purpose::STANDARD as Base64Engine, Engine};
8use xso::error::Error;
9
/// A trait for codecs that can decode and encode text nodes.
///
/// Both operations are associated functions without a `self` receiver, so a
/// codec is selected purely through its type.
pub trait Codec {
    /// The value type produced by [`Codec::decode`] and consumed by
    /// [`Codec::encode`].
    type Decoded;

    /// Decode the given string into the codec’s output.
    ///
    /// An `Err` is returned when the codec cannot turn `s` into a
    /// [`Codec::Decoded`] value.
    fn decode(s: &str) -> Result<Self::Decoded, Error>;

    /// Encode the given value; return None to not produce a text node at all.
    fn encode(decoded: &Self::Decoded) -> Option<String>;
}
20
21/// Codec for text content.
22pub struct Text;
23
24impl Codec for Text {
25 type Decoded = String;
26
27 fn decode(s: &str) -> Result<String, Error> {
28 Ok(s.to_owned())
29 }
30
31 fn encode(decoded: &String) -> Option<String> {
32 Some(decoded.to_owned())
33 }
34}
35
36/// Codec transformer that makes the text optional; a "" string is decoded as None.
37pub struct OptionalCodec<T: Codec>(std::marker::PhantomData<T>);
38
39impl<T> Codec for OptionalCodec<T>
40where
41 T: Codec,
42{
43 type Decoded = Option<T::Decoded>;
44
45 fn decode(s: &str) -> Result<Option<T::Decoded>, Error> {
46 if s.is_empty() {
47 return Ok(None);
48 }
49
50 Ok(Some(T::decode(s)?))
51 }
52
53 fn encode(decoded: &Option<T::Decoded>) -> Option<String> {
54 decoded.as_ref().and_then(T::encode)
55 }
56}
57
58/// Codec that trims whitespace around the text.
59pub struct Trimmed<T: Codec>(std::marker::PhantomData<T>);
60
61impl<T> Codec for Trimmed<T>
62where
63 T: Codec,
64{
65 type Decoded = T::Decoded;
66
67 fn decode(s: &str) -> Result<T::Decoded, Error> {
68 match s.trim() {
69 // TODO: This error message can be a bit opaque when used
70 // in-context; ideally it'd be configurable.
71 "" => Err(Error::Other(
72 "The text in the element's text node was empty after trimming.",
73 )),
74 trimmed => T::decode(trimmed),
75 }
76 }
77
78 fn encode(decoded: &T::Decoded) -> Option<String> {
79 T::encode(decoded)
80 }
81}
82
83/// Codec wrapping that encodes/decodes a string as base64.
84pub struct Base64;
85
86impl Codec for Base64 {
87 type Decoded = Vec<u8>;
88
89 fn decode(s: &str) -> Result<Vec<u8>, Error> {
90 Base64Engine.decode(s).map_err(Error::text_parse_error)
91 }
92
93 fn encode(decoded: &Vec<u8>) -> Option<String> {
94 Some(Base64Engine.encode(decoded))
95 }
96}
97
98/// Codec wrapping base64 encode/decode, while ignoring whitespace characters.
99pub struct WhitespaceAwareBase64;
100
101impl Codec for WhitespaceAwareBase64 {
102 type Decoded = Vec<u8>;
103
104 fn decode(s: &str) -> Result<Self::Decoded, Error> {
105 let s: String = s
106 .chars()
107 .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
108 .collect();
109
110 Base64Engine.decode(s).map_err(Error::text_parse_error)
111 }
112
113 fn encode(decoded: &Self::Decoded) -> Option<String> {
114 Some(Base64Engine.encode(decoded))
115 }
116}
117
118/// Codec for bytes of lowercase hexadecimal, with a fixed length `N` (in bytes).
119pub struct FixedHex<const N: usize>;
120
121impl<const N: usize> Codec for FixedHex<N> {
122 type Decoded = [u8; N];
123
124 fn decode(s: &str) -> Result<Self::Decoded, Error> {
125 if s.len() != 2 * N {
126 return Err(Error::Other("Invalid length"));
127 }
128
129 let mut bytes = [0u8; N];
130 for i in 0..N {
131 bytes[i] =
132 u8::from_str_radix(&s[2 * i..2 * i + 2], 16).map_err(Error::text_parse_error)?;
133 }
134
135 Ok(bytes)
136 }
137
138 fn encode(decoded: &Self::Decoded) -> Option<String> {
139 let mut bytes = String::with_capacity(N * 2);
140 for byte in decoded {
141 bytes.extend(format!("{:02x}", byte).chars());
142 }
143 Some(bytes)
144 }
145}
146
147/// Codec for colon-separated bytes of uppercase hexadecimal.
148pub struct ColonSeparatedHex;
149
150impl Codec for ColonSeparatedHex {
151 type Decoded = Vec<u8>;
152
153 fn decode(s: &str) -> Result<Self::Decoded, Error> {
154 let mut bytes = vec![];
155 for i in 0..(1 + s.len()) / 3 {
156 let byte =
157 u8::from_str_radix(&s[3 * i..3 * i + 2], 16).map_err(Error::text_parse_error)?;
158 if 3 * i + 2 < s.len() {
159 assert_eq!(&s[3 * i + 2..3 * i + 3], ":");
160 }
161 bytes.push(byte);
162 }
163 Ok(bytes)
164 }
165
166 fn encode(decoded: &Self::Decoded) -> Option<String> {
167 let mut bytes = vec![];
168 for byte in decoded {
169 bytes.push(format!("{:02X}", byte));
170 }
171 Some(bytes.join(":"))
172 }
173}
174
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `FixedHex` decoding and encoding, along with the error
    /// reported for each kind of malformed input.
    #[test]
    fn fixed_hex() {
        let expected = [0x01, 0xfe, 0xef];

        // Decoding accepts lowercase and uppercase digits alike.
        let decoded = FixedHex::<3>::decode("01feEF").unwrap();
        assert_eq!(&decoded, &expected);

        // Encoding always yields lowercase digits.
        let encoded = FixedHex::<3>::encode(&expected).unwrap();
        assert_eq!(encoded, "01feef");

        // Too long, too short, and an odd number of digits are all reported
        // as length errors.
        for input in ["01feEF01", "01fe", "01feE"] {
            let err = FixedHex::<3>::decode(input).unwrap_err();
            assert_eq!(err.to_string(), "Invalid length");
        }

        // Colons are not supported, and neither is any other non-hex
        // character.
        for input in ["0:f:EF", "01defg"] {
            let err = FixedHex::<3>::decode(input).unwrap_err();
            assert_eq!(
                err.to_string(),
                "text parse error: invalid digit found in string"
            );
        }
    }
}