text_node_codecs.rs

  1// Copyright (c) 2017 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
  2//
  3// This Source Code Form is subject to the terms of the Mozilla Public
  4// License, v. 2.0. If a copy of the MPL was not distributed with this
  5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6
  7use base64::{engine::general_purpose::STANDARD as Base64Engine, Engine};
  8use xso::error::Error;
  9
 10/// A trait for codecs that can decode and encode text nodes.
 11pub trait Codec {
 12    type Decoded;
 13
 14    /// Decode the given string into the codec’s output.
 15    fn decode(s: &str) -> Result<Self::Decoded, Error>;
 16
 17    /// Encode the given value; return None to not produce a text node at all.
 18    fn encode(decoded: &Self::Decoded) -> Option<String>;
 19}
 20
 21/// Codec for text content.
 22pub struct Text;
 23
 24impl Codec for Text {
 25    type Decoded = String;
 26
 27    fn decode(s: &str) -> Result<String, Error> {
 28        Ok(s.to_owned())
 29    }
 30
 31    fn encode(decoded: &String) -> Option<String> {
 32        Some(decoded.to_owned())
 33    }
 34}
 35
 36/// Codec transformer that makes the text optional; a "" string is decoded as None.
 37pub struct OptionalCodec<T: Codec>(std::marker::PhantomData<T>);
 38
 39impl<T> Codec for OptionalCodec<T>
 40where
 41    T: Codec,
 42{
 43    type Decoded = Option<T::Decoded>;
 44
 45    fn decode(s: &str) -> Result<Option<T::Decoded>, Error> {
 46        if s.is_empty() {
 47            return Ok(None);
 48        }
 49
 50        Ok(Some(T::decode(s)?))
 51    }
 52
 53    fn encode(decoded: &Option<T::Decoded>) -> Option<String> {
 54        decoded.as_ref().and_then(T::encode)
 55    }
 56}
 57
 58/// Codec that trims whitespace around the text.
 59pub struct Trimmed<T: Codec>(std::marker::PhantomData<T>);
 60
 61impl<T> Codec for Trimmed<T>
 62where
 63    T: Codec,
 64{
 65    type Decoded = T::Decoded;
 66
 67    fn decode(s: &str) -> Result<T::Decoded, Error> {
 68        match s.trim() {
 69            // TODO: This error message can be a bit opaque when used
 70            // in-context; ideally it'd be configurable.
 71            "" => Err(Error::Other(
 72                "The text in the element's text node was empty after trimming.",
 73            )),
 74            trimmed => T::decode(trimmed),
 75        }
 76    }
 77
 78    fn encode(decoded: &T::Decoded) -> Option<String> {
 79        T::encode(decoded)
 80    }
 81}
 82
 83/// Codec wrapping that encodes/decodes a string as base64.
 84pub struct Base64;
 85
 86impl Codec for Base64 {
 87    type Decoded = Vec<u8>;
 88
 89    fn decode(s: &str) -> Result<Vec<u8>, Error> {
 90        Base64Engine.decode(s).map_err(Error::text_parse_error)
 91    }
 92
 93    fn encode(decoded: &Vec<u8>) -> Option<String> {
 94        Some(Base64Engine.encode(decoded))
 95    }
 96}
 97
 98/// Codec wrapping base64 encode/decode, while ignoring whitespace characters.
 99pub struct WhitespaceAwareBase64;
100
101impl Codec for WhitespaceAwareBase64 {
102    type Decoded = Vec<u8>;
103
104    fn decode(s: &str) -> Result<Self::Decoded, Error> {
105        let s: String = s
106            .chars()
107            .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
108            .collect();
109
110        Base64Engine.decode(s).map_err(Error::text_parse_error)
111    }
112
113    fn encode(decoded: &Self::Decoded) -> Option<String> {
114        Some(Base64Engine.encode(decoded))
115    }
116}
117
118/// Codec for bytes of lowercase hexadecimal, with a fixed length `N` (in bytes).
119pub struct FixedHex<const N: usize>;
120
121impl<const N: usize> Codec for FixedHex<N> {
122    type Decoded = [u8; N];
123
124    fn decode(s: &str) -> Result<Self::Decoded, Error> {
125        if s.len() != 2 * N {
126            return Err(Error::Other("Invalid length"));
127        }
128
129        let mut bytes = [0u8; N];
130        for i in 0..N {
131            bytes[i] =
132                u8::from_str_radix(&s[2 * i..2 * i + 2], 16).map_err(Error::text_parse_error)?;
133        }
134
135        Ok(bytes)
136    }
137
138    fn encode(decoded: &Self::Decoded) -> Option<String> {
139        let mut bytes = String::with_capacity(N * 2);
140        for byte in decoded {
141            bytes.extend(format!("{:02x}", byte).chars());
142        }
143        Some(bytes)
144    }
145}
146
147/// Codec for colon-separated bytes of uppercase hexadecimal.
148pub struct ColonSeparatedHex;
149
150impl Codec for ColonSeparatedHex {
151    type Decoded = Vec<u8>;
152
153    fn decode(s: &str) -> Result<Self::Decoded, Error> {
154        let mut bytes = vec![];
155        for i in 0..(1 + s.len()) / 3 {
156            let byte =
157                u8::from_str_radix(&s[3 * i..3 * i + 2], 16).map_err(Error::text_parse_error)?;
158            if 3 * i + 2 < s.len() {
159                assert_eq!(&s[3 * i + 2..3 * i + 3], ":");
160            }
161            bytes.push(byte);
162        }
163        Ok(bytes)
164    }
165
166    fn encode(decoded: &Self::Decoded) -> Option<String> {
167        let mut bytes = vec![];
168        for byte in decoded {
169            bytes.push(format!("{:02X}", byte));
170        }
171        Some(bytes.join(":"))
172    }
173}
174
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trip and rejection behaviour of `FixedHex::<3>`.
    #[test]
    fn fixed_hex() {
        let expected = [0x01, 0xfe, 0xef];

        // Mixed-case input is accepted when decoding.
        let decoded = FixedHex::<3>::decode("01feEF").unwrap();
        assert_eq!(&decoded, &expected);

        // Output is always lowercase.
        let encoded = FixedHex::<3>::encode(&expected).unwrap();
        assert_eq!(encoded, "01feef");

        // Too long, too short, and an odd number of digits are all rejected
        // by the length check.
        for input in ["01feEF01", "01fe", "01feE"].iter() {
            let err = FixedHex::<3>::decode(input).unwrap_err();
            assert_eq!(err.to_string(), "Invalid length");
        }

        // Colons are not supported, nor is any other non-hex character.
        for input in ["0:f:EF", "01defg"].iter() {
            let err = FixedHex::<3>::decode(input).unwrap_err();
            assert_eq!(
                err.to_string(),
                "text parse error: invalid digit found in string"
            );
        }
    }
}
217}