text_node_codecs.rs

  1// Copyright (c) 2017 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
  2//
  3// This Source Code Form is subject to the terms of the Mozilla Public
  4// License, v. 2.0. If a copy of the MPL was not distributed with this
  5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6
  7use crate::util::error::Error;
  8use base64::{engine::general_purpose::STANDARD as Base64Engine, Engine};
  9use jid::Jid;
 10use std::str::FromStr;
 11
 12/// A trait for codecs that can decode and encode text nodes.
 13pub trait Codec {
 14    type Decoded;
 15
 16    /// Decode the given string into the codec’s output.
 17    fn decode(s: &str) -> Result<Self::Decoded, Error>;
 18
 19    /// Encode the given value; return None to not produce a text node at all.
 20    fn encode(decoded: &Self::Decoded) -> Option<String>;
 21}
 22
 23/// Codec for text content.
 24pub struct Text;
 25
 26impl Codec for Text {
 27    type Decoded = String;
 28
 29    fn decode(s: &str) -> Result<String, Error> {
 30        Ok(s.to_owned())
 31    }
 32
 33    fn encode(decoded: &String) -> Option<String> {
 34        Some(decoded.to_owned())
 35    }
 36}
 37
 38/// Codec transformer that makes the text optional; a "" string is decoded as None.
 39pub struct OptionalCodec<T: Codec>(std::marker::PhantomData<T>);
 40
 41impl<T> Codec for OptionalCodec<T>
 42where
 43    T: Codec,
 44{
 45    type Decoded = Option<T::Decoded>;
 46
 47    fn decode(s: &str) -> Result<Option<T::Decoded>, Error> {
 48        if s.is_empty() {
 49            return Ok(None);
 50        }
 51
 52        Ok(Some(T::decode(s)?))
 53    }
 54
 55    fn encode(decoded: &Option<T::Decoded>) -> Option<String> {
 56        decoded.as_ref().and_then(T::encode)
 57    }
 58}
 59
 60/// Codec that trims whitespace around the text.
 61pub struct Trimmed<T: Codec>(std::marker::PhantomData<T>);
 62
 63impl<T> Codec for Trimmed<T>
 64where
 65    T: Codec,
 66{
 67    type Decoded = T::Decoded;
 68
 69    fn decode(s: &str) -> Result<T::Decoded, Error> {
 70        match s.trim() {
 71            // TODO: This error message can be a bit opaque when used
 72            // in-context; ideally it'd be configurable.
 73            "" => Err(Error::ParseError(
 74                "The text in the element's text node was empty after trimming.",
 75            )),
 76            trimmed => T::decode(trimmed),
 77        }
 78    }
 79
 80    fn encode(decoded: &T::Decoded) -> Option<String> {
 81        T::encode(decoded)
 82    }
 83}
 84
 85/// Codec wrapping that encodes/decodes a string as base64.
 86pub struct Base64;
 87
 88impl Codec for Base64 {
 89    type Decoded = Vec<u8>;
 90
 91    fn decode(s: &str) -> Result<Vec<u8>, Error> {
 92        Ok(Base64Engine.decode(s)?)
 93    }
 94
 95    fn encode(decoded: &Vec<u8>) -> Option<String> {
 96        Some(Base64Engine.encode(decoded))
 97    }
 98}
 99
100/// Codec wrapping base64 encode/decode, while ignoring whitespace characters.
101pub struct WhitespaceAwareBase64;
102
103impl Codec for WhitespaceAwareBase64 {
104    type Decoded = Vec<u8>;
105
106    fn decode(s: &str) -> Result<Self::Decoded, Error> {
107        let s: String = s
108            .chars()
109            .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
110            .collect();
111
112        Ok(Base64Engine.decode(s)?)
113    }
114
115    fn encode(decoded: &Self::Decoded) -> Option<String> {
116        Some(Base64Engine.encode(decoded))
117    }
118}
119
120/// Codec for bytes of lowercase hexadecimal, with a fixed length `N` (in bytes).
121pub struct FixedHex<const N: usize>;
122
123impl<const N: usize> Codec for FixedHex<N> {
124    type Decoded = [u8; N];
125
126    fn decode(s: &str) -> Result<Self::Decoded, Error> {
127        if s.len() != 2 * N {
128            return Err(Error::ParseError("Invalid length"));
129        }
130
131        let mut bytes = [0u8; N];
132        for i in 0..N {
133            bytes[i] = u8::from_str_radix(&s[2 * i..2 * i + 2], 16)?;
134        }
135
136        Ok(bytes)
137    }
138
139    fn encode(decoded: &Self::Decoded) -> Option<String> {
140        let mut bytes = String::with_capacity(N * 2);
141        for byte in decoded {
142            bytes.extend(format!("{:02x}", byte).chars());
143        }
144        Some(bytes)
145    }
146}
147
148/// Codec for colon-separated bytes of uppercase hexadecimal.
149pub struct ColonSeparatedHex;
150
151impl Codec for ColonSeparatedHex {
152    type Decoded = Vec<u8>;
153
154    fn decode(s: &str) -> Result<Self::Decoded, Error> {
155        let mut bytes = vec![];
156        for i in 0..(1 + s.len()) / 3 {
157            let byte = u8::from_str_radix(&s[3 * i..3 * i + 2], 16)?;
158            if 3 * i + 2 < s.len() {
159                assert_eq!(&s[3 * i + 2..3 * i + 3], ":");
160            }
161            bytes.push(byte);
162        }
163        Ok(bytes)
164    }
165
166    fn encode(decoded: &Self::Decoded) -> Option<String> {
167        let mut bytes = vec![];
168        for byte in decoded {
169            bytes.push(format!("{:02X}", byte));
170        }
171        Some(bytes.join(":"))
172    }
173}
174
175/// Codec for a JID.
176pub struct JidCodec;
177
178impl Codec for JidCodec {
179    type Decoded = Jid;
180
181    fn decode(s: &str) -> Result<Jid, Error> {
182        Ok(Jid::from_str(s)?)
183    }
184
185    fn encode(jid: &Jid) -> Option<String> {
186        Some(jid.to_string())
187    }
188}
189
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trips a three-byte value through `FixedHex` and checks the error
    /// reported for each kind of malformed input.
    #[test]
    fn fixed_hex() {
        const INVALID_LENGTH: &str = "parse error: Invalid length";
        const INVALID_DIGIT: &str = "integer parsing error: invalid digit found in string";

        let expected: [u8; 3] = [0x01, 0xfe, 0xef];

        // Input may mix lowercase and uppercase digits.
        let decoded = FixedHex::<3>::decode("01feEF").unwrap();
        assert_eq!(decoded, expected);

        // Output is always lowercase.
        let encoded = FixedHex::<3>::encode(&expected).unwrap();
        assert_eq!(encoded, "01feef");

        // Each malformed input paired with the message it must produce.
        let rejected = [
            // Too long.
            ("01feEF01", INVALID_LENGTH),
            // Too short.
            ("01fe", INVALID_LENGTH),
            // Odd number of digits.
            ("01feE", INVALID_LENGTH),
            // Colons are not supported.
            ("0:f:EF", INVALID_DIGIT),
            // Non-hexadecimal characters are not allowed.
            ("01defg", INVALID_DIGIT),
        ];
        for &(input, message) in &rejected {
            let err = FixedHex::<3>::decode(input).unwrap_err();
            assert_eq!(err.to_string(), message, "input: {:?}", input);
        }
    }
}