WIP: Add ICU bindings for stringprep, idna2008 and spoof checker.

Emmanuel Gil Peyrot created 6 years ago

Change summary

Cargo.toml            |   2 
icu/Cargo.toml        |  12 +++
icu/build.rs          |   5 +
icu/src/bindings.c    |  54 ++++++++++++++++
icu/src/bindings.rs   | 149 +++++++++++++++++++++++++++++++++++++++++++++
icu/src/error.rs      |  67 ++++++++++++++++++++
icu/src/idna2008.rs   |  69 ++++++++++++++++++++
icu/src/lib.rs        | 141 ++++++++++++++++++++++++++++++++++++++++++
icu/src/spoof.rs      |  52 +++++++++++++++
icu/src/stringprep.rs |  88 ++++++++++++++++++++++++++
jid/Cargo.toml        |   1 
jid/src/lib.rs        |  33 +++++++++
12 files changed, 671 insertions(+), 2 deletions(-)

Detailed changes

Cargo.toml 🔗

@@ -1,5 +1,6 @@
 [workspace]
 members = [  # alphabetically sorted
+  "icu",
   "jid",
   "minidom",
   "parsers",
@@ -8,6 +9,7 @@ members = [  # alphabetically sorted
 ]
 
 [patch.crates-io]
+icu = { path = "icu" }
 jid = { path = "jid" }
 minidom = { path = "minidom" }
 tokio-xmpp = { path = "tokio-xmpp" }

icu/Cargo.toml 🔗

@@ -0,0 +1,12 @@
+[package]
+name = "icu"
+version = "0.1.0"
+authors = ["Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>"]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+
+[build-dependencies]
+cc = "1"

icu/build.rs 🔗

@@ -0,0 +1,5 @@
+// Build script: compiles the C shim and tells cargo which ICU libraries to link.
+fn main() {
+    // Build src/bindings.c with the `cc` crate, under the library name "bindings".
+    cc::Build::new().file("src/bindings.c").compile("bindings");
+    // Ask cargo to link dynamically against ICU's icuuc and icui18n libraries.
+    println!("cargo:rustc-link-lib=dylib=icuuc");
+    println!("cargo:rustc-link-lib=dylib=icui18n");
+}

icu/src/bindings.c 🔗

@@ -0,0 +1,54 @@
+// This file is a thin wrapper working around the symbol renaming libicu does
+// in unicode/urename.h.
+//
+// By default libicu suffixes each of its symbols with its major version (for
+// instance "_65"), which prevents Rust from binding them under a stable name.
+
+#include <unicode/umachine.h>
+#include <unicode/utypes.h>
+#include <unicode/usprep.h>
+#include <unicode/utrace.h>
+#include <unicode/uidna.h>
+#include <unicode/uspoof.h>
+#include <unicode/ustring.h>
+#include <string.h>
+
+// Stable-named alias for u_errorName(): forwards `code` and returns its result.
+const char* icu_error_code_to_name(UErrorCode code) {
+	return u_errorName(code);
+}
+
+// Stable-named alias for uidna_openUTS46(); all arguments are forwarded as-is.
+UIDNA* icu_idna_open(uint32_t options, UErrorCode* pErrorCode) {
+	return uidna_openUTS46(options, pErrorCode);
+}
+
+// Stable-named alias for uidna_nameToASCII_UTF8(); all arguments are forwarded as-is.
+int32_t icu_idna_name_to_ascii(const UIDNA* idna, const char* name, int32_t length, char* dest, int32_t capacity, UIDNAInfo* pInfo, UErrorCode* pErrorCode) {
+	return uidna_nameToASCII_UTF8(idna, name, length, dest, capacity, pInfo, pErrorCode);
+}
+
+// Stable-named alias for uidna_nameToUnicodeUTF8(); all arguments are forwarded as-is.
+int32_t icu_idna_name_to_unicode(const UIDNA* idna, const char* name, int32_t length, char* dest, int32_t capacity, UIDNAInfo* pInfo, UErrorCode* pErrorCode) {
+	return uidna_nameToUnicodeUTF8(idna, name, length, dest, capacity, pInfo, pErrorCode);
+}
+
+// Stable-named alias for usprep_openByType(); all arguments are forwarded as-is.
+UStringPrepProfile* icu_stringprep_open(UStringPrepProfileType type, UErrorCode* status) {
+	return usprep_openByType(type, status);
+}
+
+// Stable-named alias for usprep_prepare(); all arguments are forwarded as-is.
+int32_t icu_stringprep_prepare(const UStringPrepProfile* prep, const UChar* src, int32_t srcLength, UChar* dest, int32_t destCapacity, int32_t options, UParseError* parseError, UErrorCode* status) {
+	return usprep_prepare(prep, src, srcLength, dest, destCapacity, options, parseError, status);
+}
+
+// Stable-named alias for utrace_setLevel(); the level is forwarded as-is.
+void icu_trace_set_level(UTraceLevel traceLevel) {
+	utrace_setLevel(traceLevel);
+}
+
+// Stable-named alias for uspoof_open(); the status pointer is forwarded as-is.
+USpoofChecker* icu_spoof_open(UErrorCode* status) {
+	return uspoof_open(status);
+}
+
+// Stable-named alias for uspoof_setChecks(); all arguments are forwarded as-is.
+void icu_spoof_set_checks(USpoofChecker* sc, int32_t checks, UErrorCode* status) {
+	uspoof_setChecks(sc, checks, status);
+}
+
+// Stable-named alias for uspoof_getSkeletonUTF8(); all arguments are forwarded as-is.
+// The checker is taken as a const pointer, matching both the wrapped ICU function and
+// the `*const USpoofChecker` used by the Rust declaration of this wrapper.
+int32_t icu_spoof_get_skeleton(const USpoofChecker* sc, uint32_t type, const char* id, int32_t length, char* dest, int32_t destCapacity, UErrorCode* status) {
+	return uspoof_getSkeletonUTF8(sc, type, id, length, dest, destCapacity, status);
+}

icu/src/bindings.rs 🔗

@@ -0,0 +1,149 @@
+//! Raw FFI declarations for the subset of ICU’s C API this crate wraps.
+//!
+//! See http://site.icu-project.org/
+
+use std::os::raw::c_char;
+
+// From unicode/umachine.h
+/// UTF-16 code unit, as consumed and produced by the stringprep binding.
+pub(crate) type UChar = u16;
+
+// From unicode/utypes.h
+/// Status code written by the ICU functions through their status pointer.
+// NOTE(review): ICU declares UErrorCode as a C enum whose warning values are presumably
+// negative; confirm that an unsigned alias is what is wanted here.
+pub(crate) type UErrorCode = u32;
+/// Status value the callers initialise their status slot with, and compare against.
+pub(crate) const U_ZERO_ERROR: UErrorCode = 0;
+
+/// Placeholder for the stringprep profile type; only ever used behind a pointer.
+pub(crate) type UStringPrepProfile = u32;
+/// Placeholder for the parse error type; only ever used behind a pointer.
+type UParseError = u32;
+
+// From unicode/usprep.h
+/// Option value passed to `icu_stringprep_prepare` for `Strict::True`.
+pub(crate) const USPREP_DEFAULT: i32 = 0;
+/// Option value passed to `icu_stringprep_prepare` for `Strict::AllowUnassigned`.
+pub(crate) const USPREP_ALLOW_UNASSIGNED: i32 = 1;
+
+/// Identifier of a stringprep profile, as accepted by `icu_stringprep_open`.
+pub(crate) type UStringPrepProfileType = u32;
+pub(crate) const USPREP_RFC3491_NAMEPREP: UStringPrepProfileType = 0;
+pub(crate) const USPREP_RFC3920_NODEPREP: UStringPrepProfileType = 7;
+pub(crate) const USPREP_RFC3920_RESOURCEPREP: UStringPrepProfileType = 8;
+pub(crate) const USPREP_RFC4013_SASLPREP: UStringPrepProfileType = 10;
+
+// From unicode/utrace.h
+/// Trace level, as accepted by `icu_trace_set_level`.
+type UTraceLevel = i32;
+pub(crate) const UTRACE_VERBOSE: UTraceLevel = 9;
+
+// From unicode/uidna.h
+/// Opaque stand-in for ICU’s `UIDNA`: zero-sized, and only ever used behind a pointer.
+#[repr(C)]
+pub(crate) struct UIDNA {
+    _unused: [u8; 0],
+}
+/// Boolean as stored in `UIDNAInfo`: one signed byte.
+type UBool = i8;
+
+/// Output structure handed to the IDNA functions, laid out as a C struct.
+#[repr(C)]
+pub(crate) struct UIDNAInfo {
+    /// Size of this struct in bytes, filled in by `UIDNAInfo::new()`.
+    size: i16,
+    is_transitional_different: UBool,
+    reserved_b3: UBool,
+    /// Bitflag of IDNA errors, exposed through `get_errors()`.
+    errors: u32,
+    reserved_i2: i32,
+    reserved_i3: i32,
+}
+
+impl UIDNAInfo {
+    /// Builds an all-zero `UIDNAInfo` whose `size` field holds the byte size of the struct.
+    ///
+    /// # Panics
+    /// Panics if the Rust layout of the struct is not exactly 16 bytes.
+    pub(crate) fn new() -> UIDNAInfo {
+        let byte_size = std::mem::size_of::<Self>();
+        assert_eq!(byte_size, 16);
+        Self {
+            size: byte_size as i16,
+            is_transitional_different: 0,
+            reserved_b3: 0,
+            errors: 0,
+            reserved_i2: 0,
+            reserved_i3: 0,
+        }
+    }
+
+    // TODO: Return a String instead, or a custom error type, this is a bitflag (defined in
+    // uidna.h) where multiple errors can be accumulated.
+    /// Returns the raw error bitflag stored in this structure.
+    pub(crate) fn get_errors(&self) -> u32 {
+        let Self { errors, .. } = self;
+        *errors
+    }
+}
+
+/// Option bits accepted by `icu_idna_open`; they are OR-ed together by `Icu::new()`.
+pub(crate) const UIDNA_DEFAULT: u32 = 0;
+pub(crate) const UIDNA_USE_STD3_RULES: u32 = 2;
+
+/// Common signature of `icu_idna_name_to_ascii` and `icu_idna_name_to_unicode`:
+/// (instance, input, input length, output buffer, output capacity, info, status) -> length.
+pub(crate) type UIdnaFunction = unsafe extern "C" fn(
+    *const UIDNA,
+    *const u8,
+    i32,
+    *mut u8,
+    i32,
+    *mut UIDNAInfo,
+    *mut u32,
+) -> i32;
+
+// From unicode/uspoof.h
+/// Opaque stand-in for ICU’s `USpoofChecker`: zero-sized, only ever used behind a pointer.
+#[repr(C)]
+pub(crate) struct USpoofChecker {
+    _unused: [u8; 0],
+}
+/// Set of checks passed to `SpoofChecker::new()` by `Icu::new()`.
+pub(crate) const USPOOF_CONFUSABLE: i32 = 7;
+
+// Declarations of the wrappers defined in src/bindings.c, which build.rs compiles under the
+// library name "bindings". Every function here is unsafe to call: the caller must pass
+// pointers that are valid for the lengths and capacities given alongside them.
+#[link(name = "bindings")]
+extern "C" {
+    // From unicode/utypes.h (wraps u_errorName)
+    pub(crate) fn icu_error_code_to_name(code: UErrorCode) -> *const c_char;
+
+    // From unicode/usprep.h
+    pub(crate) fn icu_stringprep_open(
+        type_: UStringPrepProfileType,
+        status: *mut UErrorCode,
+    ) -> *mut UStringPrepProfile;
+    pub(crate) fn icu_stringprep_prepare(
+        prep: *const UStringPrepProfile,
+        src: *const UChar,
+        srcLength: i32,
+        dest: *mut UChar,
+        destCapacity: i32,
+        options: i32,
+        parseError: *mut UParseError,
+        status: *mut UErrorCode,
+    ) -> i32;
+
+    // From unicode/utrace.h
+    pub(crate) fn icu_trace_set_level(traceLevel: UTraceLevel);
+
+    // From unicode/uidna.h
+    pub(crate) fn icu_idna_open(options: u32, pErrorCode: *mut UErrorCode) -> *mut UIDNA;
+    pub(crate) fn icu_idna_name_to_ascii(
+        idna: *const UIDNA,
+        name: *const u8,
+        length: i32,
+        dest: *mut u8,
+        capacity: i32,
+        pInfo: *mut UIDNAInfo,
+        pErrorCode: *mut UErrorCode,
+    ) -> i32;
+    pub(crate) fn icu_idna_name_to_unicode(
+        idna: *const UIDNA,
+        name: *const u8,
+        length: i32,
+        dest: *mut u8,
+        capacity: i32,
+        pInfo: *mut UIDNAInfo,
+        pErrorCode: *mut UErrorCode,
+    ) -> i32;
+
+    // From unicode/uspoof.h
+    pub(crate) fn icu_spoof_open(status: *mut UErrorCode) -> *mut USpoofChecker;
+    pub(crate) fn icu_spoof_set_checks(
+        sc: *mut USpoofChecker,
+        checks: i32,
+        status: *mut UErrorCode,
+    );
+    pub(crate) fn icu_spoof_get_skeleton(
+        sc: *const USpoofChecker,
+        type_: u32,
+        id: *const u8,
+        length: i32,
+        dest: *mut u8,
+        destCapacity: i32,
+        status: *mut UErrorCode,
+    ) -> i32;
+}

icu/src/error.rs 🔗

@@ -0,0 +1,67 @@
+//! Error type shared by the ICU wrappers of this crate.
+//!
+//! See http://site.icu-project.org/
+
+use crate::bindings::{icu_error_code_to_name, UErrorCode};
+use std::ffi::CStr;
+
+/// Errors this library can produce.
+#[derive(Debug)]
+pub enum Error {
+    /// An error produced by one of the ICU functions, carrying the name ICU gives it.
+    Icu(String),
+
+    /// An error produced by one of the IDNA2008 ICU functions, as a raw bitflag of errors.
+    Idna(u32),
+
+    /// Some ICU function didn’t produce a valid UTF-8 string, should never happen.
+    Utf8(std::string::FromUtf8Error),
+
+    /// Some ICU function didn’t produce a valid UTF-16 string, should never happen.
+    Utf16(std::char::DecodeUtf16Error),
+
+    /// Some string was too long for its profile in JID.
+    TooLong,
+}
+
+impl std::fmt::Display for Error {
+    /// Writes a short human-readable description of the error.
+    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Error::Icu(name) => write!(fmt, "ICU error: {}", name),
+            Error::Idna(flags) => write!(fmt, "IDNA error flags: {:#x}", flags),
+            Error::Utf8(err) => write!(fmt, "invalid UTF-8 returned by ICU: {}", err),
+            Error::Utf16(err) => write!(fmt, "invalid UTF-16 returned by ICU: {}", err),
+            Error::TooLong => write!(fmt, "string too long"),
+        }
+    }
+}
+
+impl std::error::Error for Error {
+    /// Exposes the underlying decoding error for the `Utf8` and `Utf16` variants.
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            Error::Utf8(err) => Some(err),
+            Error::Utf16(err) => Some(err),
+            Error::Icu(_) | Error::Idna(_) | Error::TooLong => None,
+        }
+    }
+}
+
+impl PartialEq for Error {
+    /// Two errors are equal when they are the same variant and, for `Icu` and `Idna`, carry
+    /// the same payload. The payloads of `Utf8` and `Utf16` are not compared.
+    fn eq(&self, other: &Self) -> bool {
+        match self {
+            Error::Icu(name) => match other {
+                Error::Icu(other_name) => name == other_name,
+                _ => false,
+            },
+            Error::Idna(flags) => match other {
+                Error::Idna(other_flags) => flags == other_flags,
+                _ => false,
+            },
+            // TODO: compare by something here?
+            Error::Utf8(_) => match other {
+                Error::Utf8(_) => true,
+                _ => false,
+            },
+            Error::Utf16(_) => match other {
+                Error::Utf16(_) => true,
+                _ => false,
+            },
+            Error::TooLong => match other {
+                Error::TooLong => true,
+                _ => false,
+            },
+        }
+    }
+}
+
+impl Eq for Error {}
+
+impl Error {
+    /// Builds an `Error::Icu` holding the name ICU reports for the given error code.
+    pub(crate) fn from_icu_code(err: UErrorCode) -> Error {
+        let name = unsafe {
+            let raw_name = icu_error_code_to_name(err);
+            CStr::from_ptr(raw_name)
+        };
+        let name = name.to_string_lossy();
+        Error::Icu(name.into_owned())
+    }
+}
+
+impl From<UErrorCode> for Error {
+    fn from(err: UErrorCode) -> Error {
+        Error::from_icu_code(err)
+    }
+}
+
+impl From<std::string::FromUtf8Error> for Error {
+    fn from(err: std::string::FromUtf8Error) -> Error {
+        Error::Utf8(err)
+    }
+}
+
+impl From<std::char::DecodeUtf16Error> for Error {
+    fn from(err: std::char::DecodeUtf16Error) -> Error {
+        Error::Utf16(err)
+    }
+}

icu/src/idna2008.rs 🔗

@@ -0,0 +1,69 @@
+//! Safe wrapper around ICU’s UTS #46 (IDNA2008) domain name processing.
+//!
+//! See http://site.icu-project.org/
+
+use crate::bindings::{
+    icu_idna_name_to_ascii, icu_idna_name_to_unicode, icu_idna_open, UErrorCode, UIDNAInfo,
+    UIdnaFunction, UIDNA, U_ZERO_ERROR,
+};
+use crate::error::Error;
+
+/// Handle on an ICU IDNA instance, used to convert domain names to their ASCII or Unicode
+/// form.
+// NOTE(review): no close function is bound for UIDNA and no `Drop` impl is visible here, so
+// the instance behind `inner` is presumably never released; confirm this is acceptable.
+pub struct Idna {
+    inner: *mut UIDNA,
+}
+
+impl Idna {
+    /// Create a new Idna struct, configured with the given `UIDNA_*` option bits.
+    ///
+    /// # Errors
+    /// Returns the raw ICU error code if ICU fails to open the instance.
+    pub fn new(options: u32) -> Result<Idna, UErrorCode> {
+        let mut err: UErrorCode = U_ZERO_ERROR;
+        // SAFETY: `err` is a valid, initialised status slot for the duration of the call.
+        let inner = unsafe { icu_idna_open(options, &mut err) };
+        if is_failure(err) {
+            return Err(err);
+        }
+        Ok(Idna { inner })
+    }
+
+    /// Converts a whole domain name into its ASCII form for DNS lookup.
+    ///
+    /// # Errors
+    /// See [`Error`]: the input or output is longer than 255 bytes, ICU reports a failure,
+    /// or ICU flags IDNA errors in the name.
+    pub fn to_ascii(&self, input: &str) -> Result<String, Error> {
+        self.idna(input, icu_idna_name_to_ascii)
+    }
+
+    /// Converts a whole domain name into its Unicode form for human-readable display.
+    ///
+    /// # Errors
+    /// See [`Error`]: the input or output is longer than 255 bytes, ICU reports a failure,
+    /// or ICU flags IDNA errors in the name.
+    pub fn to_unicode(&self, input: &str) -> Result<String, Error> {
+        self.idna(input, icu_idna_name_to_unicode)
+    }
+
+    /// Runs one of the two IDNA conversion functions on `input`, using a 256-byte buffer.
+    fn idna(&self, input: &str, function: UIdnaFunction) -> Result<String, Error> {
+        if input.len() > 255 {
+            return Err(Error::TooLong);
+        }
+
+        let mut err: UErrorCode = U_ZERO_ERROR;
+        let mut dest: Vec<u8> = vec![0u8; 256];
+        let mut info = UIDNAInfo::new();
+        // SAFETY: `input` and `dest` are valid for the lengths passed next to them (both fit
+        // in an i32 thanks to the check above), and `info` and `err` outlive the call.
+        let len = unsafe {
+            function(
+                self.inner,
+                input.as_ptr(),
+                input.len() as i32,
+                dest.as_mut_ptr(),
+                dest.len() as i32,
+                &mut info,
+                &mut err,
+            )
+        };
+        // Only real failures abort here; ICU warnings still come with a usable result.
+        if is_failure(err) {
+            return Err(Error::from_icu_code(err));
+        }
+        let errors = info.get_errors();
+        if errors != 0 {
+            return Err(Error::Idna(errors));
+        }
+        if len > 255 {
+            return Err(Error::TooLong);
+        }
+        dest.truncate(len as usize);
+        Ok(String::from_utf8(dest)?)
+    }
+}
+
+/// Mirrors ICU’s `U_FAILURE()`: only codes strictly greater than `U_ZERO_ERROR` are errors.
+///
+/// The code is reinterpreted as signed first, because ICU reports warnings as negative values
+/// while the binding declares `UErrorCode` as an unsigned integer.
+fn is_failure(code: UErrorCode) -> bool {
+    (code as i32) > 0
+}

icu/src/lib.rs 🔗

@@ -0,0 +1,141 @@
+//! Crate wrapping what we need from ICU’s C API for JIDs.
+//!
+//! See http://site.icu-project.org/
+
+#![deny(missing_docs)]
+
+mod bindings;
+mod error;
+mod idna2008;
+mod spoof;
+mod stringprep;
+
+use crate::bindings::{
+    icu_trace_set_level, UIDNA_DEFAULT, UIDNA_USE_STD3_RULES, USPOOF_CONFUSABLE,
+    USPREP_RFC3491_NAMEPREP, USPREP_RFC3920_NODEPREP, USPREP_RFC3920_RESOURCEPREP,
+    USPREP_RFC4013_SASLPREP, UTRACE_VERBOSE,
+};
+pub use crate::error::Error;
+pub use crate::idna2008::Idna;
+pub use crate::spoof::SpoofChecker;
+pub use crate::stringprep::Stringprep;
+
+/// How unassigned codepoints should be handled.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Strict {
+    /// All codepoints should be assigned, otherwise an error will be emitted.
+    True,
+
+    /// Codepoints can be unassigned.
+    AllowUnassigned,
+}
+
+/// Main struct of this module, exposing the needed ICU functions to JID.
+///
+/// All the fields are initialised together by `Icu::new()`.
+pub struct Icu {
+    /// Perform stringprep using the Nameprep profile.
+    ///
+    /// See [RFC3491](https://tools.ietf.org/html/rfc3491).
+    pub nameprep: Stringprep,
+
+    /// Perform stringprep using the Nodeprep profile.
+    ///
+    /// See [RFC6122 appendix A](https://tools.ietf.org/html/rfc6122#appendix-A).
+    pub nodeprep: Stringprep,
+
+    /// Perform stringprep using the Resourceprep profile.
+    ///
+    /// See [RFC6122 appendix A](https://tools.ietf.org/html/rfc6122#appendix-A).
+    pub resourceprep: Stringprep,
+
+    /// Perform stringprep using the Saslprep profile.
+    ///
+    /// See [RFC4013](https://tools.ietf.org/html/rfc4013).
+    pub saslprep: Stringprep,
+
+    /// IDNA2008 support.
+    ///
+    /// See [RFC5891](https://tools.ietf.org/html/rfc5891).
+    pub idna2008: Idna,
+
+    /// Spoof checker, used to turn a string into a skeleton which can be compared with the
+    /// skeleton of other, potentially similar-looking strings.
+    pub spoofchecker: SpoofChecker,
+}
+
+impl Icu {
+    /// Builds an `Icu`, opening the four stringprep profiles, an IDNA2008 instance using the
+    /// STD3 rules, and a spoof checker configured for confusable detection.
+    ///
+    /// # Errors
+    /// Returns the first error reported while opening any of those.
+    pub fn new() -> Result<Icu, Error> {
+        // NOTE(review): this raises ICU’s trace level to verbose on every construction.
+        unsafe { icu_trace_set_level(UTRACE_VERBOSE) };
+
+        let idna_options = UIDNA_DEFAULT | UIDNA_USE_STD3_RULES;
+
+        // Fields are evaluated in the order written, so the first failure wins.
+        Ok(Icu {
+            nameprep: Stringprep::new(USPREP_RFC3491_NAMEPREP)?,
+            nodeprep: Stringprep::new(USPREP_RFC3920_NODEPREP)?,
+            resourceprep: Stringprep::new(USPREP_RFC3920_RESOURCEPREP)?,
+            saslprep: Stringprep::new(USPREP_RFC4013_SASLPREP)?,
+            idna2008: Idna::new(idna_options)?,
+            spoofchecker: SpoofChecker::new(USPOOF_CONFUSABLE)?,
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn nameprep() {
+        let icu = Icu::new().unwrap();
+
+        // This test is named after Nameprep, so exercise that profile…
+        let name = icu.nameprep.stringprep("Link", Strict::True).unwrap();
+        assert_eq!(name, "link");
+
+        // …and keep covering Nodeprep, which is the profile it originally checked.
+        let name = icu.nodeprep.stringprep("Link", Strict::True).unwrap();
+        assert_eq!(name, "link");
+    }
+
+    #[test]
+    fn resourceprep() {
+        // The trademark sign is expected to come out as the two letters "TM".
+        let icu = Icu::new().unwrap();
+        let prepped = icu.resourceprep.stringprep("Test™", Strict::AllowUnassigned);
+        assert_eq!(prepped.unwrap(), "TestTM");
+    }
+
+    #[test]
+    fn idna() {
+        // Unicode to ASCII: the label gets punycoded and the TLD lowercased.
+        let ascii = Icu::new().unwrap().idna2008.to_ascii("☃.coM").unwrap();
+        assert_eq!(ascii, "xn--n3h.com");
+
+        // ASCII to Unicode: the punycode label is decoded back, whatever its case.
+        let unicode = Icu::new().unwrap().idna2008.to_unicode("xn--N3H.com").unwrap();
+        assert_eq!(unicode, "☃.com");
+    }
+
+    #[test]
+    fn spoof() {
+        // Non-breakable and narrow non-breakable spaces spoofing.
+        let name = "foo bar baz";
+        let icu = Icu::new().unwrap();
+        let name = icu.spoofchecker.get_skeleton(name).unwrap();
+        assert_eq!(name, "foo bar baz");
+
+        // Cyrillic spoofing.
+        let name = "Неllо wоrld";
+        let icu = Icu::new().unwrap();
+        let name = icu.spoofchecker.get_skeleton(name).unwrap();
+        assert_eq!(name, "Hello world");
+    }
+}

icu/src/spoof.rs 🔗

@@ -0,0 +1,52 @@
+//! Safe wrapper around ICU’s spoof checker, used to compute skeletons of strings.
+//!
+//! See http://site.icu-project.org/
+
+use crate::bindings::{
+    icu_spoof_get_skeleton, icu_spoof_open, icu_spoof_set_checks, UErrorCode, USpoofChecker,
+    U_ZERO_ERROR,
+};
+use crate::error::Error;
+
+/// Handle on an ICU spoof checker, used to compute the skeleton of a string.
+// NOTE(review): no close function is bound for USpoofChecker and no `Drop` impl is visible
+// here, so the checker behind `inner` is presumably never released; confirm.
+pub struct SpoofChecker {
+    inner: *mut USpoofChecker,
+}
+
+impl SpoofChecker {
+    /// Create a new SpoofChecker performing the given set of `USPOOF_*` checks.
+    ///
+    /// # Errors
+    /// Returns the raw ICU error code if opening or configuring the checker fails.
+    pub fn new(checks: i32) -> Result<SpoofChecker, UErrorCode> {
+        let mut err: UErrorCode = U_ZERO_ERROR;
+        // SAFETY: `err` is a valid, initialised status slot for the duration of the call.
+        let inner = unsafe { icu_spoof_open(&mut err) };
+        if is_failure(err) {
+            return Err(err);
+        }
+        // SAFETY: `inner` comes from the successful `icu_spoof_open` call just above.
+        // NOTE(review): no close function is bound, so `inner` leaks if this call fails.
+        unsafe { icu_spoof_set_checks(inner, checks, &mut err) };
+        if is_failure(err) {
+            return Err(err);
+        }
+        Ok(SpoofChecker { inner })
+    }
+
+    /// Transform a string into a skeleton for matching it with other potentially similar strings.
+    ///
+    /// # Errors
+    /// Returns `Error::TooLong` if the length of `input` does not fit in an `i32`, and
+    /// `Error::Icu` if ICU reports a failure (presumably including a skeleton that does not
+    /// fit in the 256-byte output buffer).
+    pub fn get_skeleton(&self, input: &str) -> Result<String, Error> {
+        // ICU takes the length as an i32; refuse inputs a cast would silently truncate.
+        if input.len() > i32::max_value() as usize {
+            return Err(Error::TooLong);
+        }
+
+        let mut err: UErrorCode = U_ZERO_ERROR;
+        let mut dest: Vec<u8> = vec![0u8; 256];
+        // SAFETY: `input` and `dest` are valid for the lengths passed next to them, and
+        // `err` outlives the call.
+        let len = unsafe {
+            icu_spoof_get_skeleton(
+                self.inner,
+                0,
+                input.as_ptr(),
+                input.len() as i32,
+                dest.as_mut_ptr(),
+                dest.len() as i32,
+                &mut err,
+            )
+        };
+        // Only real failures abort here; ICU warnings still come with a usable result.
+        if is_failure(err) {
+            return Err(Error::from_icu_code(err));
+        }
+        dest.truncate(len as usize);
+        Ok(String::from_utf8(dest)?)
+    }
+}
+
+/// Mirrors ICU’s `U_FAILURE()`: only codes strictly greater than `U_ZERO_ERROR` are errors.
+///
+/// The code is reinterpreted as signed first, because ICU reports warnings as negative values
+/// while the binding declares `UErrorCode` as an unsigned integer.
+fn is_failure(code: UErrorCode) -> bool {
+    (code as i32) > 0
+}

icu/src/stringprep.rs 🔗

@@ -0,0 +1,88 @@
+//! Safe wrapper around ICU’s stringprep profiles.
+//!
+//! See http://site.icu-project.org/
+
+use crate::bindings::{
+    icu_stringprep_open, icu_stringprep_prepare, UChar, UErrorCode, UStringPrepProfile,
+    UStringPrepProfileType, USPREP_ALLOW_UNASSIGNED, USPREP_DEFAULT, U_ZERO_ERROR,
+};
+use crate::error::Error;
+use crate::Strict;
+use std::ptr::null_mut;
+
+/// Struct representing a given stringprep profile.
+// NOTE(review): no close function is bound for UStringPrepProfile and no `Drop` impl is
+// visible here, so the profile behind `inner` is presumably never released; confirm.
+pub struct Stringprep {
+    inner: *mut UStringPrepProfile,
+}
+
+impl Stringprep {
+    /// Create a new Stringprep struct for the given profile.
+    ///
+    /// # Errors
+    /// Returns the raw ICU error code if ICU fails to open the profile.
+    pub(crate) fn new(profile: UStringPrepProfileType) -> Result<Stringprep, UErrorCode> {
+        let mut err: UErrorCode = U_ZERO_ERROR;
+        // SAFETY: `err` is a valid, initialised status slot for the duration of the call.
+        let inner = unsafe { icu_stringprep_open(profile, &mut err) };
+        if is_failure(err) {
+            return Err(err);
+        }
+        Ok(Stringprep { inner })
+    }
+
+    /// Perform a stringprep operation using this profile.
+    ///
+    /// # Errors
+    /// Returns `Error::TooLong` if the input or the output is longer than 1023 bytes,
+    /// `Error::Icu` if ICU reports a failure, and `Error::Utf16` if ICU doesn’t return a
+    /// valid UTF-16 string, which should never happen.
+    pub fn stringprep(&self, input: &str, strict: Strict) -> Result<String, Error> {
+        if input.len() > 1023 {
+            return Err(Error::TooLong);
+        }
+
+        // ICU works on UTF-16 data, so convert it first.
+        let unprepped: Vec<UChar> = input.encode_utf16().collect();
+
+        // Now do the actual stringprep operation.
+        let mut prepped: Vec<UChar> = vec![0u16; 1024];
+        let flags = match strict {
+            Strict::True => USPREP_DEFAULT,
+            Strict::AllowUnassigned => USPREP_ALLOW_UNASSIGNED,
+        };
+        self.prepare(&unprepped, &mut prepped, flags)?;
+
+        // And then convert it back to UTF-8, stopping at the first invalid code unit.
+        let output = std::char::decode_utf16(prepped).collect::<Result<String, _>>()?;
+
+        if output.len() > 1023 {
+            return Err(Error::TooLong);
+        }
+
+        Ok(output)
+    }
+
+    /// Runs ICU’s stringprep on `input`, writing into `buf` and truncating it to the length
+    /// ICU reports.
+    fn prepare(&self, input: &[UChar], buf: &mut Vec<UChar>, flags: i32) -> Result<(), UErrorCode> {
+        let mut err: UErrorCode = U_ZERO_ERROR;
+        // SAFETY: `input` and `buf` are valid for the lengths passed next to them, the parse
+        // error pointer is deliberately null, and `err` outlives the call.
+        let prepped_len = unsafe {
+            icu_stringprep_prepare(
+                self.inner,
+                input.as_ptr(),
+                input.len() as i32,
+                buf.as_mut_ptr(),
+                buf.len() as i32,
+                flags,
+                null_mut(),
+                &mut err,
+            )
+        };
+        // Only real failures abort here; ICU warnings still come with a usable result.
+        if is_failure(err) {
+            return Err(err);
+        }
+        buf.truncate(prepped_len as usize);
+        Ok(())
+    }
+}
+
+/// Mirrors ICU’s `U_FAILURE()`: only codes strictly greater than `U_ZERO_ERROR` are errors.
+///
+/// The code is reinterpreted as signed first, because ICU reports warnings as negative values
+/// while the binding declares `UErrorCode` as an unsigned integer.
+fn is_failure(code: UErrorCode) -> bool {
+    (code as i32) > 0
+}

jid/Cargo.toml 🔗

@@ -19,5 +19,6 @@ edition = "2018"
 gitlab = { repository = "xmpp-rs/xmpp-rs" }
 
 [dependencies]
+icu = { version = "0.1", optional = true }
 minidom = { version = "0.15", optional = true }
 serde = { version = "1.0", features = ["derive"], optional = true }

jid/src/lib.rs 🔗

@@ -22,8 +22,11 @@ use std::str::FromStr;
 #[cfg(feature = "serde")]
 use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
 
+#[cfg(feature = "icu")]
+use icu::{Icu, Strict};
+
 /// An error that signifies that a `Jid` cannot be parsed from a string.
-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Debug, PartialEq, Eq)]
 pub enum JidParseError {
     /// Happens when there is no domain, that is either the string is empty,
     /// starts with a /, or contains the @/ sequence.
@@ -37,6 +40,10 @@ pub enum JidParseError {
 
     /// Happens when the resource is empty, that is the string ends with a /.
     EmptyResource,
+
+    #[cfg(feature = "icu")]
+    /// Wraps an error reported by the `icu` crate while processing a JID.
+    IcuError(icu::Error),
 }
 
 impl StdError for JidParseError {}
@@ -51,6 +58,8 @@ impl fmt::Display for JidParseError {
                 JidParseError::NoResource => "no resource found in this full JID",
                 JidParseError::EmptyNode => "nodepart empty despite the presence of a @",
                 JidParseError::EmptyResource => "resource empty despite the presence of a /",
+                #[cfg(feature = "icu")]
+                JidParseError::IcuError(_err) => "TODO",
             }
         )
     }
@@ -388,7 +397,19 @@ fn _from_str(s: &str) -> Result<StringJid, JidParseError> {
     } else if let ParserState::Resource = state {
         return Err(JidParseError::EmptyResource);
     }
-    Ok((node, domain.ok_or(JidParseError::NoDomain)?, resource))
+    let domain = domain.ok_or(JidParseError::NoDomain)?;
+    // With the `icu` feature enabled, run each part of the JID through ICU: Nodeprep for the
+    // node, IDNA2008 for the domain and Resourceprep for the resource. Any failure is
+    // reported to the caller as `JidParseError::IcuError` instead of panicking, since the
+    // input is user-controlled.
+    #[cfg(feature = "icu")]
+    let (node, domain, resource) = {
+        let icu = Icu::new().map_err(JidParseError::IcuError)?;
+        let node = node
+            .map(|node| icu.nodeprep.stringprep(&node, Strict::AllowUnassigned))
+            .transpose()
+            .map_err(JidParseError::IcuError)?;
+        let domain = icu
+            .idna2008
+            .to_unicode(&domain)
+            .map_err(JidParseError::IcuError)?;
+        let resource = resource
+            .map(|resource| icu.resourceprep.stringprep(&resource, Strict::AllowUnassigned))
+            .transpose()
+            .map_err(JidParseError::IcuError)?;
+        (node, domain, resource)
+    };
+    Ok((node, domain, resource))
 }
 
 impl FromStr for FullJid {
@@ -905,4 +926,12 @@ mod tests {
             .build();
         assert_eq!(elem.attr("from"), Some(String::from(bare).as_ref()));
     }
+
+    // Checks that, with the `icu` feature enabled, parsing a full JID yields the same value
+    // as building one from the already lowercased node, lowercased domain and "TestTM"
+    // resource.
+    #[cfg(feature = "icu")]
+    #[test]
+    fn icu_jid() {
+        let full = FullJid::from_str("Test@☃.coM/Test™").unwrap();
+        let equiv = FullJid::new("test", "☃.com", "TestTM");
+        assert_eq!(full, equiv);
+    }
 }