icu: Remove this obsolete attempt

Emmanuel Gil Peyrot created

Change summary

Cargo.toml            |   2 
icu/Cargo.toml        |  12 ---
icu/build.rs          |   5 -
icu/src/bindings.c    |  54 ---------------
icu/src/bindings.rs   | 149 -------------------------------------------
icu/src/error.rs      |  51 --------------
icu/src/idna2008.rs   |  69 --------------------
icu/src/lib.rs        | 154 ---------------------------------------------
icu/src/spoof.rs      |  52 ---------------
icu/src/stringprep.rs |  88 -------------------------
10 files changed, 636 deletions(-)

Detailed changes

Cargo.toml 🔗

@@ -1,6 +1,5 @@
 [workspace]
 members = [  # alphabetically sorted
-  "icu",
   "jid",
   "minidom",
   "parsers",
@@ -9,7 +8,6 @@ members = [  # alphabetically sorted
 ]
 
 [patch.crates-io]
-icu = { path = "icu" }
 jid = { path = "jid" }
 minidom = { path = "minidom" }
 tokio-xmpp = { path = "tokio-xmpp" }

icu/Cargo.toml 🔗

@@ -1,12 +0,0 @@
-[package]
-name = "icu"
-version = "0.1.0"
-authors = ["Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>"]
-edition = "2018"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
-
-[build-dependencies]
-cc = "1"

icu/build.rs 🔗

@@ -1,5 +0,0 @@
-fn main() {
-    cc::Build::new().file("src/bindings.c").compile("bindings");
-    println!("cargo:rustc-link-lib=dylib=icuuc");
-    println!("cargo:rustc-link-lib=dylib=icui18n");
-}

icu/src/bindings.c 🔗

@@ -1,54 +0,0 @@
-// This file is a stupid wrapper to avoid the automated suffixing libicu is
-// doing in unicode/urename.h.
-//
-// By default it will suffix each of its symbols with "_65" (with 65 being the
-// soname), which completely messes with Rust’s binding ability.
-
-#include <unicode/umachine.h>
-#include <unicode/utypes.h>
-#include <unicode/usprep.h>
-#include <unicode/utrace.h>
-#include <unicode/uidna.h>
-#include <unicode/uspoof.h>
-#include <unicode/ustring.h>
-#include <string.h>
-
-const char* icu_error_code_to_name(UErrorCode code) {
-	return u_errorName(code);
-}
-
-UIDNA* icu_idna_open(uint32_t options, UErrorCode* pErrorCode) {
-	return uidna_openUTS46(options, pErrorCode);
-}
-
-int32_t icu_idna_name_to_ascii(const UIDNA* idna, const char* name, int32_t length, char* dest, int32_t capacity, UIDNAInfo* pInfo, UErrorCode* pErrorCode) {
-	return uidna_nameToASCII_UTF8(idna, name, length, dest, capacity, pInfo, pErrorCode);
-}
-
-int32_t icu_idna_name_to_unicode(const UIDNA* idna, const char* name, int32_t length, char* dest, int32_t capacity, UIDNAInfo* pInfo, UErrorCode* pErrorCode) {
-	return uidna_nameToUnicodeUTF8(idna, name, length, dest, capacity, pInfo, pErrorCode);
-}
-
-UStringPrepProfile* icu_stringprep_open(UStringPrepProfileType type, UErrorCode* status) {
-	return usprep_openByType(type, status);
-}
-
-int32_t icu_stringprep_prepare(const UStringPrepProfile* prep, const UChar* src, int32_t srcLength, UChar* dest, int32_t destCapacity, int32_t options, UParseError* parseError, UErrorCode* status) {
-	return usprep_prepare(prep, src, srcLength, dest, destCapacity, options, parseError, status);
-}
-
-void icu_trace_set_level(UTraceLevel traceLevel) {
-	utrace_setLevel(traceLevel);
-}
-
-USpoofChecker* icu_spoof_open(UErrorCode* status) {
-	return uspoof_open(status);
-}
-
-void icu_spoof_set_checks(USpoofChecker* sc, int32_t checks, UErrorCode* status) {
-	uspoof_setChecks(sc, checks, status);
-}
-
-int32_t icu_spoof_get_skeleton(USpoofChecker* sc, uint32_t type, const char* id, int32_t length, char* dest, int32_t destCapacity, UErrorCode* status) {
-	return uspoof_getSkeletonUTF8(sc, type, id, length, dest, destCapacity, status);
-}

icu/src/bindings.rs 🔗

@@ -1,149 +0,0 @@
-//! Crate wrapping what we need from ICU’s C API for JIDs.
-//!
-//! See <http://site.icu-project.org/>
-
-use std::os::raw::c_char;
-
-// From unicode/umachine.h
-pub(crate) type UChar = u16;
-
-// From unicode/utypes.h
-pub(crate) type UErrorCode = u32;
-pub(crate) const U_ZERO_ERROR: UErrorCode = 0;
-
-pub(crate) type UStringPrepProfile = u32;
-type UParseError = u32;
-
-// From unicode/usprep.h
-pub(crate) const USPREP_DEFAULT: i32 = 0;
-pub(crate) const USPREP_ALLOW_UNASSIGNED: i32 = 1;
-
-pub(crate) type UStringPrepProfileType = u32;
-pub(crate) const USPREP_RFC3491_NAMEPREP: UStringPrepProfileType = 0;
-pub(crate) const USPREP_RFC3920_NODEPREP: UStringPrepProfileType = 7;
-pub(crate) const USPREP_RFC3920_RESOURCEPREP: UStringPrepProfileType = 8;
-pub(crate) const USPREP_RFC4013_SASLPREP: UStringPrepProfileType = 10;
-
-// From unicode/utrace.h
-type UTraceLevel = i32;
-pub(crate) const UTRACE_VERBOSE: UTraceLevel = 9;
-
-// From unicode/uidna.h
-#[repr(C)]
-pub(crate) struct UIDNA {
-    _unused: [u8; 0],
-}
-type UBool = i8;
-
-#[repr(C)]
-pub(crate) struct UIDNAInfo {
-    size: i16,
-    is_transitional_different: UBool,
-    reserved_b3: UBool,
-    errors: u32,
-    reserved_i2: i32,
-    reserved_i3: i32,
-}
-
-impl UIDNAInfo {
-    pub(crate) fn new() -> UIDNAInfo {
-        assert_eq!(std::mem::size_of::<UIDNAInfo>(), 16);
-        UIDNAInfo {
-            size: std::mem::size_of::<UIDNAInfo>() as i16,
-            is_transitional_different: false as UBool,
-            reserved_b3: false as UBool,
-            errors: 0,
-            reserved_i2: 0,
-            reserved_i3: 0,
-        }
-    }
-
-    // TODO: Return a String instead, or a custom error type, this is a bitflag (defined in
-    // uidna.h) where multiple errors can be accumulated.
-    pub(crate) fn get_errors(&self) -> u32 {
-        self.errors
-    }
-}
-
-pub(crate) const UIDNA_DEFAULT: u32 = 0;
-pub(crate) const UIDNA_USE_STD3_RULES: u32 = 2;
-
-pub(crate) type UIdnaFunction = unsafe extern "C" fn(
-    *const UIDNA,
-    *const u8,
-    i32,
-    *mut u8,
-    i32,
-    *mut UIDNAInfo,
-    *mut u32,
-) -> i32;
-
-// From unicode/uspoof.h
-#[repr(C)]
-pub(crate) struct USpoofChecker {
-    _unused: [u8; 0],
-}
-pub(crate) const USPOOF_CONFUSABLE: i32 = 7;
-
-#[link(name = "bindings")]
-extern "C" {
-    // From unicode/utypes.h
-    pub(crate) fn icu_error_code_to_name(code: UErrorCode) -> *const c_char;
-
-    // From unicode/usprep.h
-    pub(crate) fn icu_stringprep_open(
-        type_: UStringPrepProfileType,
-        status: *mut UErrorCode,
-    ) -> *mut UStringPrepProfile;
-    pub(crate) fn icu_stringprep_prepare(
-        prep: *const UStringPrepProfile,
-        src: *const UChar,
-        srcLength: i32,
-        dest: *mut UChar,
-        destCapacity: i32,
-        options: i32,
-        parseError: *mut UParseError,
-        status: *mut UErrorCode,
-    ) -> i32;
-
-    // From unicode/utrace.h
-    pub(crate) fn icu_trace_set_level(traceLevel: UTraceLevel);
-
-    // From unicode/uidna.h
-    pub(crate) fn icu_idna_open(options: u32, pErrorCode: *mut UErrorCode) -> *mut UIDNA;
-    pub(crate) fn icu_idna_name_to_ascii(
-        idna: *const UIDNA,
-        name: *const u8,
-        length: i32,
-        dest: *mut u8,
-        capacity: i32,
-        pInfo: *mut UIDNAInfo,
-        pErrorCode: *mut UErrorCode,
-    ) -> i32;
-    pub(crate) fn icu_idna_name_to_unicode(
-        idna: *const UIDNA,
-        name: *const u8,
-        length: i32,
-        dest: *mut u8,
-        capacity: i32,
-        pInfo: *mut UIDNAInfo,
-        pErrorCode: *mut UErrorCode,
-    ) -> i32;
-
-    // From unicode/uspoof.h
-    pub(crate) fn icu_spoof_open(status: *mut UErrorCode) -> *mut USpoofChecker;
-    pub(crate) fn icu_spoof_set_checks(
-        sc: *mut USpoofChecker,
-        checks: i32,
-        status: *mut UErrorCode,
-    );
-    pub(crate) fn icu_spoof_get_skeleton(
-        sc: *const USpoofChecker,
-        type_: u32,
-        id: *const u8,
-        length: i32,
-        dest: *mut u8,
-        destCapacity: i32,
-        status: *mut UErrorCode,
-    ) -> i32;
-}

icu/src/error.rs 🔗

@@ -1,51 +0,0 @@
-//! Crate wrapping what we need from ICU’s C API for JIDs.
-//!
-//! See <http://site.icu-project.org/>
-
-use crate::bindings::{icu_error_code_to_name, UErrorCode};
-use std::ffi::CStr;
-
-/// Errors this library can produce.
-#[derive(Debug, PartialEq, Eq)]
-pub enum Error {
-    /// An error produced by one of the ICU functions.
-    Icu(String),
-
-    /// An error produced by one of the IDNA2008 ICU functions.
-    Idna(u32),
-
-    /// Some ICU function didn’t produce a valid UTF-8 string, should never happen.
-    Utf8(std::string::FromUtf8Error),
-
-    /// Some ICU function didn’t produce a valid UTF-16 string, should never happen.
-    Utf16(std::char::DecodeUtf16Error),
-
-    /// Some string was too long for its profile in JID.
-    TooLong,
-}
-
-impl Error {
-    pub(crate) fn from_icu_code(err: UErrorCode) -> Error {
-        let ptr = unsafe { icu_error_code_to_name(err) };
-        let c_str = unsafe { CStr::from_ptr(ptr) };
-        Error::Icu(c_str.to_string_lossy().into_owned())
-    }
-}
-
-impl From<UErrorCode> for Error {
-    fn from(err: UErrorCode) -> Error {
-        Error::from_icu_code(err)
-    }
-}
-
-impl From<std::string::FromUtf8Error> for Error {
-    fn from(err: std::string::FromUtf8Error) -> Error {
-        Error::Utf8(err)
-    }
-}
-
-impl From<std::char::DecodeUtf16Error> for Error {
-    fn from(err: std::char::DecodeUtf16Error) -> Error {
-        Error::Utf16(err)
-    }
-}

icu/src/idna2008.rs 🔗

@@ -1,69 +0,0 @@
-//! Crate wrapping what we need from ICU’s C API for JIDs.
-//!
-//! See <http://site.icu-project.org/>
-
-use crate::bindings::{
-    icu_idna_name_to_ascii, icu_idna_name_to_unicode, icu_idna_open, UErrorCode, UIDNAInfo,
-    UIdnaFunction, UIDNA, U_ZERO_ERROR,
-};
-use crate::error::Error;
-
-/// TODO: IDNA2008 support.
-pub struct Idna {
-    inner: *mut UIDNA,
-}
-
-impl Idna {
-    /// Create a new Idna struct.
-    pub fn new(options: u32) -> Result<Idna, UErrorCode> {
-        let mut err: UErrorCode = U_ZERO_ERROR;
-        let inner = unsafe { icu_idna_open(options, &mut err) };
-        match err {
-            U_ZERO_ERROR => Ok(Idna { inner }),
-            err => Err(err),
-        }
-    }
-
-    /// Converts a whole domain name into its ASCII form for DNS lookup.
-    pub fn to_ascii(&self, input: &str) -> Result<String, Error> {
-        self.idna(input, icu_idna_name_to_ascii)
-    }
-
-    /// Converts a whole domain name into its Unicode form for human-readable display.
-    pub fn to_unicode(&self, input: &str) -> Result<String, Error> {
-        self.idna(input, icu_idna_name_to_unicode)
-    }
-
-    fn idna(&self, input: &str, function: UIdnaFunction) -> Result<String, Error> {
-        if input.len() > 255 {
-            return Err(Error::TooLong);
-        }
-
-        let mut err: UErrorCode = U_ZERO_ERROR;
-        let mut dest: Vec<u8> = vec![0u8; 256];
-        let mut info = UIDNAInfo::new();
-        let len = unsafe {
-            function(
-                self.inner,
-                input.as_ptr(),
-                input.len() as i32,
-                dest.as_mut_ptr(),
-                dest.len() as i32,
-                &mut info,
-                &mut err,
-            )
-        };
-        if err != U_ZERO_ERROR {
-            return Err(Error::from_icu_code(err));
-        }
-        let errors = info.get_errors();
-        if errors != 0 {
-            return Err(Error::Idna(errors));
-        }
-        if len > 255 {
-            return Err(Error::TooLong);
-        }
-        dest.truncate(len as usize);
-        Ok(String::from_utf8(dest)?)
-    }
-}

icu/src/lib.rs 🔗

@@ -1,154 +0,0 @@
-//! Crate wrapping what we need from ICU’s C API for JIDs.
-//!
-//! See <http://site.icu-project.org/>
-
-#![deny(missing_docs)]
-
-mod bindings;
-mod error;
-mod idna2008;
-mod spoof;
-mod stringprep;
-
-use crate::bindings::{
-    icu_trace_set_level, UIDNA_DEFAULT, UIDNA_USE_STD3_RULES, USPOOF_CONFUSABLE,
-    USPREP_RFC3491_NAMEPREP, USPREP_RFC3920_NODEPREP, USPREP_RFC3920_RESOURCEPREP,
-    USPREP_RFC4013_SASLPREP, UTRACE_VERBOSE,
-};
-pub use crate::error::Error;
-pub use crate::idna2008::Idna;
-pub use crate::spoof::SpoofChecker;
-use crate::stringprep::Stringprep;
-
-/// How unassigned codepoints should be handled.
-pub enum Strict {
-    /// All codepoints should be assigned, otherwise an error will be emitted.
-    True,
-
-    /// Codepoints can be unassigned.
-    AllowUnassigned,
-}
-
-/// Main struct of this module, exposing the needed ICU functions to JID.
-pub struct Icu {
-    nameprep: Stringprep,
-    nodeprep: Stringprep,
-    resourceprep: Stringprep,
-    saslprep: Stringprep,
-
-    /// IDNA2008 support.
-    ///
-    /// See [RFC5891](https://tools.ietf.org/html/rfc5891).
-    pub idna2008: Idna,
-
-    /// Spoof checker TODO: better doc.
-    pub spoofchecker: SpoofChecker,
-}
-
-impl Icu {
-    /// Create a new ICU struct, initialising stringprep profiles, IDNA2008, as well as a spoof
-    /// checker.
-    pub fn new() -> Result<Icu, Error> {
-        unsafe { icu_trace_set_level(UTRACE_VERBOSE) };
-
-        let nameprep = Stringprep::new(USPREP_RFC3491_NAMEPREP)?;
-        let nodeprep = Stringprep::new(USPREP_RFC3920_NODEPREP)?;
-        let resourceprep = Stringprep::new(USPREP_RFC3920_RESOURCEPREP)?;
-        let saslprep = Stringprep::new(USPREP_RFC4013_SASLPREP)?;
-
-        let mut options = UIDNA_DEFAULT;
-        options |= UIDNA_USE_STD3_RULES;
-        let idna2008 = Idna::new(options)?;
-
-        let spoofchecker = SpoofChecker::new(USPOOF_CONFUSABLE)?;
-
-        Ok(Icu {
-            nameprep,
-            nodeprep,
-            resourceprep,
-            saslprep,
-            idna2008,
-            spoofchecker,
-        })
-    }
-
-    /// Perform stringprep using the Nameprep profile.
-    ///
-    /// See [RFC3491](https://tools.ietf.org/html/rfc3491).
-    pub fn nameprep(&self, string: &str, strict: Strict) -> Result<String, Error> {
-        self.nameprep.stringprep(string, strict)
-    }
-
-    /// Perform stringprep using the Nodeprep profile.
-    ///
-    /// See [RFC6122 appendix A](https://tools.ietf.org/html/rfc6122#appendix-A).
-    pub fn nodeprep(&self, string: &str, strict: Strict) -> Result<String, Error> {
-        self.nodeprep.stringprep(string, strict)
-    }
-
-    /// Perform stringprep using the Resourceprep profile.
-    ///
-    /// See [RFC6122 appendix A](https://tools.ietf.org/html/rfc6122#appendix-A).
-    pub fn resourceprep(&self, string: &str, strict: Strict) -> Result<String, Error> {
-        self.resourceprep.stringprep(string, strict)
-    }
-
-    /// Perform stringprep using the Saslprep profile.
-    ///
-    /// See [RFC4013](https://tools.ietf.org/html/rfc4013).
-    pub fn saslprep(&self, string: &str, strict: Strict) -> Result<String, Error> {
-        self.saslprep.stringprep(string, strict)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn nameprep() {
-        let name = "Link";
-        let icu = Icu::new().unwrap();
-        let name = icu.nodeprep.stringprep(name, Strict::True).unwrap();
-        assert_eq!(name, "link");
-    }
-
-    #[test]
-    fn resourceprep() {
-        let name = "Test™";
-        let icu = Icu::new().unwrap();
-        let name = icu
-            .resourceprep
-            .stringprep(name, Strict::AllowUnassigned)
-            .unwrap();
-        assert_eq!(name, "TestTM");
-    }
-
-    #[test]
-    fn idna() {
-        let name = "☃.coM";
-        let icu = Icu::new().unwrap();
-        let name = icu.idna2008.to_ascii(name).unwrap();
-        assert_eq!(name, "xn--n3h.com");
-
-        let name = "xn--N3H.com";
-        let icu = Icu::new().unwrap();
-        let name = icu.idna2008.to_unicode(name).unwrap();
-        assert_eq!(name, "☃.com");
-    }
-
-    #[test]
-    fn spoof() {
-        // Non-breakable and narrow non-breakable spaces spoofing.
-        let name = "foo bar baz";
-        let icu = Icu::new().unwrap();
-        let name = icu.spoofchecker.get_skeleton(name).unwrap();
-        assert_eq!(name, "foo bar baz");
-
-        // Cyrillic spoofing.
-        let name = "Неllо wоrld";
-        let icu = Icu::new().unwrap();
-        let name = icu.spoofchecker.get_skeleton(name).unwrap();
-        assert_eq!(name, "Hello world");
-    }
-}

icu/src/spoof.rs 🔗

@@ -1,52 +0,0 @@
-//! Crate wrapping what we need from ICU’s C API for JIDs.
-//!
-//! See <http://site.icu-project.org/>
-
-use crate::bindings::{
-    icu_spoof_get_skeleton, icu_spoof_open, icu_spoof_set_checks, UErrorCode, USpoofChecker,
-    U_ZERO_ERROR,
-};
-use crate::error::Error;
-
-/// TODO: spoof checker.
-pub struct SpoofChecker {
-    inner: *mut USpoofChecker,
-}
-
-impl SpoofChecker {
-    /// Create a new SpoofChecker.
-    pub fn new(checks: i32) -> Result<SpoofChecker, UErrorCode> {
-        let mut err: UErrorCode = U_ZERO_ERROR;
-        let inner = unsafe { icu_spoof_open(&mut err) };
-        if err != U_ZERO_ERROR {
-            return Err(err);
-        }
-        unsafe { icu_spoof_set_checks(inner, checks, &mut err) };
-        if err != U_ZERO_ERROR {
-            return Err(err);
-        }
-        Ok(SpoofChecker { inner })
-    }
-
-    /// Transform a string into a skeleton for matching it with other potentially similar strings.
-    pub fn get_skeleton(&self, input: &str) -> Result<String, Error> {
-        let mut err: UErrorCode = U_ZERO_ERROR;
-        let mut dest: Vec<u8> = vec![0u8; 256];
-        let len = unsafe {
-            icu_spoof_get_skeleton(
-                self.inner,
-                0,
-                input.as_ptr(),
-                input.len() as i32,
-                dest.as_mut_ptr(),
-                dest.len() as i32,
-                &mut err,
-            )
-        };
-        if err != U_ZERO_ERROR {
-            return Err(Error::from_icu_code(err));
-        }
-        dest.truncate(len as usize);
-        Ok(String::from_utf8(dest)?)
-    }
-}

icu/src/stringprep.rs 🔗

@@ -1,88 +0,0 @@
-//! Crate wrapping what we need from ICU’s C API for JIDs.
-//!
-//! See <http://site.icu-project.org/>
-
-use crate::bindings::{
-    icu_stringprep_open, icu_stringprep_prepare, UChar, UErrorCode, UStringPrepProfile,
-    UStringPrepProfileType, USPREP_ALLOW_UNASSIGNED, USPREP_DEFAULT, U_ZERO_ERROR,
-};
-use crate::error::Error;
-use crate::Strict;
-use std::ptr::null_mut;
-
-/// Struct representing a given stringprep profile.
-pub(crate) struct Stringprep {
-    inner: *mut UStringPrepProfile,
-}
-
-impl Stringprep {
-    /// Create a new Stringprep struct for the given profile.
-    pub(crate) fn new(profile: UStringPrepProfileType) -> Result<Stringprep, UErrorCode> {
-        let mut err: UErrorCode = U_ZERO_ERROR;
-        let inner = unsafe { icu_stringprep_open(profile, &mut err) };
-        match err {
-            U_ZERO_ERROR => Ok(Stringprep { inner }),
-            err => Err(err),
-        }
-    }
-
-    /// Perform a stringprep operation using this profile.
-    ///
-    /// # Errors
-    /// Returns `Error::Utf16` if ICU doesn’t return a valid UTF-16 string, which should never happen.
-    pub(crate) fn stringprep(&self, input: &str, strict: Strict) -> Result<String, Error> {
-        if input.len() > 1023 {
-            return Err(Error::TooLong);
-        }
-
-        // ICU works on UTF-16 data, so convert it first.
-        let unprepped: Vec<UChar> = input.encode_utf16().collect();
-
-        // Now do the actual stringprep operation.
-        let mut prepped: Vec<UChar> = vec![0u16; 1024];
-        let flags = match strict {
-            Strict::True => USPREP_DEFAULT,
-            Strict::AllowUnassigned => USPREP_ALLOW_UNASSIGNED,
-        };
-        self.prepare(&unprepped, &mut prepped, flags)?;
-
-        // And then convert it back to UTF-8.
-        let output = std::char::decode_utf16(prepped.into_iter())
-            //.map(Result::unwrap)
-            .try_fold(Vec::new(), |mut acc, c| match c {
-                Ok(c) => {
-                    acc.push(c);
-                    Ok(acc)
-                }
-                Err(err) => Err(err),
-            })?;
-        let output: String = output.into_iter().collect();
-
-        if output.len() > 1023 {
-            return Err(Error::TooLong);
-        }
-
-        Ok(output)
-    }
-
-    fn prepare(&self, input: &[UChar], buf: &mut Vec<UChar>, flags: i32) -> Result<(), UErrorCode> {
-        let mut err: UErrorCode = U_ZERO_ERROR;
-        let prepped_len = unsafe {
-            icu_stringprep_prepare(
-                self.inner,
-                input.as_ptr(),
-                input.len() as i32,
-                buf.as_mut_ptr(),
-                buf.len() as i32,
-                flags,
-                null_mut(),
-                &mut err,
-            )
-        };
-        if err != U_ZERO_ERROR {
-            return Err(err);
-        }
-        buf.truncate(prepped_len as usize);
-        Ok(())
-    }
-}