From 6f304d197d77142d5d7adfb2ddb7fd13921ce0ee Mon Sep 17 00:00:00 2001 From: Emmanuel Gil Peyrot Date: Sat, 15 Jul 2023 19:25:14 +0200 Subject: [PATCH] jid: Optimise for no-transform JIDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stringprep can apply transformations to a JID; the most well-known one is making the nodepart and domainpart lowercase, but it does much more than that. It is extremely common to have to validate already-normalised JIDs though, and since https://github.com/sfackler/rust-stringprep/pull/4 the stringprep crate optimises for exactly this case, by returning Cow::Borrowed() for common ASCII-only cases. This commit reduces the time spent by a further 15%..58% when already using this stringprep improvement, on top of the 89.5%..98.5% reduction brought by that improvement (and +1.3% total when the JID isn’t normalised yet). For instance, my own full JID parses in 1.83 µs before these changes, in 132 ns with just the stringprep optimisation, and in 46 ns with this commit as well, on an i7-8700K. --- jid/CHANGELOG.md | 1 + jid/Cargo.toml | 2 +- jid/src/inner.rs | 18 +++++++++++++++--- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/jid/CHANGELOG.md b/jid/CHANGELOG.md index 111cb8cb8837ce011a835ccbf91f157616f81201..ef233e9a2a674e22bc108fea708871e7bba4e645 100644 --- a/jid/CHANGELOG.md +++ b/jid/CHANGELOG.md @@ -8,6 +8,7 @@ Unreleased * Additions - Parsing invalid JIDs with stringprep feature no longer results in panic, returning Error with NodePrep, NamePrep or ResourcePrep variant instead (#84) + - Parsing already-normalized JIDs with stringprep is much faster, about 20 times. 
- JID parts are now typed as NodePart, DomainPart and ResourcePart ; once part into those types, JID operations cannot fail - BareJid::with_resource appends a ResourcePart to a BareJid to produce a FullJid (#204) diff --git a/jid/Cargo.toml b/jid/Cargo.toml index 5758c6446fb4e95736971f83c74b5f217b36a545..63dea0d96be4f0eb57a6a4fdedfae176ae1d0903 100644 --- a/jid/Cargo.toml +++ b/jid/Cargo.toml @@ -22,4 +22,4 @@ gitlab = { repository = "xmpp-rs/xmpp-rs" } memchr = "2.5" minidom = { version = "0.15", optional = true } serde = { version = "1.0", features = ["derive"], optional = true } -stringprep = "0.1.2" +stringprep = "0.1.3" diff --git a/jid/src/inner.rs b/jid/src/inner.rs index e124b42edb83695346bb64164d020d24f2f00f09..644e4c2548c0dd066da11a55302e76ab9ee1963b 100644 --- a/jid/src/inner.rs +++ b/jid/src/inner.rs @@ -13,6 +13,7 @@ use crate::Error; use core::num::NonZeroU16; use memchr::memchr; +use std::borrow::Cow; use std::str::FromStr; use stringprep::{nameprep, nodeprep, resourceprep}; @@ -57,7 +58,12 @@ impl InnerJid { orig_at = Some(node.len()); orig_slash = Some(node.len() + domain.len() + 1); - format!("{node}@{domain}/{resource}") + match (node, domain, resource) { + (Cow::Borrowed(_), Cow::Borrowed(_), Cow::Borrowed(_)) => { + unnormalized.to_string() + } + (node, domain, resource) => format!("{node}@{domain}/{resource}"), + } } (Some(at), None) => { let node = nodeprep(&unnormalized[..at]).map_err(|_| Error::NodePrep)?; @@ -67,7 +73,10 @@ impl InnerJid { length_check(domain.len(), Error::DomainEmpty, Error::DomainTooLong)?; orig_at = Some(node.len()); - format!("{node}@{domain}") + match (node, domain) { + (Cow::Borrowed(_), Cow::Borrowed(_)) => unnormalized.to_string(), + (node, domain) => format!("{node}@{domain}"), + } } (None, Some(slash)) => { let domain = nameprep(&unnormalized[..slash]).map_err(|_| Error::NamePrep)?; @@ -78,7 +87,10 @@ impl InnerJid { length_check(resource.len(), Error::ResourceEmpty, Error::ResourceTooLong)?; orig_slash = 
Some(domain.len()); - format!("{domain}/{resource}") + match (domain, resource) { + (Cow::Borrowed(_), Cow::Borrowed(_)) => unnormalized.to_string(), + (domain, resource) => format!("{domain}/{resource}"), + } } (None, None) => { let domain = nameprep(unnormalized).map_err(|_| Error::NamePrep)?;