From 126588b47d6101969438f47d70911f76f92c0f62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Sch=C3=A4fer?= Date: Thu, 1 May 2025 13:30:44 +0200 Subject: [PATCH] xso: allow FromXml implementations to provide matching hints That way, callers can put multiple candidate implementations in, for example, a sorted vector and more efficiently select candidates to try when looking at a new element. --- xso/ChangeLog | 3 +++ xso/src/fromxml.rs | 62 ++++++++++++++++++++++++++++++++++++++++++ xso/src/lib.rs | 67 +++++++++++++++++++++++++++++++--------------- 3 files changed, 110 insertions(+), 22 deletions(-) diff --git a/xso/ChangeLog b/xso/ChangeLog index 1d34987a368767a75ca28062bd4cd726a9dd324b..e7ff5c185ce985181566fa05fff3b8189250a3b0 100644 --- a/xso/ChangeLog +++ b/xso/ChangeLog @@ -54,6 +54,9 @@ Version NEXT: `AsXmlText` and `FromXmlText` implementations based on the standard library's `Display` and `FromStr` traits. - `AsXmlDyn`, a dyn-compatible variant of `AsXml` (!573). + - `FromXml::xml_name_matcher()` and related types, which allow `FromXml` + implementors to provide cachable hints to callers about which XML + elements are valid candidates for parsing (!573). * Changes - Generated AsXml iterator and FromXml builder types are now doc(hidden), to not clutter hand-written documentation with auto diff --git a/xso/src/fromxml.rs b/xso/src/fromxml.rs index 3b3e1e7f7e5eadee2532913af5c61b33f3abc5dc..230c43a7b69b8f3cdb2b0d571011994c2bc9cd21 100644 --- a/xso/src/fromxml.rs +++ b/xso/src/fromxml.rs @@ -17,6 +17,68 @@ use alloc::boxed::Box; use crate::error::{Error, FromEventsError}; use crate::{FromEventsBuilder, FromXml}; +/// Match an XML element qualified name. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum XmlNameMatcher<'x> { + /// Match any XML element + Any, + + /// Match any XML element in the given namespace. + InNamespace(&'x str), + + /// Match any XML element with the exact namespace/name combination. 
+ Specific(&'x str, &'x str), +} + +impl<'x> XmlNameMatcher<'x> { + /// Return the narrowest matcher covering both `XmlNameMatcher` instances. + pub const fn superset(self, other: Self) -> Self { + match self { + Self::Any => Self::Any, + Self::InNamespace(my_namespace) => match other { + Self::Any => Self::Any, + Self::InNamespace(other_namespace) | Self::Specific(other_namespace, _) => { + if crate::util::const_str_eq(my_namespace, other_namespace) { + Self::InNamespace(my_namespace) + } else { + Self::Any + } + } + }, + Self::Specific(my_namespace, my_name) => match other { + Self::Any => Self::Any, + Self::InNamespace(other_namespace) => { + if crate::util::const_str_eq(my_namespace, other_namespace) { + Self::InNamespace(my_namespace) + } else { + Self::Any + } + } + Self::Specific(other_namespace, other_name) => { + if crate::util::const_str_eq(my_namespace, other_namespace) { + if crate::util::const_str_eq(my_name, other_name) { + Self::Specific(my_namespace, my_name) + } else { + Self::InNamespace(my_namespace) + } + } else { + Self::Any + } + } + }, + } + } + + /// Return true if the given `qname` matches this matcher. + pub fn matches(&self, qname: &rxml::QName) -> bool { + match self { + Self::Any => true, + Self::InNamespace(ns) => qname.0.as_str() == *ns, + Self::Specific(ns, name) => qname.0.as_str() == *ns && qname.1.as_str() == *name, + } + } +} + /// # Parsing context for [`FromEventsBuilder`] /// /// For the most part, [`FromEventsBuilder`] implementations can work with diff --git a/xso/src/lib.rs b/xso/src/lib.rs index d32d9286f54b1928de1f95ad5264a5168e225e1c..d5a547f63a69560a4ba2c7b557e1586ff1fd80d0 100644 --- a/xso/src/lib.rs +++ b/xso/src/lib.rs @@ -38,6 +38,34 @@ pub mod minidom_compat; mod rxml_util; pub mod text; +// This is a hack to not make `const_str_eq` publicly available, except +// through the `exports` module if the `macros` feature is enabled, but have +// it available internally in all cases. 
+mod util { + /// Compile-time comparison of two strings. + /// + /// Used by macro-generated code. + /// + /// This is necessary because `<str as PartialEq>::eq` is not `const`. + pub const fn const_str_eq(a: &str, b: &str) -> bool { + let a = a.as_bytes(); + let b = b.as_bytes(); + if a.len() != b.len() { + return false; + } + + let mut i = 0; + while i < a.len() { + if a[i] != b[i] { + return false; + } + i += 1; + } + + true + } +} + #[doc(hidden)] pub mod exports { #[cfg(all(feature = "minidom", feature = "macros"))] @@ -75,35 +103,15 @@ pub mod exports { #[cfg(feature = "macros")] pub type CoreU8 = u8; - /// Compile-time comparison of two strings. - /// - /// Used by macro-generated code. - /// - /// This is necessary because `<str as PartialEq>::eq` is not `const`. #[cfg(feature = "macros")] - pub const fn const_str_eq(a: &'static str, b: &'static str) -> bool { - let a = a.as_bytes(); - let b = b.as_bytes(); - if a.len() != b.len() { - return false; - } - - let mut i = 0; - while i < a.len() { - if a[i] != b[i] { - return false; - } - i += 1; - } - - true - } + pub use super::util::const_str_eq; } use alloc::{borrow::Cow, boxed::Box, string::String, vec::Vec}; #[doc(inline)] pub use fromxml::Context; +use fromxml::XmlNameMatcher; pub use text::TextCodec; @@ -235,6 +243,21 @@ pub trait FromXml { attrs: rxml::AttrMap, ctx: &Context<'_>, ) -> Result; + + /// Return a predicate which determines if `Self` *may* be parsed from + /// a given XML element. + /// + /// The returned matcher **must** match all elements from which `Self` + /// can be parsed, but it may also match elements from which `Self` + /// cannot be parsed. + /// + /// This is an optimisation utility for code locations which have to + /// disambiguate between many `FromXml` implementations. The provided + /// implementation returns a matcher which matches all elements, which is + /// correct, but also very inefficient. 
+ fn xml_name_matcher() -> XmlNameMatcher<'static> { + XmlNameMatcher::Any + } } /// Trait allowing to convert XML text to a value.