xso: add support for selectively discarding text and attributes

Jonas Schäfer created

Change summary

parsers/src/util/macro_tests.rs | 99 +++++++++++++++++++++++++++++++++++
xso-proc/src/compound.rs        | 87 +++++++++++++++++++++++++++++-
xso-proc/src/enums.rs           |  5 +
xso-proc/src/field/mod.rs       |  8 ++
xso-proc/src/meta.rs            | 93 ++++++++++++++++++++++++++++++++
xso-proc/src/structs.rs         |  3 +
xso/ChangeLog                   |  2 
xso/src/from_xml_doc.md         | 17 ++++++
8 files changed, 309 insertions(+), 5 deletions(-)

Detailed changes

parsers/src/util/macro_tests.rs 🔗

@@ -2200,3 +2200,102 @@ fn printrawxml() {
     let display = format!("{}", PrintRawXml(&text));
     assert_eq!(display, "<text xmlns='urn:example:ns1'>hello world</text>");
 }
+
+#[derive(FromXml, AsXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "foo", discard(attribute = "bar"))]
+struct DiscardAttribute;
+
+#[test]
+fn discard_attribute_ignore_if_present() {
+    #[allow(unused_imports)]
+    use core::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<DiscardAttribute>("<foo xmlns='urn:example:ns1' bar='baz'/>") {
+        Ok(DiscardAttribute) => (),
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn discard_attribute_ignore_if_absent() {
+    #[allow(unused_imports)]
+    use core::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<DiscardAttribute>("<foo xmlns='urn:example:ns1'/>") {
+        Ok(DiscardAttribute) => (),
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn discard_attribute_absent_roundtrip() {
+    #[allow(unused_imports)]
+    use core::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<DiscardAttribute>("<foo xmlns='urn:example:ns1'/>");
+}
+
+#[test]
+#[cfg_attr(
+    feature = "disable-validation",
+    should_panic = "unexpected result: Ok("
+)]
+fn discard_attribute_fails_on_other_unexpected_attributes() {
+    #[allow(unused_imports)]
+    use core::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<DiscardAttribute>("<foo xmlns='urn:example:ns1' fnord='bar'/>") {
+        Err(xso::error::FromElementError::Invalid(xso::error::Error::Other(e))) => {
+            assert_eq!(e, "Unknown attribute in DiscardAttribute element.");
+        }
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[derive(FromXml, AsXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "foo", discard(text))]
+struct DiscardText;
+
+#[test]
+fn discard_text_ignore_if_present() {
+    #[allow(unused_imports)]
+    use core::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<DiscardText>("<foo xmlns='urn:example:ns1'>quak</foo>") {
+        Ok(DiscardText) => (),
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn discard_text_ignore_if_absent() {
+    #[allow(unused_imports)]
+    use core::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<DiscardText>("<foo xmlns='urn:example:ns1'/>") {
+        Ok(DiscardText) => (),
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn discard_text_absent_roundtrip() {
+    #[allow(unused_imports)]
+    use core::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<DiscardText>("<foo xmlns='urn:example:ns1'/>");
+}

xso-proc/src/compound.rs 🔗

@@ -7,12 +7,12 @@
 //! Handling of the insides of compound structures (structs and enum variants)
 
 use proc_macro2::{Span, TokenStream};
-use quote::quote;
+use quote::{quote, ToTokens};
 use syn::{spanned::Spanned, *};
 
 use crate::error_message::ParentRef;
 use crate::field::{FieldBuilderPart, FieldDef, FieldIteratorPart, FieldTempInit, NestedMatcher};
-use crate::meta::NamespaceRef;
+use crate::meta::{DiscardSpec, Flag, NameRef, NamespaceRef, QNameRef};
 use crate::scope::{mangle_member, AsItemsScope, FromEventsScope};
 use crate::state::{AsItemsSubmachine, FromEventsSubmachine, State};
 use crate::types::{
@@ -55,6 +55,12 @@ pub(crate) struct Compound {
 
     /// Policy defining how to handle unknown children.
     unknown_child_policy: Expr,
+
+    /// Attributes to discard.
+    discard_attr: Vec<(Option<NamespaceRef>, NameRef)>,
+
+    /// Text to discard.
+    discard_text: Flag,
 }
 
 impl Compound {
@@ -63,6 +69,7 @@ impl Compound {
         compound_fields: I,
         unknown_attribute_policy: Option<Ident>,
         unknown_child_policy: Option<Ident>,
+        discard: Vec<DiscardSpec>,
     ) -> Result<Self> {
         let unknown_attribute_policy = resolve_policy(
             unknown_attribute_policy,
@@ -96,10 +103,60 @@ impl Compound {
 
             fields.push(field);
         }
+
+        let mut discard_text = Flag::Absent;
+        let mut discard_attr = Vec::new();
+        for spec in discard {
+            match spec {
+                DiscardSpec::Text { span } => {
+                    if let Some(field) = text_field.as_ref() {
+                        let mut err = Error::new(
+                            *field,
+                            "cannot combine `#[xml(text)]` field with `discard(text)`",
+                        );
+                        err.combine(Error::new(
+                            spec.span(),
+                            "the discard(text) attribute is here",
+                        ));
+                        return Err(err);
+                    }
+                    if let Flag::Present(other) = discard_text {
+                        let mut err = Error::new(
+                            span,
+                            "only one `discard(text)` meta is allowed per compound",
+                        );
+                        err.combine(Error::new(other, "the discard(text) meta is here"));
+                        return Err(err);
+                    }
+
+                    discard_text = Flag::Present(span);
+                }
+
+                DiscardSpec::Attribute {
+                    qname: QNameRef { namespace, name },
+                    span,
+                } => {
+                    let xml_namespace = namespace;
+                    let xml_name = match name {
+                        Some(v) => v,
+                        None => {
+                            return Err(Error::new(
+                                span,
+                                "discard(attribute) must specify a name, e.g. via discard(attribute = \"some-name\")",
+                            ));
+                        }
+                    };
+                    discard_attr.push((xml_namespace, xml_name));
+                }
+            }
+        }
+
         Ok(Self {
             fields,
             unknown_attribute_policy,
             unknown_child_policy,
+            discard_attr,
+            discard_text,
         })
     }
 
@@ -109,6 +166,7 @@ impl Compound {
         container_namespace: &NamespaceRef,
         unknown_attribute_policy: Option<Ident>,
         unknown_child_policy: Option<Ident>,
+        discard: Vec<DiscardSpec>,
     ) -> Result<Self> {
         Self::from_field_defs(
             compound_fields.iter().enumerate().map(|(i, field)| {
@@ -127,6 +185,7 @@ impl Compound {
             }),
             unknown_attribute_policy,
             unknown_child_policy,
+            discard,
         )
     }
 
@@ -165,7 +224,15 @@ impl Compound {
         let mut output_cons = TokenStream::default();
         let mut child_matchers = TokenStream::default();
         let mut fallback_child_matcher = None;
-        let mut text_handler = None;
+        let mut text_handler = if self.discard_text.is_set() {
+            Some(quote! {
+                ::core::result::Result::Ok(::core::ops::ControlFlow::Break(
+                    Self::#default_state_ident { #builder_data_ident }
+                ))
+            })
+        } else {
+            None
+        };
         let mut extra_defs = TokenStream::default();
         let is_tuple = !output_name.is_path();
 
@@ -329,6 +396,19 @@ impl Compound {
             }
         }
 
+        let mut discard_attr = TokenStream::default();
+        for (xml_namespace, xml_name) in self.discard_attr.iter() {
+            let xml_namespace = match xml_namespace {
+                Some(v) => v.to_token_stream(),
+                None => quote! {
+                    ::xso::exports::rxml::Namespace::none()
+                },
+            };
+            discard_attr.extend(quote! {
+                let _ = #attrs.remove(#xml_namespace, #xml_name);
+            });
+        }
+
         let text_handler = match text_handler {
             Some(v) => v,
             None => quote! {
@@ -442,6 +522,7 @@ impl Compound {
                 let #builder_data_ident = #builder_data_ty {
                     #builder_data_init
                 };
+                #discard_attr
                 if #attrs.len() > 0 {
                     let _: () = #unknown_attribute_policy.apply_policy(#unknown_attr_err)?;
                 }

xso-proc/src/enums.rs 🔗

@@ -49,6 +49,7 @@ impl NameVariant {
             on_unknown_attribute,
             on_unknown_child,
             transparent,
+            discard,
         } = XmlCompoundMeta::parse_from_attributes(&decl.attrs)?;
 
         reject_key!(debug flag not on "enum variants" only on "enums and structs");
@@ -70,6 +71,7 @@ impl NameVariant {
                 enum_namespace,
                 on_unknown_attribute,
                 on_unknown_child,
+                discard,
             )?,
         })
     }
@@ -275,6 +277,7 @@ impl DynamicVariant {
             on_unknown_attribute: _, // used by StructInner
             on_unknown_child: _,     // used by StructInner
             transparent: _,          // used by StructInner
+            discard: _,              // used by StructInner
         } = meta;
 
         reject_key!(debug flag not on "enum variants" only on "enums and structs");
@@ -391,6 +394,7 @@ impl EnumInner {
             on_unknown_attribute,
             on_unknown_child,
             transparent,
+            discard,
         } = meta;
 
         // These must've been cleared by the caller. Because these being set
@@ -404,6 +408,7 @@ impl EnumInner {
         reject_key!(transparent flag not on "enums" only on "structs");
         reject_key!(on_unknown_attribute not on "enums" only on "enum variants and structs");
         reject_key!(on_unknown_child not on "enums" only on "enum variants and structs");
+        reject_key!(discard vec not on "enums" only on "enum variants and structs");
 
         if let Some(namespace) = namespace {
             Ok(Self::NameSwitched(NameSwitchedEnum::new(

xso-proc/src/field/mod.rs 🔗

@@ -391,8 +391,12 @@ fn new_field(
                     &xml_namespace,
                 ));
             }
-            let parts =
-                Compound::from_field_defs(field_defs, on_unknown_attribute, on_unknown_child)?;
+            let parts = Compound::from_field_defs(
+                field_defs,
+                on_unknown_attribute,
+                on_unknown_child,
+                vec![],
+            )?;
 
             Ok(Box::new(ChildField {
                 default_,

xso-proc/src/meta.rs 🔗

@@ -60,6 +60,25 @@ macro_rules! reject_key {
             ));
         }
     };
+
+    ($key:ident vec not on $not_allowed_on:literal $(only on $only_allowed_on:literal)?) => {
+        if let Some(ref $key) = $key.first() {
+            return Err(Error::new(
+                $key.span(),
+                concat!(
+                    "`",
+                    stringify!($key),
+                    "` is not allowed on ",
+                    $not_allowed_on,
+                    $(
+                        " (only on ",
+                        $only_allowed_on,
+                        ")",
+                    )?
+                ),
+            ));
+        }
+    };
 }
 
 pub(crate) use reject_key;
@@ -328,6 +347,69 @@ impl QNameRef {
     }
 }
 
+/// Identifies XML content to discard.
+#[derive(Debug)]
+pub(crate) enum DiscardSpec {
+    /// `#[xml(discard(attribute..))]`
+    Attribute {
+        /// The span of the nested meta from which this was parsed.
+        ///
+        /// This is useful for error messages.
+        span: Span,
+
+        /// The value assigned to `namespace` and `name` fields inside
+        /// `#[xml(discard(attribute(..)))]`, if any.
+        qname: QNameRef,
+    },
+
+    /// `#[xml(discard(text))]`
+    Text {
+        /// The span of the nested meta from which this was parsed.
+        ///
+        /// This is useful for error messages.
+        span: Span,
+    },
+}
+
+impl DiscardSpec {
+    pub(crate) fn span(&self) -> Span {
+        match self {
+            Self::Attribute { ref span, .. } => *span,
+            Self::Text { ref span, .. } => *span,
+        }
+    }
+}
+
+impl TryFrom<XmlFieldMeta> for DiscardSpec {
+    type Error = syn::Error;
+
+    fn try_from(other: XmlFieldMeta) -> Result<Self> {
+        match other {
+            XmlFieldMeta::Attribute {
+                span,
+                qname,
+                default_,
+                type_,
+                codec,
+            } => {
+                reject_key!(default_ flag not on "discard specifications" only on "fields");
+                reject_key!(type_ not on "discard specifications" only on "fields");
+                reject_key!(codec not on "discard specifications" only on "fields");
+                Ok(Self::Attribute { span, qname })
+            }
+            XmlFieldMeta::Text { span, type_, codec } => {
+                reject_key!(type_ not on "discard specifications" only on "fields");
+                reject_key!(codec not on "discard specifications" only on "fields");
+                Ok(Self::Text { span })
+            }
+            other => Err(Error::new(
+                other.span(),
+                "cannot discard this kind of child",
+            )),
+        }
+    }
+}
+
 /// Contents of an `#[xml(..)]` attribute on a struct, enum variant, or enum.
 #[derive(Debug)]
 pub(crate) struct XmlCompoundMeta {
@@ -362,6 +444,9 @@ pub(crate) struct XmlCompoundMeta {
 
     /// The transparent flag.
     pub(crate) transparent: Flag,
+
+    /// Items to discard.
+    pub(crate) discard: Vec<DiscardSpec>,
 }
 
 impl XmlCompoundMeta {
@@ -378,6 +463,7 @@ impl XmlCompoundMeta {
         let mut debug = Flag::Absent;
         let mut exhaustive = Flag::Absent;
         let mut transparent = Flag::Absent;
+        let mut discard = Vec::new();
 
         attr.parse_nested_meta(|meta| {
             if meta.path.is_ident("debug") {
@@ -428,6 +514,12 @@ impl XmlCompoundMeta {
                 }
                 transparent = (&meta.path).into();
                 Ok(())
+            } else if meta.path.is_ident("discard") {
+                meta.parse_nested_meta(|meta| {
+                    discard.push(XmlFieldMeta::parse_from_meta(meta)?.try_into()?);
+                    Ok(())
+                })?;
+                Ok(())
             } else {
                 match qname.parse_incremental_from_meta(meta)? {
                     None => Ok(()),
@@ -446,6 +538,7 @@ impl XmlCompoundMeta {
             on_unknown_child,
             exhaustive,
             transparent,
+            discard,
         })
     }
 

xso-proc/src/structs.rs 🔗

@@ -72,6 +72,7 @@ impl StructInner {
             on_unknown_attribute,
             on_unknown_child,
             transparent,
+            discard,
         } = meta;
 
         // These must've been cleared by the caller. Because these being set
@@ -88,6 +89,7 @@ impl StructInner {
             reject_key!(name not on "transparent structs");
             reject_key!(on_unknown_attribute not on "transparent structs");
             reject_key!(on_unknown_child not on "transparent structs");
+            reject_key!(discard vec not on "transparent structs");
 
             let fields_span = fields.span();
             let fields = match fields {
@@ -152,6 +154,7 @@ impl StructInner {
                     &xml_namespace,
                     on_unknown_attribute,
                     on_unknown_child,
+                    discard,
                 )?,
                 xml_namespace,
                 xml_name,

xso/ChangeLog 🔗

@@ -36,6 +36,8 @@ Version NEXT:
       - Support for `no_std` usage (the alloc crate is required, though).
       - Add a PrintRawXml helper struct to be able to display raw xml, useful
         for debug logs.
+      - Support to selectively discard attributes or text content during
+        parsing (!552).
     * Changes
       - Generated AsXml iterator and FromXml builder types are now
         doc(hidden), to not clutter hand-written documentation with auto

xso/src/from_xml_doc.md 🔗

@@ -70,6 +70,7 @@ The following keys are defined on structs:
 | `iterator` | optional *ident* | The name to use for the generated iterator type. |
 | `on_unknown_attribute` | optional *ident* | Name of an [`UnknownAttributePolicy`] member, controlling how unknown attributes are handled. |
 | `on_unknown_child` | optional *ident* | Name of an [`UnknownChildPolicy`] member, controlling how unknown children are handled. |
+| `discard` | optional *nested* | Contains field specifications of content to ignore. See below for details. |
 
 Note that the `name` value must be a valid XML element name, without colons.
 The namespace prefix, if any, is assigned automatically at serialisation time
@@ -98,6 +99,20 @@ implement [`FromXml`] in order to derive `FromXml` and [`AsXml`] in order to
 derive `AsXml`. The struct will be (de-)serialised exactly like the type of
 that single field. This allows a newtype-like pattern for XSO structs.
 
+`discard` may contain zero or more field meta which describe XML content to
+silently ignore. The syntax is the same as within the `#[xml(..)]` meta used
+on fields, however, any parameters which aren't strictly needed to match the
+content are rejected (for example, you cannot set the codec on a discarded
+attribute because it is irrelevant). Discarded content is never emitted during
+serialisation. Its absence does not cause errors.
+
+```
+# use xso::FromXml;
+#[derive(FromXml, Debug, PartialEq)]
+#[xml(namespace = "urn:example", name = "foo", discard(text))]
+struct Foo;
+```
+
 ## Enums
 
 Two different `enum` flavors are supported:
@@ -126,6 +141,7 @@ The following keys are defined on name-switched enums:
 | `builder` | optional *ident* | The name to use for the generated builder type. |
 | `iterator` | optional *ident* | The name to use for the generated iterator type. |
 | `exhaustive` | *flag* | If present, the enum considers itself authoritative for its namespace; unknown elements within the namespace are rejected instead of treated as mismatch. |
+| `discard` | optional *nested* | Contains field specifications of content to ignore. See the struct meta docs for details. |
 
 All variants of a name-switched enum live within the same namespace and are
 distinguished exclusively by their XML name within that namespace. The
@@ -194,6 +210,7 @@ The following keys are defined on dynamic enums:
 | --- | --- | --- |
 | `builder` | optional *ident* | The name to use for the generated builder type. |
 | `iterator` | optional *ident* | The name to use for the generated iterator type. |
+| `discard` | optional *nested* | Contains field specifications of content to ignore. See the struct meta docs for details. |
 
 For details on `builder` and `iterator`, see the [Struct meta](#struct-meta)
 documentation above.