state.rs

// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

//! State machines for parsing and serialising of structs and enums.
  8
  9use proc_macro2::TokenStream;
 10use quote::{quote, ToTokens};
 11use syn::*;
 12
 13/// A single state in a parser or serializer state machine.
 14pub(crate) struct State {
 15    /// Name of the state enum variant for this state.
 16    name: Ident,
 17
 18    /// Declaration of members of the state enum in this state.
 19    decl: TokenStream,
 20
 21    /// Destructuring of members of the state enum in this state.
 22    destructure: TokenStream,
 23
 24    /// Right-hand-side of the match arm for this state.
 25    advance_body: TokenStream,
 26}
 27
 28impl State {
 29    /// Create a new state with the a builder data field.
 30    ///
 31    /// This is a convenience wrapper around `new()` and `add_field()`. This
 32    /// wrapper, or its equivalent, **must** be used for states used in
 33    /// [`FromEventsStateMachine`] state machines, as those expect that the
 34    /// first field is the builder data at render time.
 35    pub(crate) fn new_with_builder(
 36        name: Ident,
 37        builder_data_ident: &Ident,
 38        builder_data_ty: &Type,
 39    ) -> Self {
 40        let mut result = Self::new(name);
 41        result.add_field(builder_data_ident, builder_data_ty);
 42        result
 43    }
 44
 45    /// Create a new, empty state.
 46    ///
 47    /// Note that an empty state will generate invalid code. At the very
 48    /// least, a body must be added using [`Self::set_impl`] or
 49    /// [`Self::with_impl`]. The various state machines may also have
 50    /// additional requirements.
 51    pub(crate) fn new(name: Ident) -> Self {
 52        Self {
 53            name,
 54            decl: TokenStream::default(),
 55            destructure: TokenStream::default(),
 56            advance_body: TokenStream::default(),
 57        }
 58    }
 59
 60    /// Add a field to this state's data.
 61    ///
 62    /// - `name` is the name under which the data will be accessible in the
 63    ///   state's implementation.
 64    /// - `ty` must be the data field's type.
 65    pub(crate) fn add_field(&mut self, name: &Ident, ty: &Type) {
 66        self.decl.extend(quote! { #name: #ty, });
 67        self.destructure.extend(quote! { #name, });
 68    }
 69
 70    /// Modify the state to include another field and return the modified
 71    /// state.
 72    ///
 73    /// This is a consume-and-return-style version of [`Self::add_field`].
 74    pub(crate) fn with_field(mut self, name: &Ident, ty: &Type) -> Self {
 75        self.add_field(name, ty);
 76        self
 77    }
 78
 79    /// Set the `advance` implementation of this state.
 80    ///
 81    /// `body` must be the body of the right hand side of the match arm for
 82    /// the `advance` implementation of the state machine.
 83    ///
 84    /// See [`FromEventsStateMachine::advance_match_arms`] and
 85    /// [`AsItemsSubmachine::compile`] for the respective
 86    /// requirements on the implementations.
 87    pub(crate) fn with_impl(mut self, body: TokenStream) -> Self {
 88        self.advance_body = body;
 89        self
 90    }
 91
 92    /// Override the current `advance` implementation of this state.
 93    ///
 94    /// This is an in-place version of [`Self::with_impl`].
 95    pub(crate) fn set_impl(&mut self, body: TokenStream) {
 96        self.advance_body = body;
 97    }
 98}
 99
100/// A partial [`FromEventsStateMachine`] which only covers the builder for a
101/// single compound.
102///
103/// See [`FromEventsStateMachine`] for more information on the state machines
104/// in general.
105pub(crate) struct FromEventsSubmachine {
106    /// Additional items necessary for the statemachine.
107    pub(crate) defs: TokenStream,
108
109    /// States and state transition implementations.
110    pub(crate) states: Vec<State>,
111
112    /// Initializer expression.
113    ///
114    /// This expression must evaluate to a
115    /// `Result<#state_ty_ident, xso::FromEventsError>`.
116    pub(crate) init: TokenStream,
117}
118
119impl FromEventsSubmachine {
120    /// Convert a partial state machine into a full state machine.
121    ///
122    /// This converts the abstract [`State`] items into token
123    /// streams for the respective parts of the state machine (the state
124    /// definitions and the match arms), rendering them effectively immutable.
125    pub(crate) fn compile(self) -> FromEventsStateMachine {
126        let mut state_defs = TokenStream::default();
127        let mut advance_match_arms = TokenStream::default();
128
129        for state in self.states {
130            let State {
131                name,
132                decl,
133                destructure,
134                advance_body,
135            } = state;
136
137            state_defs.extend(quote! {
138                #name { #decl },
139            });
140
141            // XXX: nasty hack, but works: the first member of the enum always
142            // exists and it always is the builder data, which we always need
143            // mutably available. So we can just prefix the destructuring
144            // token stream with `mut` to make that first member mutable.
145            advance_match_arms.extend(quote! {
146                Self::#name { mut #destructure } => {
147                    #advance_body
148                }
149            });
150        }
151
152        FromEventsStateMachine {
153            defs: self.defs,
154            state_defs,
155            advance_match_arms,
156            variants: vec![FromEventsEntryPoint { init: self.init }],
157        }
158    }
159
160    /// Update the [`init`][`Self::init`] field in-place.
161    ///
162    /// The function will receive a reference to the current `init` value,
163    /// allowing to create "wrappers" around that existing code.
164    pub(crate) fn with_augmented_init<F: FnOnce(&TokenStream) -> TokenStream>(
165        mut self,
166        f: F,
167    ) -> Self {
168        let new_init = f(&self.init);
169        self.init = new_init;
170        self
171    }
172}
173
174/// A partial [`AsItemsStateMachine`] which only covers the builder for a
175/// single compound.
176///
177/// See [`AsItemsStateMachine`] for more information on the state machines
178/// in general.
179pub(crate) struct AsItemsSubmachine {
180    /// Additional items necessary for the statemachine.
181    pub(crate) defs: TokenStream,
182
183    /// States and state transition implementations.
184    pub(crate) states: Vec<State>,
185
186    /// A pattern match which destructures the target type into its parts, for
187    /// use by `init`.
188    pub(crate) destructure: TokenStream,
189
190    /// An expression which uses the names bound in `destructure` to create a
191    /// an instance of the state enum.
192    ///
193    /// The state enum type is available as `Self` in that context.
194    pub(crate) init: TokenStream,
195}
196
197impl AsItemsSubmachine {
198    /// Convert a partial state machine into a full state machine.
199    ///
200    /// This converts the abstract [`State`] items into token
201    /// streams for the respective parts of the state machine (the state
202    /// definitions and the match arms), rendering them effectively immutable.
203    ///
204    /// This requires that the [`State::advance_body`] token streams evaluate
205    /// to an `Option<Item>`. If it evaluates to `Some(.)`, that is
206    /// emitted from the iterator. If it evaluates to `None`, the `advance`
207    /// implementation is called again.
208    ///
209    /// Each state implementation is augmented to also enter the next state,
210    /// causing the iterator to terminate eventually.
211    pub(crate) fn compile(self) -> AsItemsStateMachine {
212        let mut state_defs = TokenStream::default();
213        let mut advance_match_arms = TokenStream::default();
214
215        for (i, state) in self.states.iter().enumerate() {
216            let State {
217                ref name,
218                ref decl,
219                ref destructure,
220                ref advance_body,
221            } = state;
222
223            let footer = match self.states.get(i + 1) {
224                Some(State {
225                    name: ref next_name,
226                    destructure: ref construct_next,
227                    ..
228                }) => {
229                    quote! {
230                        ::core::result::Result::Ok((::core::option::Option::Some(Self::#next_name { #construct_next }), item))
231                    }
232                }
233                // final state -> exit the state machine
234                None => {
235                    quote! {
236                        ::core::result::Result::Ok((::core::option::Option::None, item))
237                    }
238                }
239            };
240
241            state_defs.extend(quote! {
242                #name { #decl },
243            });
244
245            advance_match_arms.extend(quote! {
246                Self::#name { #destructure } => {
247                    let item = #advance_body;
248                    #footer
249                }
250            });
251        }
252
253        AsItemsStateMachine {
254            defs: self.defs,
255            state_defs,
256            advance_match_arms,
257            variants: vec![AsItemsEntryPoint {
258                init: self.init,
259                destructure: self.destructure,
260            }],
261        }
262    }
263
264    /// Update the [`init`][`Self::init`] field in-place.
265    ///
266    /// The function will receive a reference to the current `init` value,
267    /// allowing to create "wrappers" around that existing code.
268    pub(crate) fn with_augmented_init<F: FnOnce(&TokenStream) -> TokenStream>(
269        mut self,
270        f: F,
271    ) -> Self {
272        let new_init = f(&self.init);
273        self.init = new_init;
274        self
275    }
276}
277
278/// Container for a single entrypoint into a [`FromEventsStateMachine`].
279pub(crate) struct FromEventsEntryPoint {
280    pub(crate) init: TokenStream,
281}
282
283/// A single variant's entrypoint into the event iterator.
284pub(crate) struct AsItemsEntryPoint {
285    /// A pattern match which destructures the target type into its parts, for
286    /// use by `init`.
287    destructure: TokenStream,
288
289    /// An expression which uses the names bound in `destructure` to create a
290    /// an instance of the state enum.
291    ///
292    /// The state enum type is available as `Self` in that context.
293    init: TokenStream,
294}
295
296/// # State machine to implement `xso::FromEventsBuilder`
297///
298/// This struct represents a state machine consisting of the following parts:
299///
300/// - Extra dependencies ([`Self::defs`])
301/// - States ([`Self::state_defs`])
302/// - Transitions ([`Self::advance_match_arms`])
303/// - Entrypoints ([`Self::variants`])
304///
305/// Such a state machine is best constructed by constructing one or
306/// more [`FromEventsSubmachine`] structs and converting/merging them using
307/// `into()` and [`merge`][`Self::merge`].
308///
309/// A state machine has an output type (corresponding to
310/// `xso::FromEventsBuilder::Output`), which is however only implicitly defined
311/// by the expressions generated in the `advance_match_arms`. That means that
312/// merging submachines with different output types works, but will then generate
313/// code which will fail to compile.
314///
315/// When converted to Rust code, the state machine will manifest as (among other
316/// things) an enum type which contains all states and which has an `advance`
317/// method. That method consumes the enum value and returns either a new enum
318/// value, an error, or the output type of the state machine.
319#[derive(Default)]
320pub(crate) struct FromEventsStateMachine {
321    /// Extra items which are needed for the state machine implementation.
322    defs: TokenStream,
323
324    /// A sequence of enum variant declarations, separated and terminated by
325    /// commas.
326    state_defs: TokenStream,
327
328    /// A sequence of `match self { .. }` arms, where `self` is the state
329    /// enumeration type.
330    ///
331    /// Each match arm must either diverge or evaluate to a
332    /// `Result<ControlFlow<State, Output>, xso::error::Error>`, where `State`
333    /// is the state enumeration and `Output` is the state machine's output
334    /// type.
335    advance_match_arms: TokenStream,
336
337    /// The different entrypoints for the state machine.
338    ///
339    /// This may only contain more than one element if an enumeration is being
340    /// constructed by the resulting state machine.
341    variants: Vec<FromEventsEntryPoint>,
342}
343
344impl FromEventsStateMachine {
345    /// Render the state machine as a token stream.
346    ///
347    /// The token stream contains the following pieces:
348    /// - Any definitions necessary for the statemachine to operate
349    /// - The state enum
350    /// - The builder struct
351    /// - The `xso::FromEventsBuilder` impl on the builder struct
352    /// - A `fn new(rxml::QName, rxml::AttrMap) -> Result<Self>` on the
353    ///   builder struct.
354    pub(crate) fn render(
355        self,
356        vis: &Visibility,
357        builder_ty_ident: &Ident,
358        state_ty_ident: &Ident,
359        output_ty: &Type,
360    ) -> Result<TokenStream> {
361        let Self {
362            defs,
363            state_defs,
364            advance_match_arms,
365            variants,
366        } = self;
367
368        let mut init_body = TokenStream::default();
369        for variant in variants {
370            let FromEventsEntryPoint { init } = variant;
371            init_body.extend(quote! {
372                let (name, mut attrs) = match { { let _ = &mut attrs; } #init } {
373                    ::core::result::Result::Ok(v) => return ::core::result::Result::Ok(v),
374                    ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(e)) => return ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(e)),
375                    ::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs }) => (name, attrs),
376                };
377            })
378        }
379
380        let output_ty_ref = make_ty_ref(output_ty);
381
382        let docstr = format!("Build a {0} from XML events.\n\nThis type is generated using the [`macro@xso::FromXml`] derive macro and implements [`xso::FromEventsBuilder`] for {0}.", output_ty_ref);
383
384        Ok(quote! {
385            #defs
386
387            enum #state_ty_ident {
388                #state_defs
389            }
390
391            impl #state_ty_ident {
392                fn advance(mut self, ev: ::xso::exports::rxml::Event) -> ::core::result::Result<::std::ops::ControlFlow<Self, #output_ty>, ::xso::error::Error> {
393                    match self {
394                        #advance_match_arms
395                    }.and_then(|__ok| {
396                        match __ok {
397                            ::std::ops::ControlFlow::Break(st) => ::core::result::Result::Ok(::std::ops::ControlFlow::Break(st)),
398                            ::std::ops::ControlFlow::Continue(result) => {
399                                ::core::result::Result::Ok(::std::ops::ControlFlow::Continue(result))
400                            }
401                        }
402                    })
403                }
404            }
405
406            impl #builder_ty_ident {
407                fn new(
408                    name: ::xso::exports::rxml::QName,
409                    attrs: ::xso::exports::rxml::AttrMap,
410                ) -> ::core::result::Result<Self, ::xso::error::FromEventsError> {
411                    #state_ty_ident::new(name, attrs).map(|ok| Self(::core::option::Option::Some(ok)))
412                }
413            }
414
415            #[doc = #docstr]
416            #vis struct #builder_ty_ident(::core::option::Option<#state_ty_ident>);
417
418            impl ::xso::FromEventsBuilder for #builder_ty_ident {
419                type Output = #output_ty;
420
421                fn feed(&mut self, ev: ::xso::exports::rxml::Event) -> ::core::result::Result<::core::option::Option<Self::Output>, ::xso::error::Error> {
422                    let inner = self.0.take().expect("feed called after completion");
423                    match inner.advance(ev)? {
424                        ::std::ops::ControlFlow::Continue(value) => ::core::result::Result::Ok(::core::option::Option::Some(value)),
425                        ::std::ops::ControlFlow::Break(st) => {
426                            self.0 = ::core::option::Option::Some(st);
427                            ::core::result::Result::Ok(::core::option::Option::None)
428                        }
429                    }
430                }
431            }
432
433            impl #state_ty_ident {
434                fn new(
435                    name: ::xso::exports::rxml::QName,
436                    mut attrs: ::xso::exports::rxml::AttrMap,
437                ) -> ::core::result::Result<Self, ::xso::error::FromEventsError> {
438                    #init_body
439                    { let _ = &mut attrs; }
440                    ::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs })
441                }
442            }
443        })
444    }
445}
446
447/// # State machine to implement an `Iterator<Item = rxml::Event>`.
448///
449/// This struct represents a state machine consisting of the following parts:
450///
451/// - Extra dependencies ([`Self::defs`])
452/// - States ([`Self::state_defs`])
453/// - Transitions ([`Self::advance_match_arms`])
454/// - Entrypoints ([`Self::variants`])
455///
456/// Such a state machine is best constructed by constructing one or
457/// more [`FromEventsSubmachine`] structs and converting/merging them using
458/// `into()` and [`merge`][`Self::merge`].
459///
460/// A state machine has an output type (corresponding to
461/// `xso::FromEventsBuilder::Output`), which is however only implicitly defined
462/// by the expressions generated in the `advance_match_arms`. That means that
463/// merging submachines with different output types works, but will then generate
464/// code which will fail to compile.
465///
466/// When converted to Rust code, the state machine will manifest as (among other
467/// things) an enum type which contains all states and which has an `advance`
468/// method. That method consumes the enum value and returns either a new enum
469/// value, an error, or the output type of the state machine.
470#[derive(Default)]
471pub(crate) struct AsItemsStateMachine {
472    /// Extra items which are needed for the state machine implementation.
473    defs: TokenStream,
474
475    /// A sequence of enum variant declarations, separated and terminated by
476    /// commas.
477    state_defs: TokenStream,
478
479    /// A sequence of `match self { .. }` arms, where `self` is the state
480    /// enumeration type.
481    ///
482    /// Each match arm must either diverge or evaluate to a
483    /// `Result<(Option<State>, Option<Item>), xso::error::Error>`, where
484    /// where `State` is the state enumeration.
485    ///
486    /// If `Some(.)` is returned for the event, that event is emitted. If
487    /// `None` is returned for the event, the advance implementation is called
488    /// again after switching to the state returned in the `Option<State>`
489    /// field.
490    ///
491    /// If `None` is returned for the `Option<State>`, the iterator
492    /// terminates yielding the `Option<Item>` value directly (even if it is
493    /// `None`). After the iterator has terminated, it yields `None`
494    /// indefinitely.
495    advance_match_arms: TokenStream,
496
497    /// The different entrypoints for the state machine.
498    ///
499    /// This may only contain more than one element if an enumeration is being
500    /// serialised by the resulting state machine.
501    variants: Vec<AsItemsEntryPoint>,
502}
503
504impl AsItemsStateMachine {
505    /// Render the state machine as a token stream.
506    ///
507    /// The token stream contains the following pieces:
508    /// - Any definitions necessary for the statemachine to operate
509    /// - The state enum
510    /// - The iterator struct
511    /// - The `Iterator` impl on the builder struct
512    /// - A `fn new(T) -> Result<Self>` on the iterator struct.
513    pub(crate) fn render(
514        self,
515        vis: &Visibility,
516        input_ty: &Type,
517        state_ty_ident: &Ident,
518        item_iter_ty_lifetime: &Lifetime,
519        item_iter_ty: &Type,
520    ) -> Result<TokenStream> {
521        let Self {
522            defs,
523            state_defs,
524            advance_match_arms,
525            mut variants,
526        } = self;
527
528        let input_ty_ref = make_ty_ref(input_ty);
529        let docstr = format!("Convert a {0} into XML events.\n\nThis type is generated using the [`macro@xso::AsXml`] derive macro and implements [`std::iter:Iterator`] for {0}.", input_ty_ref);
530
531        let init_body = if variants.len() == 1 {
532            let AsItemsEntryPoint { destructure, init } = variants.remove(0);
533            quote! {
534                {
535                    let #destructure = value;
536                    #init
537                }
538            }
539        } else {
540            let mut match_arms = TokenStream::default();
541            for AsItemsEntryPoint { destructure, init } in variants {
542                match_arms.extend(quote! {
543                    #destructure => #init,
544                });
545            }
546
547            quote! {
548                match value {
549                    #match_arms
550                }
551            }
552        };
553
554        Ok(quote! {
555            #defs
556
557            enum #state_ty_ident<#item_iter_ty_lifetime> {
558                #state_defs
559            }
560
561            impl<#item_iter_ty_lifetime> #state_ty_ident<#item_iter_ty_lifetime> {
562                fn advance(mut self) -> ::core::result::Result<(::core::option::Option<Self>, ::core::option::Option<::xso::Item<#item_iter_ty_lifetime>>), ::xso::error::Error> {
563                    match self {
564                        #advance_match_arms
565                    }
566                }
567
568                fn new(
569                    value: &#item_iter_ty_lifetime #input_ty,
570                ) -> ::core::result::Result<Self, ::xso::error::Error> {
571                    ::core::result::Result::Ok(#init_body)
572                }
573            }
574
575            #[doc = #docstr]
576            #vis struct #item_iter_ty(::core::option::Option<#state_ty_ident<#item_iter_ty_lifetime>>);
577
578            impl<#item_iter_ty_lifetime> ::std::iter::Iterator for #item_iter_ty {
579                type Item = ::core::result::Result<::xso::Item<#item_iter_ty_lifetime>, ::xso::error::Error>;
580
581                fn next(&mut self) -> ::core::option::Option<Self::Item> {
582                    let mut state = self.0.take()?;
583                    loop {
584                        let (next_state, item) = match state.advance() {
585                            ::core::result::Result::Ok(v) => v,
586                            ::core::result::Result::Err(e) => return ::core::option::Option::Some(::core::result::Result::Err(e)),
587                        };
588                        if let ::core::option::Option::Some(item) = item {
589                            self.0 = next_state;
590                            return ::core::option::Option::Some(::core::result::Result::Ok(item));
591                        }
592                        // no event, do we have a state?
593                        if let ::core::option::Option::Some(st) = next_state {
594                            // we do: try again!
595                            state = st;
596                            continue;
597                        } else {
598                            // we don't: end of iterator!
599                            self.0 = ::core::option::Option::None;
600                            return ::core::option::Option::None;
601                        }
602                    }
603                }
604            }
605
606            impl<#item_iter_ty_lifetime> #item_iter_ty {
607                fn new(value: &#item_iter_ty_lifetime #input_ty) -> ::core::result::Result<Self, ::xso::error::Error> {
608                    #state_ty_ident::new(value).map(|ok| Self(::core::option::Option::Some(ok)))
609                }
610            }
611        })
612    }
613}
614
615/// Construct a path for an intradoc link from a given type.
616fn doc_link_path(ty: &Type) -> Option<String> {
617    match ty {
618        Type::Path(ref ty) => {
619            let (mut buf, offset) = match ty.qself {
620                Some(ref qself) => {
621                    let mut buf = doc_link_path(&qself.ty)?;
622                    buf.push_str("::");
623                    (buf, qself.position)
624                }
625                None => {
626                    let mut buf = String::new();
627                    if ty.path.leading_colon.is_some() {
628                        buf.push_str("::");
629                    }
630                    (buf, 0)
631                }
632            };
633            let last = ty.path.segments.len() - 1;
634            for i in offset..ty.path.segments.len() {
635                let segment = &ty.path.segments[i];
636                buf.push_str(&segment.ident.to_string());
637                if i < last {
638                    buf.push_str("::");
639                }
640            }
641            Some(buf)
642        }
643        _ => None,
644    }
645}
646
647/// Create a markdown snippet which references the given type as cleanly as
648/// possible.
649///
650/// This is used in documentation generation functions.
651///
652/// Not all types can be linked to; those which cannot be linked to will
653/// simply be wrapped in backticks.
654fn make_ty_ref(ty: &Type) -> String {
655    match doc_link_path(ty) {
656        Some(mut path) => {
657            path.reserve(4);
658            path.insert_str(0, "[`");
659            path.push_str("`]");
660            path
661        }
662        None => format!("`{}`", ty.to_token_stream()),
663    }
664}