1// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7//! State machines for parsing and serialising of structs and enums.
8
9use proc_macro2::TokenStream;
10use quote::{quote, ToTokens};
11use syn::*;
12
/// A single state in a parser or serializer state machine.
///
/// A state bundles the token streams needed to declare its enum variant,
/// to destructure that variant in a `match` arm and to run its transition
/// body.
pub(crate) struct State {
    /// Name of the state enum variant for this state.
    name: Ident,

    /// Declaration of members of the state enum in this state.
    ///
    /// Accumulated by [`State::add_field`] as a sequence of `name: ty,`
    /// entries.
    decl: TokenStream,

    /// Destructuring of members of the state enum in this state.
    ///
    /// Accumulated by [`State::add_field`] as a sequence of `name,`
    /// entries, in the same order as [`Self::decl`].
    destructure: TokenStream,

    /// Right-hand-side of the match arm for this state.
    advance_body: TokenStream,

    /// If set, that identifier will be bound mutably.
    uses_mut: Option<Ident>,
}
30
31impl State {
32 /// Create a new state with the a builder data field.
33 ///
34 /// This is a convenience wrapper around `new()` and `add_field()`. This
35 /// wrapper, or its equivalent, **must** be used for states used in
36 /// [`FromEventsStateMachine`] state machines, as those expect that the
37 /// first field is the builder data at render time.
38 pub(crate) fn new_with_builder(
39 name: Ident,
40 builder_data_ident: &Ident,
41 builder_data_ty: &Type,
42 ) -> Self {
43 let mut result = Self::new(name);
44 result.add_field(builder_data_ident, builder_data_ty);
45 result
46 }
47
48 /// Create a new, empty state.
49 ///
50 /// Note that an empty state will generate invalid code. At the very
51 /// least, a body must be added using [`Self::set_impl`] or
52 /// [`Self::with_impl`]. The various state machines may also have
53 /// additional requirements.
54 pub(crate) fn new(name: Ident) -> Self {
55 Self {
56 name,
57 decl: TokenStream::default(),
58 destructure: TokenStream::default(),
59 advance_body: TokenStream::default(),
60 uses_mut: None,
61 }
62 }
63
64 /// Add a field to this state's data.
65 ///
66 /// - `name` is the name under which the data will be accessible in the
67 /// state's implementation.
68 /// - `ty` must be the data field's type.
69 pub(crate) fn add_field(&mut self, name: &Ident, ty: &Type) {
70 self.decl.extend(quote! { #name: #ty, });
71 self.destructure.extend(quote! { #name, });
72 }
73
74 /// Modify the state to include another field and return the modified
75 /// state.
76 ///
77 /// This is a consume-and-return-style version of [`Self::add_field`].
78 pub(crate) fn with_field(mut self, name: &Ident, ty: &Type) -> Self {
79 self.add_field(name, ty);
80 self
81 }
82
83 /// Set the `advance` implementation of this state.
84 ///
85 /// `body` must be the body of the right hand side of the match arm for
86 /// the `advance` implementation of the state machine.
87 ///
88 /// See [`FromEventsStateMachine::advance_match_arms`] and
89 /// [`AsItemsSubmachine::compile`] for the respective
90 /// requirements on the implementations.
91 pub(crate) fn with_impl(mut self, body: TokenStream) -> Self {
92 self.advance_body = body;
93 self
94 }
95
96 /// Override the current `advance` implementation of this state.
97 ///
98 /// This is an in-place version of [`Self::with_impl`].
99 pub(crate) fn set_impl(&mut self, body: TokenStream) {
100 self.advance_body = body;
101 }
102
103 /// Modify the state to mark the given field as mutable and return the
104 /// modified state.
105 pub(crate) fn with_mut(mut self, ident: &Ident) -> Self {
106 assert!(self.uses_mut.is_none());
107 self.uses_mut = Some(ident.clone());
108 self
109 }
110}
111
/// A partial [`FromEventsStateMachine`] which only covers the builder for a
/// single compound.
///
/// See [`FromEventsStateMachine`] for more information on the state machines
/// in general.
pub(crate) struct FromEventsSubmachine {
    /// Additional items necessary for the state machine.
    pub(crate) defs: TokenStream,

    /// States and state transition implementations.
    pub(crate) states: Vec<State>,

    /// Initializer expression.
    ///
    /// This expression must evaluate to a
    /// `Result<#state_ty_ident, xso::error::FromEventsError>`.
    pub(crate) init: TokenStream,
}
130
131impl FromEventsSubmachine {
132 /// Convert a partial state machine into a full state machine.
133 ///
134 /// This converts the abstract [`State`] items into token
135 /// streams for the respective parts of the state machine (the state
136 /// definitions and the match arms), rendering them effectively immutable.
137 pub(crate) fn compile(self) -> FromEventsStateMachine {
138 let mut state_defs = TokenStream::default();
139 let mut advance_match_arms = TokenStream::default();
140
141 for state in self.states {
142 let State {
143 name,
144 decl,
145 destructure,
146 advance_body,
147 uses_mut,
148 } = state;
149
150 state_defs.extend(quote! {
151 #name { #decl },
152 });
153
154 let binding = if let Some(uses_mut) = uses_mut.as_ref() {
155 quote! {
156 let mut #uses_mut = #uses_mut;
157 }
158 } else {
159 TokenStream::default()
160 };
161
162 // XXX: nasty hack, but works: the first member of the enum always
163 // exists and it always is the builder data, which we always need
164 // mutably available. So we can just prefix the destructuring
165 // token stream with `mut` to make that first member mutable.
166 advance_match_arms.extend(quote! {
167 Self::#name { mut #destructure } => {
168 #binding
169 #advance_body
170 }
171 });
172 }
173
174 FromEventsStateMachine {
175 defs: self.defs,
176 state_defs,
177 advance_match_arms,
178 variants: vec![FromEventsEntryPoint { init: self.init }],
179 }
180 }
181
182 /// Update the [`init`][`Self::init`] field in-place.
183 ///
184 /// The function will receive a reference to the current `init` value,
185 /// allowing to create "wrappers" around that existing code.
186 pub(crate) fn with_augmented_init<F: FnOnce(&TokenStream) -> TokenStream>(
187 mut self,
188 f: F,
189 ) -> Self {
190 let new_init = f(&self.init);
191 self.init = new_init;
192 self
193 }
194}
195
/// A partial [`AsItemsStateMachine`] which only covers the builder for a
/// single compound.
///
/// See [`AsItemsStateMachine`] for more information on the state machines
/// in general.
pub(crate) struct AsItemsSubmachine {
    /// Additional items necessary for the state machine.
    pub(crate) defs: TokenStream,

    /// States and state transition implementations.
    ///
    /// The order matters: when compiled, each state transitions into the
    /// state following it in this vector.
    pub(crate) states: Vec<State>,

    /// A pattern match which destructures the target type into its parts, for
    /// use by `init`.
    pub(crate) destructure: TokenStream,

    /// An expression which uses the names bound in `destructure` to create
    /// an instance of the state enum.
    ///
    /// The state enum type is available as `Self` in that context.
    pub(crate) init: TokenStream,
}
218
219impl AsItemsSubmachine {
220 /// Convert a partial state machine into a full state machine.
221 ///
222 /// This converts the abstract [`State`] items into token
223 /// streams for the respective parts of the state machine (the state
224 /// definitions and the match arms), rendering them effectively immutable.
225 ///
226 /// This requires that the [`State::advance_body`] token streams evaluate
227 /// to an `Option<Item>`. If it evaluates to `Some(.)`, that is
228 /// emitted from the iterator. If it evaluates to `None`, the `advance`
229 /// implementation is called again.
230 ///
231 /// Each state implementation is augmented to also enter the next state,
232 /// causing the iterator to terminate eventually.
233 pub(crate) fn compile(self) -> AsItemsStateMachine {
234 let mut state_defs = TokenStream::default();
235 let mut advance_match_arms = TokenStream::default();
236
237 for (i, state) in self.states.iter().enumerate() {
238 let State {
239 ref name,
240 ref decl,
241 ref destructure,
242 ref advance_body,
243 ref uses_mut,
244 } = state;
245
246 let footer = match self.states.get(i + 1) {
247 Some(State {
248 name: ref next_name,
249 destructure: ref construct_next,
250 ..
251 }) => {
252 quote! {
253 ::core::result::Result::Ok((::core::option::Option::Some(Self::#next_name { #construct_next }), item))
254 }
255 }
256 // final state -> exit the state machine
257 None => {
258 quote! {
259 ::core::result::Result::Ok((::core::option::Option::None, item))
260 }
261 }
262 };
263
264 state_defs.extend(quote! {
265 #name { #decl },
266 });
267
268 if let Some(uses_mut) = uses_mut.as_ref() {
269 // the variant is non-consuming, meaning it can be called
270 // multiple times and it uses the identifier in `uses_mut`
271 // mutably.
272 // the transition is only triggered when it emits a None
273 // item
274 // (we cannot do this at the place the `State` is constructed,
275 // because we don't yet know all its fields then; it must be
276 // done here.)
277 advance_match_arms.extend(quote! {
278 Self::#name { #destructure } => {
279 let mut #uses_mut = #uses_mut;
280 match #advance_body {
281 ::std::option::Option::Some(item) => {
282 ::std::result::Result::Ok((::std::option::Option::Some(Self::#name { #destructure }), ::std::option::Option::Some(item)))
283 },
284 item => { #footer },
285 }
286 }
287 });
288 } else {
289 // if the variant is consuming, it can only be called once.
290 // it may or may not emit an event, but the transition is
291 // always triggered
292 advance_match_arms.extend(quote! {
293 Self::#name { #destructure } => {
294 let item = #advance_body;
295 #footer
296 }
297 });
298 }
299 }
300
301 AsItemsStateMachine {
302 defs: self.defs,
303 state_defs,
304 advance_match_arms,
305 variants: vec![AsItemsEntryPoint {
306 init: self.init,
307 destructure: self.destructure,
308 }],
309 }
310 }
311
312 /// Update the [`init`][`Self::init`] field in-place.
313 ///
314 /// The function will receive a reference to the current `init` value,
315 /// allowing to create "wrappers" around that existing code.
316 pub(crate) fn with_augmented_init<F: FnOnce(&TokenStream) -> TokenStream>(
317 mut self,
318 f: F,
319 ) -> Self {
320 let new_init = f(&self.init);
321 self.init = new_init;
322 self
323 }
324}
325
/// Container for a single entrypoint into a [`FromEventsStateMachine`].
pub(crate) struct FromEventsEntryPoint {
    /// Initializer expression of the entrypoint.
    ///
    /// When rendered, the expression is matched as a `Result` whose error
    /// type is `xso::error::FromEventsError`; on a `Mismatch` error, the
    /// next entrypoint (if any) is tried.
    pub(crate) init: TokenStream,
}
330
/// A single variant's entrypoint into the event iterator.
pub(crate) struct AsItemsEntryPoint {
    /// A pattern match which destructures the target type into its parts, for
    /// use by `init`.
    destructure: TokenStream,

    /// An expression which uses the names bound in `destructure` to create
    /// an instance of the state enum.
    ///
    /// The state enum type is available as `Self` in that context.
    init: TokenStream,
}
343
/// # State machine to implement `xso::FromEventsBuilder`
///
/// This struct represents a state machine consisting of the following parts:
///
/// - Extra dependencies ([`Self::defs`])
/// - States ([`Self::state_defs`])
/// - Transitions ([`Self::advance_match_arms`])
/// - Entrypoints ([`Self::variants`])
///
/// Such a state machine is best constructed by constructing one or
/// more [`FromEventsSubmachine`] structs and converting/merging them using
/// `into()` and [`merge`][`Self::merge`].
///
/// A state machine has an output type (corresponding to
/// `xso::FromEventsBuilder::Output`), which is however only implicitly defined
/// by the expressions generated in the `advance_match_arms`. That means that
/// merging submachines with different output types works, but will then generate
/// code which will fail to compile.
///
/// When converted to Rust code, the state machine will manifest as (among other
/// things) an enum type which contains all states and which has an `advance`
/// method. That method consumes the enum value and returns either a new enum
/// value, an error, or the output type of the state machine.
#[derive(Default)]
pub(crate) struct FromEventsStateMachine {
    /// Extra items which are needed for the state machine implementation.
    defs: TokenStream,

    /// A sequence of enum variant declarations, separated and terminated by
    /// commas.
    state_defs: TokenStream,

    /// A sequence of `match self { .. }` arms, where `self` is the state
    /// enumeration type.
    ///
    /// Each match arm must either diverge or evaluate to a
    /// `Result<ControlFlow<State, Output>, xso::error::Error>`, where `State`
    /// is the state enumeration and `Output` is the state machine's output
    /// type.
    ///
    /// Note the direction: `ControlFlow::Break(state)` keeps the state
    /// machine running in `state`, while `ControlFlow::Continue(output)`
    /// completes it with `output`.
    advance_match_arms: TokenStream,

    /// The different entrypoints for the state machine.
    ///
    /// This may only contain more than one element if an enumeration is being
    /// constructed by the resulting state machine.
    variants: Vec<FromEventsEntryPoint>,
}
391
392impl FromEventsStateMachine {
393 /// Render the state machine as a token stream.
394 ///
395 /// The token stream contains the following pieces:
396 /// - Any definitions necessary for the statemachine to operate
397 /// - The state enum
398 /// - The builder struct
399 /// - The `xso::FromEventsBuilder` impl on the builder struct
400 /// - A `fn new(rxml::QName, rxml::AttrMap) -> Result<Self>` on the
401 /// builder struct.
402 pub(crate) fn render(
403 self,
404 vis: &Visibility,
405 builder_ty_ident: &Ident,
406 state_ty_ident: &Ident,
407 output_ty: &Type,
408 ) -> Result<TokenStream> {
409 let Self {
410 defs,
411 state_defs,
412 advance_match_arms,
413 variants,
414 } = self;
415
416 let mut init_body = TokenStream::default();
417 for variant in variants {
418 let FromEventsEntryPoint { init } = variant;
419 init_body.extend(quote! {
420 let (name, mut attrs) = match { { let _ = &mut attrs; } #init } {
421 ::core::result::Result::Ok(v) => return ::core::result::Result::Ok(v),
422 ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(e)) => return ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(e)),
423 ::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs }) => (name, attrs),
424 };
425 })
426 }
427
428 let output_ty_ref = make_ty_ref(output_ty);
429
430 let docstr = format!("Build a {0} from XML events.\n\nThis type is generated using the [`macro@xso::FromXml`] derive macro and implements [`xso::FromEventsBuilder`] for {0}.", output_ty_ref);
431
432 Ok(quote! {
433 #defs
434
435 enum #state_ty_ident {
436 #state_defs
437 }
438
439 impl #state_ty_ident {
440 fn advance(mut self, ev: ::xso::exports::rxml::Event) -> ::core::result::Result<::std::ops::ControlFlow<Self, #output_ty>, ::xso::error::Error> {
441 match self {
442 #advance_match_arms
443 }.and_then(|__ok| {
444 match __ok {
445 ::std::ops::ControlFlow::Break(st) => ::core::result::Result::Ok(::std::ops::ControlFlow::Break(st)),
446 ::std::ops::ControlFlow::Continue(result) => {
447 ::core::result::Result::Ok(::std::ops::ControlFlow::Continue(result))
448 }
449 }
450 })
451 }
452 }
453
454 impl #builder_ty_ident {
455 fn new(
456 name: ::xso::exports::rxml::QName,
457 attrs: ::xso::exports::rxml::AttrMap,
458 ) -> ::core::result::Result<Self, ::xso::error::FromEventsError> {
459 #state_ty_ident::new(name, attrs).map(|ok| Self(::core::option::Option::Some(ok)))
460 }
461 }
462
463 #[doc = #docstr]
464 #vis struct #builder_ty_ident(::core::option::Option<#state_ty_ident>);
465
466 impl ::xso::FromEventsBuilder for #builder_ty_ident {
467 type Output = #output_ty;
468
469 fn feed(&mut self, ev: ::xso::exports::rxml::Event) -> ::core::result::Result<::core::option::Option<Self::Output>, ::xso::error::Error> {
470 let inner = self.0.take().expect("feed called after completion");
471 match inner.advance(ev)? {
472 ::std::ops::ControlFlow::Continue(value) => ::core::result::Result::Ok(::core::option::Option::Some(value)),
473 ::std::ops::ControlFlow::Break(st) => {
474 self.0 = ::core::option::Option::Some(st);
475 ::core::result::Result::Ok(::core::option::Option::None)
476 }
477 }
478 }
479 }
480
481 impl #state_ty_ident {
482 fn new(
483 name: ::xso::exports::rxml::QName,
484 mut attrs: ::xso::exports::rxml::AttrMap,
485 ) -> ::core::result::Result<Self, ::xso::error::FromEventsError> {
486 #init_body
487 { let _ = &mut attrs; }
488 ::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs })
489 }
490 }
491 })
492 }
493}
494
/// # State machine to implement an `Iterator` over `xso::Item`s
///
/// This struct represents a state machine consisting of the following parts:
///
/// - Extra dependencies ([`Self::defs`])
/// - States ([`Self::state_defs`])
/// - Transitions ([`Self::advance_match_arms`])
/// - Entrypoints ([`Self::variants`])
///
/// Such a state machine is best constructed by constructing one or
/// more [`AsItemsSubmachine`] structs and converting/merging them using
/// `into()` and [`merge`][`Self::merge`].
///
/// The iterator generated from the state machine yields
/// `Result<xso::Item<'_>, xso::error::Error>` values.
///
/// When converted to Rust code, the state machine will manifest as (among other
/// things) an enum type which contains all states and which has an `advance`
/// method. That method consumes the enum value and returns either an error
/// or a pair consisting of the optional next enum value and the optional
/// item to emit.
#[derive(Default)]
pub(crate) struct AsItemsStateMachine {
    /// Extra items which are needed for the state machine implementation.
    defs: TokenStream,

    /// A sequence of enum variant declarations, separated and terminated by
    /// commas.
    state_defs: TokenStream,

    /// A sequence of `match self { .. }` arms, where `self` is the state
    /// enumeration type.
    ///
    /// Each match arm must either diverge or evaluate to a
    /// `Result<(Option<State>, Option<Item>), xso::error::Error>`, where
    /// `State` is the state enumeration.
    ///
    /// If `Some(.)` is returned for the event, that event is emitted. If
    /// `None` is returned for the event, the advance implementation is called
    /// again after switching to the state returned in the `Option<State>`
    /// field.
    ///
    /// If `None` is returned for the `Option<State>`, the iterator
    /// terminates yielding the `Option<Item>` value directly (even if it is
    /// `None`). After the iterator has terminated, it yields `None`
    /// indefinitely.
    advance_match_arms: TokenStream,

    /// The different entrypoints for the state machine.
    ///
    /// This may only contain more than one element if an enumeration is being
    /// serialised by the resulting state machine.
    variants: Vec<AsItemsEntryPoint>,
}
551
552impl AsItemsStateMachine {
553 /// Render the state machine as a token stream.
554 ///
555 /// The token stream contains the following pieces:
556 /// - Any definitions necessary for the statemachine to operate
557 /// - The state enum
558 /// - The iterator struct
559 /// - The `Iterator` impl on the builder struct
560 /// - A `fn new(T) -> Result<Self>` on the iterator struct.
561 pub(crate) fn render(
562 self,
563 vis: &Visibility,
564 input_ty: &Type,
565 state_ty_ident: &Ident,
566 item_iter_ty_lifetime: &Lifetime,
567 item_iter_ty: &Type,
568 ) -> Result<TokenStream> {
569 let Self {
570 defs,
571 state_defs,
572 advance_match_arms,
573 mut variants,
574 } = self;
575
576 let input_ty_ref = make_ty_ref(input_ty);
577 let docstr = format!("Convert a {0} into XML events.\n\nThis type is generated using the [`macro@xso::AsXml`] derive macro and implements [`std::iter:Iterator`] for {0}.", input_ty_ref);
578
579 let init_body = if variants.len() == 1 {
580 let AsItemsEntryPoint { destructure, init } = variants.remove(0);
581 quote! {
582 {
583 let #destructure = value;
584 #init
585 }
586 }
587 } else {
588 let mut match_arms = TokenStream::default();
589 for AsItemsEntryPoint { destructure, init } in variants {
590 match_arms.extend(quote! {
591 #destructure => #init,
592 });
593 }
594
595 quote! {
596 match value {
597 #match_arms
598 }
599 }
600 };
601
602 Ok(quote! {
603 #defs
604
605 enum #state_ty_ident<#item_iter_ty_lifetime> {
606 #state_defs
607 }
608
609 impl<#item_iter_ty_lifetime> #state_ty_ident<#item_iter_ty_lifetime> {
610 fn advance(mut self) -> ::core::result::Result<(::core::option::Option<Self>, ::core::option::Option<::xso::Item<#item_iter_ty_lifetime>>), ::xso::error::Error> {
611 match self {
612 #advance_match_arms
613 }
614 }
615
616 fn new(
617 value: &#item_iter_ty_lifetime #input_ty,
618 ) -> ::core::result::Result<Self, ::xso::error::Error> {
619 ::core::result::Result::Ok(#init_body)
620 }
621 }
622
623 #[doc = #docstr]
624 #vis struct #item_iter_ty(::core::option::Option<#state_ty_ident<#item_iter_ty_lifetime>>);
625
626 impl<#item_iter_ty_lifetime> ::std::iter::Iterator for #item_iter_ty {
627 type Item = ::core::result::Result<::xso::Item<#item_iter_ty_lifetime>, ::xso::error::Error>;
628
629 fn next(&mut self) -> ::core::option::Option<Self::Item> {
630 let mut state = self.0.take()?;
631 loop {
632 let (next_state, item) = match state.advance() {
633 ::core::result::Result::Ok(v) => v,
634 ::core::result::Result::Err(e) => return ::core::option::Option::Some(::core::result::Result::Err(e)),
635 };
636 if let ::core::option::Option::Some(item) = item {
637 self.0 = next_state;
638 return ::core::option::Option::Some(::core::result::Result::Ok(item));
639 }
640 // no event, do we have a state?
641 if let ::core::option::Option::Some(st) = next_state {
642 // we do: try again!
643 state = st;
644 continue;
645 } else {
646 // we don't: end of iterator!
647 self.0 = ::core::option::Option::None;
648 return ::core::option::Option::None;
649 }
650 }
651 }
652 }
653
654 impl<#item_iter_ty_lifetime> #item_iter_ty {
655 fn new(value: &#item_iter_ty_lifetime #input_ty) -> ::core::result::Result<Self, ::xso::error::Error> {
656 #state_ty_ident::new(value).map(|ok| Self(::core::option::Option::Some(ok)))
657 }
658 }
659 })
660 }
661}
662
663/// Construct a path for an intradoc link from a given type.
664fn doc_link_path(ty: &Type) -> Option<String> {
665 match ty {
666 Type::Path(ref ty) => {
667 let (mut buf, offset) = match ty.qself {
668 Some(ref qself) => {
669 let mut buf = doc_link_path(&qself.ty)?;
670 buf.push_str("::");
671 (buf, qself.position)
672 }
673 None => {
674 let mut buf = String::new();
675 if ty.path.leading_colon.is_some() {
676 buf.push_str("::");
677 }
678 (buf, 0)
679 }
680 };
681 let last = ty.path.segments.len() - 1;
682 for i in offset..ty.path.segments.len() {
683 let segment = &ty.path.segments[i];
684 buf.push_str(&segment.ident.to_string());
685 if i < last {
686 buf.push_str("::");
687 }
688 }
689 Some(buf)
690 }
691 _ => None,
692 }
693}
694
695/// Create a markdown snippet which references the given type as cleanly as
696/// possible.
697///
698/// This is used in documentation generation functions.
699///
700/// Not all types can be linked to; those which cannot be linked to will
701/// simply be wrapped in backticks.
702fn make_ty_ref(ty: &Type) -> String {
703 match doc_link_path(ty) {
704 Some(mut path) => {
705 path.reserve(4);
706 path.insert_str(0, "[`");
707 path.push_str("`]");
708 path
709 }
710 None => format!("`{}`", ty.to_token_stream()),
711 }
712}