1// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7//! State machines for parsing and serialising of structs and enums.
8
9use proc_macro2::TokenStream;
10use quote::{quote, ToTokens};
11use syn::*;
12
13/// A single state in a parser or serializer state machine.
14pub(crate) struct State {
15 /// Name of the state enum variant for this state.
16 name: Ident,
17
18 /// Declaration of members of the state enum in this state.
19 decl: TokenStream,
20
21 /// Destructuring of members of the state enum in this state.
22 destructure: TokenStream,
23
24 /// Right-hand-side of the match arm for this state.
25 advance_body: TokenStream,
26}
27
28impl State {
29 /// Create a new state with the a builder data field.
30 ///
31 /// This is a convenience wrapper around `new()` and `add_field()`. This
32 /// wrapper, or its equivalent, **must** be used for states used in
33 /// [`FromEventsStateMachine`] state machines, as those expect that the
34 /// first field is the builder data at render time.
35 pub(crate) fn new_with_builder(
36 name: Ident,
37 builder_data_ident: &Ident,
38 builder_data_ty: &Type,
39 ) -> Self {
40 let mut result = Self::new(name);
41 result.add_field(builder_data_ident, builder_data_ty);
42 result
43 }
44
45 /// Create a new, empty state.
46 ///
47 /// Note that an empty state will generate invalid code. At the very
48 /// least, a body must be added using [`Self::set_impl`] or
49 /// [`Self::with_impl`]. The various state machines may also have
50 /// additional requirements.
51 pub(crate) fn new(name: Ident) -> Self {
52 Self {
53 name,
54 decl: TokenStream::default(),
55 destructure: TokenStream::default(),
56 advance_body: TokenStream::default(),
57 }
58 }
59
60 /// Add a field to this state's data.
61 ///
62 /// - `name` is the name under which the data will be accessible in the
63 /// state's implementation.
64 /// - `ty` must be the data field's type.
65 pub(crate) fn add_field(&mut self, name: &Ident, ty: &Type) {
66 self.decl.extend(quote! { #name: #ty, });
67 self.destructure.extend(quote! { #name, });
68 }
69
70 /// Modify the state to include another field and return the modified
71 /// state.
72 ///
73 /// This is a consume-and-return-style version of [`Self::add_field`].
74 pub(crate) fn with_field(mut self, name: &Ident, ty: &Type) -> Self {
75 self.add_field(name, ty);
76 self
77 }
78
79 /// Set the `advance` implementation of this state.
80 ///
81 /// `body` must be the body of the right hand side of the match arm for
82 /// the `advance` implementation of the state machine.
83 ///
84 /// See [`FromEventsStateMachine::advance_match_arms`] and
85 /// [`AsItemsSubmachine::compile`] for the respective
86 /// requirements on the implementations.
87 pub(crate) fn with_impl(mut self, body: TokenStream) -> Self {
88 self.advance_body = body;
89 self
90 }
91
92 /// Override the current `advance` implementation of this state.
93 ///
94 /// This is an in-place version of [`Self::with_impl`].
95 pub(crate) fn set_impl(&mut self, body: TokenStream) {
96 self.advance_body = body;
97 }
98}
99
100/// A partial [`FromEventsStateMachine`] which only covers the builder for a
101/// single compound.
102///
103/// See [`FromEventsStateMachine`] for more information on the state machines
104/// in general.
105pub(crate) struct FromEventsSubmachine {
106 /// Additional items necessary for the statemachine.
107 pub(crate) defs: TokenStream,
108
109 /// States and state transition implementations.
110 pub(crate) states: Vec<State>,
111
112 /// Initializer expression.
113 ///
114 /// This expression must evaluate to a
115 /// `Result<#state_ty_ident, xso::FromEventsError>`.
116 pub(crate) init: TokenStream,
117}
118
119impl FromEventsSubmachine {
120 /// Convert a partial state machine into a full state machine.
121 ///
122 /// This converts the abstract [`State`] items into token
123 /// streams for the respective parts of the state machine (the state
124 /// definitions and the match arms), rendering them effectively immutable.
125 pub(crate) fn compile(self) -> FromEventsStateMachine {
126 let mut state_defs = TokenStream::default();
127 let mut advance_match_arms = TokenStream::default();
128
129 for state in self.states {
130 let State {
131 name,
132 decl,
133 destructure,
134 advance_body,
135 } = state;
136
137 state_defs.extend(quote! {
138 #name { #decl },
139 });
140
141 // XXX: nasty hack, but works: the first member of the enum always
142 // exists and it always is the builder data, which we always need
143 // mutably available. So we can just prefix the destructuring
144 // token stream with `mut` to make that first member mutable.
145 advance_match_arms.extend(quote! {
146 Self::#name { mut #destructure } => {
147 #advance_body
148 }
149 });
150 }
151
152 FromEventsStateMachine {
153 defs: self.defs,
154 state_defs,
155 advance_match_arms,
156 variants: vec![FromEventsEntryPoint { init: self.init }],
157 }
158 }
159
160 /// Update the [`init`][`Self::init`] field in-place.
161 ///
162 /// The function will receive a reference to the current `init` value,
163 /// allowing to create "wrappers" around that existing code.
164 pub(crate) fn with_augmented_init<F: FnOnce(&TokenStream) -> TokenStream>(
165 mut self,
166 f: F,
167 ) -> Self {
168 let new_init = f(&self.init);
169 self.init = new_init;
170 self
171 }
172}
173
174/// A partial [`AsItemsStateMachine`] which only covers the builder for a
175/// single compound.
176///
177/// See [`AsItemsStateMachine`] for more information on the state machines
178/// in general.
179pub(crate) struct AsItemsSubmachine {
180 /// Additional items necessary for the statemachine.
181 pub(crate) defs: TokenStream,
182
183 /// States and state transition implementations.
184 pub(crate) states: Vec<State>,
185
186 /// A pattern match which destructures the target type into its parts, for
187 /// use by `init`.
188 pub(crate) destructure: TokenStream,
189
190 /// An expression which uses the names bound in `destructure` to create a
191 /// an instance of the state enum.
192 ///
193 /// The state enum type is available as `Self` in that context.
194 pub(crate) init: TokenStream,
195}
196
197impl AsItemsSubmachine {
198 /// Convert a partial state machine into a full state machine.
199 ///
200 /// This converts the abstract [`State`] items into token
201 /// streams for the respective parts of the state machine (the state
202 /// definitions and the match arms), rendering them effectively immutable.
203 ///
204 /// This requires that the [`State::advance_body`] token streams evaluate
205 /// to an `Option<Item>`. If it evaluates to `Some(.)`, that is
206 /// emitted from the iterator. If it evaluates to `None`, the `advance`
207 /// implementation is called again.
208 ///
209 /// Each state implementation is augmented to also enter the next state,
210 /// causing the iterator to terminate eventually.
211 pub(crate) fn compile(self) -> AsItemsStateMachine {
212 let mut state_defs = TokenStream::default();
213 let mut advance_match_arms = TokenStream::default();
214
215 for (i, state) in self.states.iter().enumerate() {
216 let State {
217 ref name,
218 ref decl,
219 ref destructure,
220 ref advance_body,
221 } = state;
222
223 let footer = match self.states.get(i + 1) {
224 Some(State {
225 name: ref next_name,
226 destructure: ref construct_next,
227 ..
228 }) => {
229 quote! {
230 ::core::result::Result::Ok((::core::option::Option::Some(Self::#next_name { #construct_next }), item))
231 }
232 }
233 // final state -> exit the state machine
234 None => {
235 quote! {
236 ::core::result::Result::Ok((::core::option::Option::None, item))
237 }
238 }
239 };
240
241 state_defs.extend(quote! {
242 #name { #decl },
243 });
244
245 advance_match_arms.extend(quote! {
246 Self::#name { #destructure } => {
247 let item = #advance_body;
248 #footer
249 }
250 });
251 }
252
253 AsItemsStateMachine {
254 defs: self.defs,
255 state_defs,
256 advance_match_arms,
257 variants: vec![AsItemsEntryPoint {
258 init: self.init,
259 destructure: self.destructure,
260 }],
261 }
262 }
263
264 /// Update the [`init`][`Self::init`] field in-place.
265 ///
266 /// The function will receive a reference to the current `init` value,
267 /// allowing to create "wrappers" around that existing code.
268 pub(crate) fn with_augmented_init<F: FnOnce(&TokenStream) -> TokenStream>(
269 mut self,
270 f: F,
271 ) -> Self {
272 let new_init = f(&self.init);
273 self.init = new_init;
274 self
275 }
276}
277
278/// Container for a single entrypoint into a [`FromEventsStateMachine`].
279pub(crate) struct FromEventsEntryPoint {
280 pub(crate) init: TokenStream,
281}
282
283/// A single variant's entrypoint into the event iterator.
284pub(crate) struct AsItemsEntryPoint {
285 /// A pattern match which destructures the target type into its parts, for
286 /// use by `init`.
287 destructure: TokenStream,
288
289 /// An expression which uses the names bound in `destructure` to create a
290 /// an instance of the state enum.
291 ///
292 /// The state enum type is available as `Self` in that context.
293 init: TokenStream,
294}
295
296/// # State machine to implement `xso::FromEventsBuilder`
297///
298/// This struct represents a state machine consisting of the following parts:
299///
300/// - Extra dependencies ([`Self::defs`])
301/// - States ([`Self::state_defs`])
302/// - Transitions ([`Self::advance_match_arms`])
303/// - Entrypoints ([`Self::variants`])
304///
305/// Such a state machine is best constructed by constructing one or
306/// more [`FromEventsSubmachine`] structs and converting/merging them using
307/// `into()` and [`merge`][`Self::merge`].
308///
309/// A state machine has an output type (corresponding to
310/// `xso::FromEventsBuilder::Output`), which is however only implicitly defined
311/// by the expressions generated in the `advance_match_arms`. That means that
312/// merging submachines with different output types works, but will then generate
313/// code which will fail to compile.
314///
315/// When converted to Rust code, the state machine will manifest as (among other
316/// things) an enum type which contains all states and which has an `advance`
317/// method. That method consumes the enum value and returns either a new enum
318/// value, an error, or the output type of the state machine.
319#[derive(Default)]
320pub(crate) struct FromEventsStateMachine {
321 /// Extra items which are needed for the state machine implementation.
322 defs: TokenStream,
323
324 /// A sequence of enum variant declarations, separated and terminated by
325 /// commas.
326 state_defs: TokenStream,
327
328 /// A sequence of `match self { .. }` arms, where `self` is the state
329 /// enumeration type.
330 ///
331 /// Each match arm must either diverge or evaluate to a
332 /// `Result<ControlFlow<State, Output>, xso::error::Error>`, where `State`
333 /// is the state enumeration and `Output` is the state machine's output
334 /// type.
335 advance_match_arms: TokenStream,
336
337 /// The different entrypoints for the state machine.
338 ///
339 /// This may only contain more than one element if an enumeration is being
340 /// constructed by the resulting state machine.
341 variants: Vec<FromEventsEntryPoint>,
342}
343
344impl FromEventsStateMachine {
345 /// Render the state machine as a token stream.
346 ///
347 /// The token stream contains the following pieces:
348 /// - Any definitions necessary for the statemachine to operate
349 /// - The state enum
350 /// - The builder struct
351 /// - The `xso::FromEventsBuilder` impl on the builder struct
352 /// - A `fn new(rxml::QName, rxml::AttrMap) -> Result<Self>` on the
353 /// builder struct.
354 pub(crate) fn render(
355 self,
356 vis: &Visibility,
357 builder_ty_ident: &Ident,
358 state_ty_ident: &Ident,
359 output_ty: &Type,
360 ) -> Result<TokenStream> {
361 let Self {
362 defs,
363 state_defs,
364 advance_match_arms,
365 variants,
366 } = self;
367
368 let mut init_body = TokenStream::default();
369 for variant in variants {
370 let FromEventsEntryPoint { init } = variant;
371 init_body.extend(quote! {
372 let (name, mut attrs) = match { { let _ = &mut attrs; } #init } {
373 ::core::result::Result::Ok(v) => return ::core::result::Result::Ok(v),
374 ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(e)) => return ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(e)),
375 ::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs }) => (name, attrs),
376 };
377 })
378 }
379
380 let output_ty_ref = make_ty_ref(output_ty);
381
382 let docstr = format!("Build a {0} from XML events.\n\nThis type is generated using the [`macro@xso::FromXml`] derive macro and implements [`xso::FromEventsBuilder`] for {0}.", output_ty_ref);
383
384 Ok(quote! {
385 #defs
386
387 enum #state_ty_ident {
388 #state_defs
389 }
390
391 impl #state_ty_ident {
392 fn advance(mut self, ev: ::xso::exports::rxml::Event) -> ::core::result::Result<::std::ops::ControlFlow<Self, #output_ty>, ::xso::error::Error> {
393 match self {
394 #advance_match_arms
395 }.and_then(|__ok| {
396 match __ok {
397 ::std::ops::ControlFlow::Break(st) => ::core::result::Result::Ok(::std::ops::ControlFlow::Break(st)),
398 ::std::ops::ControlFlow::Continue(result) => {
399 ::core::result::Result::Ok(::std::ops::ControlFlow::Continue(result))
400 }
401 }
402 })
403 }
404 }
405
406 impl #builder_ty_ident {
407 fn new(
408 name: ::xso::exports::rxml::QName,
409 attrs: ::xso::exports::rxml::AttrMap,
410 ) -> ::core::result::Result<Self, ::xso::error::FromEventsError> {
411 #state_ty_ident::new(name, attrs).map(|ok| Self(::core::option::Option::Some(ok)))
412 }
413 }
414
415 #[doc = #docstr]
416 #vis struct #builder_ty_ident(::core::option::Option<#state_ty_ident>);
417
418 impl ::xso::FromEventsBuilder for #builder_ty_ident {
419 type Output = #output_ty;
420
421 fn feed(&mut self, ev: ::xso::exports::rxml::Event) -> ::core::result::Result<::core::option::Option<Self::Output>, ::xso::error::Error> {
422 let inner = self.0.take().expect("feed called after completion");
423 match inner.advance(ev)? {
424 ::std::ops::ControlFlow::Continue(value) => ::core::result::Result::Ok(::core::option::Option::Some(value)),
425 ::std::ops::ControlFlow::Break(st) => {
426 self.0 = ::core::option::Option::Some(st);
427 ::core::result::Result::Ok(::core::option::Option::None)
428 }
429 }
430 }
431 }
432
433 impl #state_ty_ident {
434 fn new(
435 name: ::xso::exports::rxml::QName,
436 mut attrs: ::xso::exports::rxml::AttrMap,
437 ) -> ::core::result::Result<Self, ::xso::error::FromEventsError> {
438 #init_body
439 { let _ = &mut attrs; }
440 ::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs })
441 }
442 }
443 })
444 }
445}
446
447/// # State machine to implement an `Iterator<Item = rxml::Event>`.
448///
449/// This struct represents a state machine consisting of the following parts:
450///
451/// - Extra dependencies ([`Self::defs`])
452/// - States ([`Self::state_defs`])
453/// - Transitions ([`Self::advance_match_arms`])
454/// - Entrypoints ([`Self::variants`])
455///
456/// Such a state machine is best constructed by constructing one or
457/// more [`FromEventsSubmachine`] structs and converting/merging them using
458/// `into()` and [`merge`][`Self::merge`].
459///
460/// A state machine has an output type (corresponding to
461/// `xso::FromEventsBuilder::Output`), which is however only implicitly defined
462/// by the expressions generated in the `advance_match_arms`. That means that
463/// merging submachines with different output types works, but will then generate
464/// code which will fail to compile.
465///
466/// When converted to Rust code, the state machine will manifest as (among other
467/// things) an enum type which contains all states and which has an `advance`
468/// method. That method consumes the enum value and returns either a new enum
469/// value, an error, or the output type of the state machine.
470#[derive(Default)]
471pub(crate) struct AsItemsStateMachine {
472 /// Extra items which are needed for the state machine implementation.
473 defs: TokenStream,
474
475 /// A sequence of enum variant declarations, separated and terminated by
476 /// commas.
477 state_defs: TokenStream,
478
479 /// A sequence of `match self { .. }` arms, where `self` is the state
480 /// enumeration type.
481 ///
482 /// Each match arm must either diverge or evaluate to a
483 /// `Result<(Option<State>, Option<Item>), xso::error::Error>`, where
484 /// where `State` is the state enumeration.
485 ///
486 /// If `Some(.)` is returned for the event, that event is emitted. If
487 /// `None` is returned for the event, the advance implementation is called
488 /// again after switching to the state returned in the `Option<State>`
489 /// field.
490 ///
491 /// If `None` is returned for the `Option<State>`, the iterator
492 /// terminates yielding the `Option<Item>` value directly (even if it is
493 /// `None`). After the iterator has terminated, it yields `None`
494 /// indefinitely.
495 advance_match_arms: TokenStream,
496
497 /// The different entrypoints for the state machine.
498 ///
499 /// This may only contain more than one element if an enumeration is being
500 /// serialised by the resulting state machine.
501 variants: Vec<AsItemsEntryPoint>,
502}
503
504impl AsItemsStateMachine {
505 /// Render the state machine as a token stream.
506 ///
507 /// The token stream contains the following pieces:
508 /// - Any definitions necessary for the statemachine to operate
509 /// - The state enum
510 /// - The iterator struct
511 /// - The `Iterator` impl on the builder struct
512 /// - A `fn new(T) -> Result<Self>` on the iterator struct.
513 pub(crate) fn render(
514 self,
515 vis: &Visibility,
516 input_ty: &Type,
517 state_ty_ident: &Ident,
518 item_iter_ty_lifetime: &Lifetime,
519 item_iter_ty: &Type,
520 ) -> Result<TokenStream> {
521 let Self {
522 defs,
523 state_defs,
524 advance_match_arms,
525 mut variants,
526 } = self;
527
528 let input_ty_ref = make_ty_ref(input_ty);
529 let docstr = format!("Convert a {0} into XML events.\n\nThis type is generated using the [`macro@xso::AsXml`] derive macro and implements [`std::iter:Iterator`] for {0}.", input_ty_ref);
530
531 let init_body = if variants.len() == 1 {
532 let AsItemsEntryPoint { destructure, init } = variants.remove(0);
533 quote! {
534 {
535 let #destructure = value;
536 #init
537 }
538 }
539 } else {
540 let mut match_arms = TokenStream::default();
541 for AsItemsEntryPoint { destructure, init } in variants {
542 match_arms.extend(quote! {
543 #destructure => #init,
544 });
545 }
546
547 quote! {
548 match value {
549 #match_arms
550 }
551 }
552 };
553
554 Ok(quote! {
555 #defs
556
557 enum #state_ty_ident<#item_iter_ty_lifetime> {
558 #state_defs
559 }
560
561 impl<#item_iter_ty_lifetime> #state_ty_ident<#item_iter_ty_lifetime> {
562 fn advance(mut self) -> ::core::result::Result<(::core::option::Option<Self>, ::core::option::Option<::xso::Item<#item_iter_ty_lifetime>>), ::xso::error::Error> {
563 match self {
564 #advance_match_arms
565 }
566 }
567
568 fn new(
569 value: &#item_iter_ty_lifetime #input_ty,
570 ) -> ::core::result::Result<Self, ::xso::error::Error> {
571 ::core::result::Result::Ok(#init_body)
572 }
573 }
574
575 #[doc = #docstr]
576 #vis struct #item_iter_ty(::core::option::Option<#state_ty_ident<#item_iter_ty_lifetime>>);
577
578 impl<#item_iter_ty_lifetime> ::std::iter::Iterator for #item_iter_ty {
579 type Item = ::core::result::Result<::xso::Item<#item_iter_ty_lifetime>, ::xso::error::Error>;
580
581 fn next(&mut self) -> ::core::option::Option<Self::Item> {
582 let mut state = self.0.take()?;
583 loop {
584 let (next_state, item) = match state.advance() {
585 ::core::result::Result::Ok(v) => v,
586 ::core::result::Result::Err(e) => return ::core::option::Option::Some(::core::result::Result::Err(e)),
587 };
588 if let ::core::option::Option::Some(item) = item {
589 self.0 = next_state;
590 return ::core::option::Option::Some(::core::result::Result::Ok(item));
591 }
592 // no event, do we have a state?
593 if let ::core::option::Option::Some(st) = next_state {
594 // we do: try again!
595 state = st;
596 continue;
597 } else {
598 // we don't: end of iterator!
599 self.0 = ::core::option::Option::None;
600 return ::core::option::Option::None;
601 }
602 }
603 }
604 }
605
606 impl<#item_iter_ty_lifetime> #item_iter_ty {
607 fn new(value: &#item_iter_ty_lifetime #input_ty) -> ::core::result::Result<Self, ::xso::error::Error> {
608 #state_ty_ident::new(value).map(|ok| Self(::core::option::Option::Some(ok)))
609 }
610 }
611 })
612 }
613}
614
615/// Construct a path for an intradoc link from a given type.
616fn doc_link_path(ty: &Type) -> Option<String> {
617 match ty {
618 Type::Path(ref ty) => {
619 let (mut buf, offset) = match ty.qself {
620 Some(ref qself) => {
621 let mut buf = doc_link_path(&qself.ty)?;
622 buf.push_str("::");
623 (buf, qself.position)
624 }
625 None => {
626 let mut buf = String::new();
627 if ty.path.leading_colon.is_some() {
628 buf.push_str("::");
629 }
630 (buf, 0)
631 }
632 };
633 let last = ty.path.segments.len() - 1;
634 for i in offset..ty.path.segments.len() {
635 let segment = &ty.path.segments[i];
636 buf.push_str(&segment.ident.to_string());
637 if i < last {
638 buf.push_str("::");
639 }
640 }
641 Some(buf)
642 }
643 _ => None,
644 }
645}
646
647/// Create a markdown snippet which references the given type as cleanly as
648/// possible.
649///
650/// This is used in documentation generation functions.
651///
652/// Not all types can be linked to; those which cannot be linked to will
653/// simply be wrapped in backticks.
654fn make_ty_ref(ty: &Type) -> String {
655 match doc_link_path(ty) {
656 Some(mut path) => {
657 path.reserve(4);
658 path.insert_str(0, "[`");
659 path.push_str("`]");
660 path
661 }
662 None => format!("`{}`", ty.to_token_stream()),
663 }
664}