pear_codegen/
lib.rs

1#![recursion_limit="256"]
2
3extern crate proc_macro;
4extern crate proc_macro2;
5extern crate syn;
6#[macro_use] extern crate quote;
7
8mod parser;
9
10use syn::parse::Parser;
11use syn::visit_mut::{self, VisitMut};
12use syn::spanned::Spanned;
13
14use proc_macro2::TokenStream;
15use proc_macro2_diagnostics::{Diagnostic, SpanDiagnosticExt};
16
17use crate::parser::*;
18
19fn parse_marker_ident(span: proc_macro2::Span) -> syn::Ident {
20    const PARSE_MARKER_IDENT: &str = "____parse_parse_marker";
21    syn::Ident::new(PARSE_MARKER_IDENT, span)
22}
23
24fn parser_info_ident(span: proc_macro2::Span) -> syn::Ident {
25    const PARSE_INFO_IDENT: &str = "____parse_parser_info";
26    syn::Ident::new(PARSE_INFO_IDENT, span)
27}
28
/// Position of the AST visitor relative to a `?` (try) expression.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum State {
    /// Not directly inside the operand of a `?`; calls are left untouched.
    Start,
    /// Inside the operand of a `?`; the next call visited receives the parser
    /// input as its first argument.
    InTry
}
34
/// AST visitor that rewrites the body of a parser function: it threads the
/// parser input into calls followed by `?` and prefixes the tokens of
/// `switch`/`parse_*` macro invocations with parser information.
#[derive(Clone)]
struct ParserTransformer {
    /// Expression inserted as the first argument of the first call found
    /// inside a `?` operand.
    input: syn::Expr,
    /// Type emitted as the last element of the info prefix added to
    /// `switch`/`parse_*` macro invocations.
    output: syn::Type,
    /// Whether the visitor is currently inside the operand of a `?`.
    state: State,
}
41
42impl ParserTransformer {
43    fn new(input: syn::Expr, output: syn::Type) -> ParserTransformer {
44        ParserTransformer { input, output, state: State::Start }
45    }
46}
47
48impl VisitMut for ParserTransformer {
49    fn visit_expr_try_mut(&mut self, v: &mut syn::ExprTry) {
50        let last_state = self.state;
51        self.state = State::InTry;
52        visit_mut::visit_expr_try_mut(self, v);
53        self.state = last_state;
54
55        let expr = &v.expr;
56        let new_expr = quote_spanned!(expr.span() => #expr.map_err(|e| e.into()));
57        let method_call: syn::Expr = syn::parse2(new_expr).expect("okay");
58        v.expr = Box::new(method_call);
59    }
60
61    fn visit_expr_call_mut(&mut self, call: &mut syn::ExprCall) {
62        if let State::InTry = self.state {
63            // TODO: Should we keep recursing?
64            call.args.insert(0, self.input.clone());
65
66            // Only insert into the _first_ call.
67            self.state = State::Start;
68        } else {
69            visit_mut::visit_expr_call_mut(self, call);
70        }
71    }
72
73    fn visit_macro_mut(&mut self, m: &mut syn::Macro) {
74        if let Some(segment) = m.path.segments.last() {
75            let name = segment.ident.to_string();
76            if name == "switch" || name.starts_with("parse_") {
77                let (input, output) = (&self.input, &self.output);
78                let tokens = match syn::parse2::<syn::Expr>(m.tokens.clone()) {
79                    Ok(mut expr) => {
80                        let mut transformer = self.clone();
81                        transformer.state = State::Start;
82                        visit_mut::visit_expr_mut(&mut transformer, &mut expr);
83                        quote!(#expr)
84                    },
85                    Err(_) => m.tokens.clone()
86                };
87
88                let info = parser_info_ident(self.input.span());
89                let mark = parse_marker_ident(m.span());
90
91                let parser_info = quote!([#info; #input; #mark; #output]);
92                m.tokens = quote_spanned!(m.span() => #parser_info #tokens);
93            }
94        }
95    }
96}
97
98fn extract_input_ident_ty(f: &syn::ItemFn) -> PResult<(syn::Ident, syn::Type)> {
99    use syn::{FnArg::Typed, PatType, Pat::Ident, Type::Reference};
100
101    let first = f.sig.inputs.first().ok_or_else(|| {
102        let paren_span = f.sig.paren_token.span.join();
103        paren_span.error("parsing functions require at least one input")
104    })?;
105
106    let e = first.span().error("invalid type for parser input");
107    match first {
108        Typed(PatType { pat, ty, .. }) => match **pat {
109            Ident(ref p) => match **ty {
110                Reference(ref r) => Ok((p.ident.clone(), *r.elem.clone())),
111                _ => Err(e)
112            }
113            _ => Err(e)
114        }
115        _ => Err(first.span().error("invalid type for parser input"))
116    }
117}
118
119fn wrapping_fn_block(
120    function: &syn::ItemFn,
121    scope: TokenStream,
122    args: &AttrArgs,
123    ret_ty: &syn::Type,
124) -> PResult<syn::Block> {
125    let (input, input_ty) = extract_input_ident_ty(function)?;
126    let fn_block = &function.block;
127
128    let span = function.span();
129    let mark_ident = parse_marker_ident(input.span());
130    let info_ident = parser_info_ident(function.sig.ident.span());
131    let result_map = match args.raw.is_some() {
132        true => quote_spanned!(span => (
133            |#info_ident: &#scope::input::ParserInfo, #mark_ident: &mut <#input_ty as #scope::input::Input>::Marker| {
134                #fn_block
135            })
136        ),
137        false => quote_spanned!(span => (
138            |#info_ident: &#scope::input::ParserInfo, #mark_ident: &mut <#input_ty as #scope::input::Input>::Marker| {
139                use #scope::result::IntoResult;
140                IntoResult::into_result(#fn_block)
141            }
142        ))
143    };
144
145    let rewind_expr = |span| quote_spanned! { span =>
146        <#input_ty as #scope::input::Rewind>::rewind_to(#input, ___mark);
147    };
148
149    let (rewind, peek) = (args.rewind.map(rewind_expr), args.peek.map(rewind_expr));
150    let new_block_tokens = {
151        let (name, raw) = (&function.sig.ident, args.raw.is_some());
152        let name_str = name.to_string();
153        quote_spanned!(span => {
154            let ___info = #scope::input::ParserInfo { name: #name_str, raw: #raw };
155            if let Some(ref mut ___debugger) = #input.options.debugger {
156                ___debugger.on_entry(&___info);
157            }
158
159            let mut ___mark = #scope::input::Input::mark(#input, &___info);
160            let mut ___res: #ret_ty = #result_map(&___info, &mut ___mark);
161            match ___res {
162                Ok(_) => { #peek },
163                Err(ref mut ___e) if #input.options.stacked_context => {
164                    let ___ctxt = #scope::input::Input::context(#input, ___mark);
165                    ___e.push_info(___info, ___ctxt);
166                    #rewind
167                },
168                Err(_) => { #rewind },
169            }
170
171            if #input.options.debugger.is_some() {
172                let ___ctxt = #scope::input::Input::context(#input, ___mark);
173                if let Some(ref mut ___debugger) = #input.options.debugger {
174                    ___debugger.on_exit(&___info, ___res.is_ok(), ___ctxt);
175                }
176            }
177
178            ___res
179        })
180    };
181
182    syn::parse(new_block_tokens.into())
183        .map_err(|e| function.span().error(format!("bad function: {}", e)))
184}
185
186fn parser_attribute(input: proc_macro::TokenStream, args: &AttrArgs) -> PResult<TokenStream> {
187    let input: proc_macro2::TokenStream = input.into();
188    let span = input.span();
189    let mut function: syn::ItemFn = syn::parse2(input).map_err(|_| {
190        span.error("`parser` attribute only supports functions")
191    })?;
192
193    let ret_ty: syn::Type = match &function.sig.output {
194        syn::ReturnType::Default => {
195            return Err(function.sig.span().error("parse function requires return type"));
196        },
197        syn::ReturnType::Type(_, ty) => (**ty).clone(),
198    };
199
200    let (input_ident, _) = extract_input_ident_ty(&function)?;
201    let input_expr: syn::Expr = syn::parse2(quote!(#input_ident)).unwrap();
202    let mut transformer = ParserTransformer::new(input_expr, ret_ty.clone());
203    visit_mut::visit_item_fn_mut(&mut transformer, &mut function);
204
205    let scope = args.raw.map(|_| quote!(crate)).unwrap_or_else(|| quote!(pear));
206    let inline = syn::Attribute::parse_outer.parse2(quote!(#[inline])).unwrap();
207    function.block = Box::new(wrapping_fn_block(&function, scope, args, &ret_ty)?);
208    function.attrs.extend(inline);
209
210    Ok(quote! {
211        #[allow(clippy::all, clippy::pedantic, clippy::nursery)]
212        #function
213    })
214}
215
216impl Case {
217    fn to_tokens<'a, I>(context: &Context, mut cases: I) -> TokenStream
218        where I: Iterator<Item = &'a Case>
219    {
220        let this = match cases.next() {
221            None => return quote!(),
222            Some(case) => case
223        };
224
225        let (input, output) = (&context.input, &context.output);
226        let mut transformer = ParserTransformer::new(input.clone(), output.clone());
227        let mut case_expr = this.expr.clone();
228        visit_mut::visit_expr_mut(&mut transformer, &mut case_expr);
229
230        match this.pattern {
231            Pattern::Wild(..) => match this.guard.as_ref() {
232                Some(guard) => {
233                    let rest_tokens = Case::to_tokens(context, cases);
234                    quote!(if #guard { #case_expr } else { #rest_tokens })
235                }
236                None => quote!(#case_expr),
237            }
238            Pattern::Calls(ref calls) => {
239                let case_branch = calls.iter().enumerate().map(|(i, call)| {
240                    let prefix = match i {
241                        0 => quote!(if),
242                        _ => quote!(else if)
243                    };
244
245                    let name = call.name.clone()
246                        .unwrap_or_else(|| syn::Ident::new("___", call.span()));
247
248                    // FIXME: We're repeating ourselves, aren't we? We alrady do
249                    // this in input insertion in the visitor.
250                    let mut call_expr = call.expr.clone();
251                    call_expr.args.insert(0, input.clone());
252                    let call_expr = quote!({
253                        let ___preserve_error = #input.emit_error;
254                        #input.emit_error = false;
255                        let ___call_result = #call_expr;
256                        #input.emit_error = ___preserve_error;
257                        ___call_result
258                    });
259
260                    let guarded_call = this.guard.as_ref()
261                        .map(|guard| &guard.expr)
262                        .map(|guard| quote!({
263                            match #call_expr {
264                                Ok(#name) if #guard => Some(#name),
265                                _ => None,
266                            }
267                        }))
268                        .unwrap_or_else(|| quote!(#call_expr.ok()));
269
270                    quote! {
271                        #prefix let Some(#name) = #guarded_call {
272                            #case_expr
273                        }
274                    }
275                });
276
277                let rest_tokens = Case::to_tokens(context, cases);
278                quote_spanned! { this.span =>
279                    #(#case_branch)*
280                    else { #rest_tokens }
281                }
282            }
283        }
284    }
285}
286
287impl Switch {
288    fn to_tokens(&self) -> TokenStream {
289        Case::to_tokens(&self.context, self.cases.iter())
290    }
291}
292
293/// The core attribute macro. Can only be applied to free functions with at
294/// least one parameter and a return value. To typecheck, the free function must
295/// meet the following typing requirements:
296///
297/// - The _first_ parameter's type must be a mutable reference to a [`Pear<I>`]
///   where `I` implements [`Input`]. This is the _input_ parameter.
299/// - The return type must be [`Result<O, I>`] where `I` is the inner type
300///   of the input parameter and `O` can be any type.
301///
302/// The following transformations are applied to the _contents_ of the
303/// attributed function:
304///
/// - The function's first parameter (of type `&mut Pear<I>`) is passed as the
///   first parameter to every function call in the function with a postfix
///   `?`. That is, every function call of the form `foo(a, b, c, ...)?` is
///   converted to `foo(input, a, b, c, ...)?` where `input` is the input
///   parameter.
/// - The inputs to every macro named `switch` or whose name starts with
///   `parse_` are prefixed with `[PARSER_NAME; INPUT; MARKER; OUTPUT]` where
///   `PARSER_NAME` is the raw string literal of the function's name, `INPUT`
///   is the input parameter expression, `MARKER` is the marker expression,
///   and `OUTPUT` is the output type. Additionally, if the input to the macro
///   is a valid Rust expression, the same transformations are applied to it
///   as to a function attributed with `#[parser]`.
317///
318///   Declare a `parse_` macro as:
319///
320///   ```rust,ignore
321///   macro_rules! parse_my_macro {
322///       ([$n:expr; $i:expr; $m:expr; $T:ty] ..) => {
323///           /* .. */
324///       }
325///   }
326///   ```
327///
328/// The following transformations are applied _around_ the attributed
329/// function:
330///
331/// - The [`Input::mark()`] method is called before the function executes.
332///   The returned mark, if any, is stored on the stack.
333/// - A return value of `O` is automatically converted (or "lifted") into a
334///   type of [`Result<O, I>`] by wrapping it in `Ok`.
335/// - If the function returns an `Err`, [`Input::context()`] is called with
336///   the current mark, and the returned context, if any, is pushed into the
///   error via [`ParseError::push_info()`].
338/// - The [`Input::unmark()`] method is called after the function executes,
339///   passing in the current mark.
340///
341/// # Example
342///
343/// ```rust
344/// use pear::input::{Pear, Text, Result};
345/// use pear::macros::{parser, parse};
346/// use pear::parsers::*;
347/// #
348/// # use pear::macros::parse_declare;
349/// # parse_declare!(Input<'a>(Token = char, Slice = &'a str, Many = &'a str));
350///
351/// #[parser]
352/// fn ab_in_dots<'a, I: Input<'a>>(input: &mut Pear<I>) -> Result<&'a str, I> {
353///     eat('.')?;
354///     let inside = take_while(|&c| c == 'a' || c == 'b')?;
355///     eat('.')?;
356///
357///     inside
358/// }
359///
360/// #
361/// let x = parse!(ab_in_dots: Text::from(".abba."));
362/// assert_eq!(x.unwrap(), "abba");
363///
364/// let x = parse!(ab_in_dots: Text::from(".ba."));
365/// assert_eq!(x.unwrap(), "ba");
366///
367/// let x = parse!(ab_in_dots: Text::from("..."));
368/// assert!(x.is_err());
369/// ```
370#[proc_macro_attribute]
371pub fn parser(
372    args: proc_macro::TokenStream,
373    input: proc_macro::TokenStream
374) -> proc_macro::TokenStream {
375    let args = match AttrArgs::syn_parse.parse(args) {
376        Ok(args) => args,
377        Err(e) => return Diagnostic::from(e).emit_as_item_tokens().into(),
378    };
379
380    match parser_attribute(input, &args) {
381        Ok(tokens) => tokens.into(),
382        Err(diag) => diag.emit_as_item_tokens().into(),
383    }
384}
385
386/// Invoked much like match, except each condition must be a parser, which is
387/// executed, and the corresponding arm is executed only if the parser succeeds.
388/// Once a condition succeeds, no other condition is executed.
389///
390/// ```rust,ignore
391/// switch! {
392///     parser() => expr,
393///     x@parser1() | x@parser2(a, b, c) => expr(x),
394///     _ => last_expr
395/// }
396/// ```
397#[proc_macro]
398pub fn switch(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
399    // TODO: We lose diagnostic information by using syn's thing here. We need a
400    // way to get a SynParseStream from a TokenStream to not do that.
401    match Switch::syn_parse.parse(input) {
402        Ok(switch) => switch.to_tokens().into(),
403        Err(e) => Diagnostic::from(e).emit_as_expr_tokens().into(),
404    }
405}