| // Copyright 2014-2017 The html5ever Project Developers. See the |
| // COPYRIGHT file at the top-level directory of this distribution. |
| // |
| // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| // option. This file may not be copied, modified, or distributed |
| // except according to those terms. |
| |
| /*! |
| |
| Implements the `match_token!()` macro for use by the HTML tree builder |
| in `src/tree_builder/rules.rs`. |
| |
| |
| ## Example |
| |
| ```rust |
| match_token!(token { |
| CommentToken(text) => 1, |
| |
| tag @ <base> <link> <meta> => 2, |
| |
| </head> => 3, |
| |
| </body> </html> </br> => else, |
| |
| tag @ </_> => 4, |
| |
| token => 5, |
| }) |
| ``` |
| |
| |
| ## Syntax |
| |
| Because of the simplistic parser, the macro invocation must |
| start with exactly `match_token!(token {` (with whitespace as specified) |
| and end with exactly `})`. |
| |
| The left-hand side of each match arm is an optional `name @` binding, followed by |
| |
| - an ordinary Rust pattern that starts with an identifier or an underscore, or |
| |
| - a sequence of HTML tag names as identifiers, each inside "<...>" or "</...>" |
| to match an open or close tag respectively, or |
| |
| - a "wildcard tag" "<_>" or "</_>" to match all open tags or all close tags |
| respectively. |
| |
| The right-hand side is either an expression or the keyword `else`. |
| |
| Note that this syntax does not support guards or pattern alternation like |
| `Foo | Bar`. This is not a fundamental limitation; it's done for implementation |
| simplicity. |
| |
| |
| ## Semantics |
| |
| Ordinary Rust patterns match as usual. If present, the `name @` binding has |
| the usual meaning. |
| |
| A sequence of named tags matches any of those tags. A single sequence can |
| contain both open and close tags. If present, the `name @` binding binds (by |
| move) the `Tag` struct, not the outer `Token`. That is, a match arm like |
| |
| ```rust |
| tag @ <html> <head> => ... |
| ``` |
| |
| expands to something like |
| |
| ```rust |
| TagToken(tag @ Tag { name: local_name!("html"), kind: StartTag }) |
| | TagToken(tag @ Tag { name: local_name!("head"), kind: StartTag }) => ... |
| ``` |
| |
| A wildcard tag matches any tag of the appropriate kind, *unless* it was |
| previously matched with an `else` right-hand side (more on this below). |
| |
| The expansion of this macro reorders code somewhat, to satisfy various |
| restrictions arising from moves. However it provides the semantics of in-order |
| matching, by enforcing the following restrictions on its input: |
| |
| - The last pattern must be a variable or the wildcard "_". In other words |
| it must match everything. |
| |
| - Otherwise, ordinary Rust patterns and specific-tag patterns cannot appear |
| after wildcard tag patterns. |
| |
| - No tag name may appear more than once. |
| |
| - A wildcard tag pattern may not occur in the same arm as any other tag. |
| "<_> <html> => ..." and "<_> </_> => ..." are both forbidden. |
| |
| - The right-hand side "else" may only appear with specific-tag patterns. |
| It means that these specific tags should be handled by the last, |
| catch-all case arm, rather than by any wildcard tag arm. This situation |
| is common in the HTML5 syntax. |
| */ |
| |
| use proc_macro2::TokenStream; |
| use quote::ToTokens; |
| use std::collections::HashSet; |
| use std::fs::File; |
| use std::io::{Read, Write}; |
| use std::path::Path; |
| use syn; |
| use syn::ext::IdentExt; |
| use syn::fold::Fold; |
| use syn::parse::{Parse, ParseStream, Result}; |
| |
| pub fn expand(from: &Path, to: &Path) { |
| let mut source = String::new(); |
| File::open(from) |
| .unwrap() |
| .read_to_string(&mut source) |
| .unwrap(); |
| let ast = syn::parse_file(&source).expect("Parsing rules.rs module"); |
| let mut m = MatchTokenParser {}; |
| let ast = m.fold_file(ast); |
| let code = ast |
| .into_token_stream() |
| .to_string() |
| .replace("{ ", "{\n") |
| .replace(" }", "\n}"); |
| File::create(to) |
| .unwrap() |
| .write_all(code.as_bytes()) |
| .unwrap(); |
| } |
| |
/// Stateless `syn` folder used to rewrite `match_token!` invocations into
/// their expanded form.
struct MatchTokenParser {}
| |
| struct MatchToken { |
| ident: syn::Ident, |
| arms: Vec<MatchTokenArm>, |
| } |
| |
| struct MatchTokenArm { |
| binding: Option<syn::Ident>, |
| lhs: LHS, |
| rhs: RHS, |
| } |
| |
| enum LHS { |
| Tags(Vec<Tag>), |
| Pattern(syn::Pat), |
| } |
| |
| enum RHS { |
| Expression(syn::Expr), |
| Else, |
| } |
| |
/// Whether a tag pattern was written as an open tag (`<x>`) or a close tag
/// (`</x>`).
#[derive(PartialEq, Eq, Hash, Clone)]
enum TagKind {
    /// Written as `<name>`.
    StartTag,
    /// Written as `</name>`.
    EndTag,
}
| |
| // Option is None if wildcard |
| #[derive(PartialEq, Eq, Hash, Clone)] |
| pub struct Tag { |
| kind: TagKind, |
| name: Option<syn::Ident>, |
| } |
| |
| impl Parse for Tag { |
| fn parse(input: ParseStream) -> Result<Self> { |
| input.parse::<Token![<]>()?; |
| let closing: Option<Token![/]> = input.parse()?; |
| let name = match input.call(syn::Ident::parse_any)? { |
| ref wildcard if wildcard == "_" => None, |
| other => Some(other), |
| }; |
| input.parse::<Token![>]>()?; |
| Ok(Tag { |
| kind: if closing.is_some() { |
| TagKind::EndTag |
| } else { |
| TagKind::StartTag |
| }, |
| name: name, |
| }) |
| } |
| } |
| |
| impl Parse for LHS { |
| fn parse(input: ParseStream) -> Result<Self> { |
| if input.peek(Token![<]) { |
| let mut tags = Vec::new(); |
| while !input.peek(Token![=>]) { |
| tags.push(input.parse()?); |
| } |
| Ok(LHS::Tags(tags)) |
| } else { |
| let p: syn::Pat = input.parse()?; |
| Ok(LHS::Pattern(p)) |
| } |
| } |
| } |
| |
| impl Parse for MatchTokenArm { |
| fn parse(input: ParseStream) -> Result<Self> { |
| let binding = if input.peek2(Token![@]) { |
| let binding = input.parse::<syn::Ident>()?; |
| input.parse::<Token![@]>()?; |
| Some(binding) |
| } else { |
| None |
| }; |
| let lhs = input.parse::<LHS>()?; |
| input.parse::<Token![=>]>()?; |
| let rhs = if input.peek(syn::token::Brace) { |
| let block = input.parse::<syn::Block>().unwrap(); |
| let block = syn::ExprBlock { |
| attrs: vec![], |
| label: None, |
| block, |
| }; |
| input.parse::<Option<Token![,]>>()?; |
| RHS::Expression(syn::Expr::Block(block)) |
| } else if input.peek(Token![else]) { |
| input.parse::<Token![else]>()?; |
| input.parse::<Token![,]>()?; |
| RHS::Else |
| } else { |
| let expr = input.parse::<syn::Expr>().unwrap(); |
| input.parse::<Option<Token![,]>>()?; |
| RHS::Expression(expr) |
| }; |
| |
| Ok(MatchTokenArm { binding, lhs, rhs }) |
| } |
| } |
| |
| impl Parse for MatchToken { |
| fn parse(input: ParseStream) -> Result<Self> { |
| let ident = input.parse::<syn::Ident>()?; |
| let content; |
| braced!(content in input); |
| let mut arms = vec![]; |
| while !content.is_empty() { |
| arms.push(content.parse()?); |
| } |
| Ok(MatchToken { ident, arms }) |
| } |
| } |
| |
| pub fn expand_match_token(body: &TokenStream) -> syn::Expr { |
| let match_token = syn::parse2::<MatchToken>(body.clone()); |
| let ast = expand_match_token_macro(match_token.unwrap()); |
| syn::parse2(ast.into()).unwrap() |
| } |
| |
| fn expand_match_token_macro(match_token: MatchToken) -> TokenStream { |
| let mut arms = match_token.arms; |
| let to_be_matched = match_token.ident; |
| // Handle the last arm specially at the end. |
| let last_arm = arms.pop().unwrap(); |
| |
| // Tags we've seen, used for detecting duplicates. |
| let mut seen_tags: HashSet<Tag> = HashSet::new(); |
| |
| // Case arms for wildcard matching. We collect these and |
| // emit them later. |
| let mut wildcards_patterns: Vec<TokenStream> = Vec::new(); |
| let mut wildcards_expressions: Vec<syn::Expr> = Vec::new(); |
| |
| // Tags excluded (by an 'else' RHS) from wildcard matching. |
| let mut wild_excluded_patterns: Vec<TokenStream> = Vec::new(); |
| |
| let mut arms_code = Vec::new(); |
| |
| for MatchTokenArm { binding, lhs, rhs } in arms { |
| // Build Rust syntax for the `name @` binding, if any. |
| let binding = match binding { |
| Some(ident) => quote!(#ident @), |
| None => quote!(), |
| }; |
| |
| match (lhs, rhs) { |
| (LHS::Pattern(_), RHS::Else) => { |
| panic!("'else' may not appear with an ordinary pattern") |
| }, |
| |
| // ordinary pattern => expression |
| (LHS::Pattern(pat), RHS::Expression(expr)) => { |
| if !wildcards_patterns.is_empty() { |
| panic!( |
| "ordinary patterns may not appear after wildcard tags {:?} {:?}", |
| pat, expr |
| ); |
| } |
| arms_code.push(quote!(#binding #pat => #expr,)) |
| }, |
| |
| // <tag> <tag> ... => else |
| (LHS::Tags(tags), RHS::Else) => { |
| for tag in tags { |
| if !seen_tags.insert(tag.clone()) { |
| panic!("duplicate tag"); |
| } |
| if tag.name.is_none() { |
| panic!("'else' may not appear with a wildcard tag"); |
| } |
| wild_excluded_patterns.push(make_tag_pattern(&TokenStream::new(), tag)); |
| } |
| }, |
| |
| // <_> => expression |
| // <tag> <tag> ... => expression |
| (LHS::Tags(tags), RHS::Expression(expr)) => { |
| // Is this arm a tag wildcard? |
| // `None` if we haven't processed the first tag yet. |
| let mut wildcard = None; |
| for tag in tags { |
| if !seen_tags.insert(tag.clone()) { |
| panic!("duplicate tag"); |
| } |
| |
| match tag.name { |
| // <tag> |
| Some(_) => { |
| if !wildcards_patterns.is_empty() { |
| panic!("specific tags may not appear after wildcard tags"); |
| } |
| |
| if wildcard == Some(true) { |
| panic!("wildcard tags must appear alone"); |
| } |
| |
| if wildcard.is_some() { |
| // Push the delimeter `|` if it's not the first tag. |
| arms_code.push(quote!( | )) |
| } |
| arms_code.push(make_tag_pattern(&binding, tag)); |
| |
| wildcard = Some(false); |
| }, |
| |
| // <_> |
| None => { |
| if wildcard.is_some() { |
| panic!("wildcard tags must appear alone"); |
| } |
| wildcard = Some(true); |
| wildcards_patterns.push(make_tag_pattern(&binding, tag)); |
| wildcards_expressions.push(expr.clone()); |
| }, |
| } |
| } |
| |
| match wildcard { |
| None => panic!("[internal macro error] tag arm with no tags"), |
| Some(false) => arms_code.push(quote!( => #expr,)), |
| Some(true) => {}, // codegen for wildcards is deferred |
| } |
| }, |
| } |
| } |
| |
| // Time to process the last, catch-all arm. We will generate something like |
| // |
| // last_arm_token => { |
| // let enable_wildcards = match last_arm_token { |
| // TagToken(Tag { kind: EndTag, name: local_name!("body"), .. }) => false, |
| // TagToken(Tag { kind: EndTag, name: local_name!("html"), .. }) => false, |
| // // ... |
| // _ => true, |
| // }; |
| // |
| // match (enable_wildcards, last_arm_token) { |
| // (true, TagToken(name @ Tag { kind: StartTag, .. })) |
| // => ..., // wildcard action for start tags |
| // |
| // (true, TagToken(name @ Tag { kind: EndTag, .. })) |
| // => ..., // wildcard action for end tags |
| // |
| // (_, token) => ... // using the pattern from that last arm |
| // } |
| // } |
| |
| let MatchTokenArm { binding, lhs, rhs } = last_arm; |
| |
| let (last_pat, last_expr) = match (binding, lhs, rhs) { |
| (Some(_), _, _) => panic!("the last arm cannot have an @-binding"), |
| (None, LHS::Tags(_), _) => panic!("the last arm cannot have tag patterns"), |
| (None, _, RHS::Else) => panic!("the last arm cannot use 'else'"), |
| (None, LHS::Pattern(p), RHS::Expression(e)) => (p, e), |
| }; |
| |
| quote! { |
| match #to_be_matched { |
| #( |
| #arms_code |
| )* |
| last_arm_token => { |
| let enable_wildcards = match last_arm_token { |
| #( |
| #wild_excluded_patterns => false, |
| )* |
| _ => true, |
| }; |
| match (enable_wildcards, last_arm_token) { |
| #( |
| (true, #wildcards_patterns) => #wildcards_expressions, |
| )* |
| (_, #last_pat) => #last_expr, |
| } |
| } |
| } |
| } |
| } |
| |
| impl Fold for MatchTokenParser { |
| fn fold_stmt(&mut self, stmt: syn::Stmt) -> syn::Stmt { |
| match stmt { |
| syn::Stmt::Item(syn::Item::Macro(syn::ItemMacro { ref mac, .. })) => { |
| if mac.path == parse_quote!(match_token) { |
| return syn::fold::fold_stmt( |
| self, |
| syn::Stmt::Expr(expand_match_token(&mac.tts)), |
| ); |
| } |
| }, |
| _ => {}, |
| } |
| |
| syn::fold::fold_stmt(self, stmt) |
| } |
| |
| fn fold_expr(&mut self, expr: syn::Expr) -> syn::Expr { |
| match expr { |
| syn::Expr::Macro(syn::ExprMacro { ref mac, .. }) => { |
| if mac.path == parse_quote!(match_token) { |
| return syn::fold::fold_expr(self, expand_match_token(&mac.tts)); |
| } |
| }, |
| _ => {}, |
| } |
| |
| syn::fold::fold_expr(self, expr) |
| } |
| } |
| |
| fn make_tag_pattern(binding: &TokenStream, tag: Tag) -> TokenStream { |
| let kind = match tag.kind { |
| TagKind::StartTag => quote!(::tokenizer::StartTag), |
| TagKind::EndTag => quote!(::tokenizer::EndTag), |
| }; |
| let name_field = if let Some(name) = tag.name { |
| let name = name.to_string(); |
| quote!(name: local_name!(#name),) |
| } else { |
| quote!() |
| }; |
| quote! { |
| ::tree_builder::types::TagToken(#binding ::tokenizer::Tag { kind: #kind, #name_field .. }) |
| } |
| } |