| // pest. The Elegant Parser |
| // Copyright (c) 2018 Dragoș Tiselice |
| // |
| // Licensed under the Apache License, Version 2.0 |
| // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT |
| // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| // option. All files in the project carrying such notice may not be copied, |
| // modified, or distributed except according to those terms. |
| //! Pest meta-grammar |
| //! |
| //! # Warning: Semantic Versioning |
| //! There may be non-breaking changes to the meta-grammar |
| //! between minor versions. Those non-breaking changes, however, |
| //! may translate into semver-breaking changes due to the additional variants |
| //! added to the `Rule` enum. This is a known issue and will be fixed in the |
| //! future (e.g. by increasing the MSRV and adding `#[non_exhaustive]` annotations). |
| |
| /// The top-level rule of a grammar: any number of file-level `//!` doc lines, |
| /// followed by any number of rules, spanning the whole input (SOI to EOI). |
| grammar_rules = _{ SOI ~ grammar_doc* ~ grammar_rule* ~ EOI } |
| |
| /// A rule of a grammar: either a full definition |
| /// (`name = modifier? { expression }`) or a single `///` doc-comment line. |
| grammar_rule = { |
| identifier ~ assignment_operator ~ modifier? ~ opening_brace ~ expression ~ closing_brace |
| | line_doc |
| } |
| |
| /// Assignment operator (`=`), placed after a rule name or a tag label. |
| assignment_operator = { "=" } |
| |
| /// Opening brace (`{`) of a rule body, a bounded repetition, or a `\u{...}` escape. |
| opening_brace = { "{" } |
| |
| /// Closing brace (`}`) of a rule body, a bounded repetition, or a `\u{...}` escape. |
| closing_brace = { "}" } |
| |
| /// Opening parenthesis (`(`) of a grouped expression or a `PUSH(...)` expression. |
| opening_paren = { "(" } |
| |
| /// Closing parenthesis (`)`) of a grouped expression or a `PUSH(...)` expression. |
| closing_paren = { ")" } |
| |
| /// Opening bracket (`[`) of a `PEEK[...]` slice. |
| opening_brack = { "[" } |
| |
| /// Closing bracket (`]`) of a `PEEK[...]` slice. |
| closing_brack = { "]" } |
| |
| /// A rule modifier, written between `=` and the rule's opening brace. |
| modifier = _{ |
| silent_modifier |
| | atomic_modifier |
| | compound_atomic_modifier |
| | non_atomic_modifier |
| } |
| |
| /// Silent rule prefix (`_`). |
| silent_modifier = { "_" } |
| |
| /// Atomic rule prefix (`@`). |
| atomic_modifier = { "@" } |
| |
| /// Compound atomic rule prefix (`$`). |
| compound_atomic_modifier = { "$" } |
| |
| /// Non-atomic rule prefix (`!`). |
| non_atomic_modifier = { "!" } |
| |
| /// A tag label: `#` followed by a name made of ASCII letters, digits, and `_` |
| /// that does not start with a digit. |
| tag_id = @{ "#" ~ ("_" | alpha) ~ ("_" | alpha_num)* } |
| |
| /// A node label assignment (`#tag =`), written at the start of a term. |
| node_tag = _{ tag_id ~ assignment_operator } |
| |
| /// A rule expression: one or more terms joined by infix operators, |
| /// optionally preceded by a leading `|`. |
| expression = { choice_operator? ~ term ~ (infix_operator ~ term)* } |
| |
| /// A rule term: an optional tag, any prefix operators, a node, |
| /// and any postfix operators, in that order. |
| term = { node_tag? ~ prefix_operator* ~ node ~ postfix_operator* } |
| |
| /// A rule node (inside terms): a parenthesized expression or a terminal. |
| node = _{ opening_paren ~ expression ~ closing_paren | terminal } |
| |
| /// A terminal expression. The keyword forms `PUSH(...)` and `PEEK[...]` are |
| /// tried before plain identifiers, then strings and character ranges. |
| terminal = _{ _push | peek_slice | identifier | string | insensitive_string | range } |
| |
| /// Prefix operators of a term: the positive (`&`) and negative (`!`) predicates. |
| prefix_operator = _{ positive_predicate_operator | negative_predicate_operator } |
| |
| /// Infix operators between terms: sequence (`~`) or choice (`|`). |
| infix_operator = _{ sequence_operator | choice_operator } |
| |
| /// Postfix operators of a term: optional, repetition, and bounded repetition. |
| /// The four brace forms all start with `{`; they are tried in the order listed |
| /// and the first one that matches completely is used. |
| postfix_operator = _{ |
| optional_operator |
| | repeat_operator |
| | repeat_once_operator |
| | repeat_exact |
| | repeat_min |
| | repeat_max |
| | repeat_min_max |
| } |
| |
| /// A positive predicate operator (`&`). |
| positive_predicate_operator = { "&" } |
| |
| /// A negative predicate operator (`!`). |
| negative_predicate_operator = { "!" } |
| |
| /// A sequence operator (`~`). |
| sequence_operator = { "~" } |
| |
| /// A choice operator (`|`). |
| choice_operator = { "|" } |
| |
| /// An optional operator (`?`). |
| optional_operator = { "?" } |
| |
| /// A repeat (zero or more times) operator (`*`). |
| repeat_operator = { "*" } |
| |
| /// A repeat at least once operator (`+`). |
| repeat_once_operator = { "+" } |
| |
| /// A repetition with an exact count: `{n}`. |
| repeat_exact = { opening_brace ~ number ~ closing_brace } |
| |
| /// A repetition with only a lower bound: `{n,}`. |
| repeat_min = { opening_brace ~ number ~ comma ~ closing_brace } |
| |
| /// A repetition with only an upper bound: `{,n}`. |
| repeat_max = { opening_brace ~ comma ~ number ~ closing_brace } |
| |
| /// A repetition with both a lower and an upper bound: `{m,n}`. |
| repeat_min_max = { opening_brace ~ number ~ comma ~ number ~ closing_brace } |
| |
| /// An unsigned decimal number: one or more ASCII digits. |
| number = @{ '0'..'9'+ } |
| |
| /// An integer: either an unsigned `number`, or `-` followed by a nonzero |
| /// value. Leading zeros are allowed after the sign, but `-0` is not matched. |
| integer = @{ number | "-" ~ "0"* ~ '1'..'9' ~ number? } |
| |
| /// A comma terminal (`,`). |
| comma = { "," } |
| |
| /// A PUSH expression: `PUSH(expression)`. |
| _push = { "PUSH" ~ opening_paren ~ expression ~ closing_paren } |
| |
| /// A PEEK slice expression: `PEEK[start..end]`, where both bounds are |
| /// optional integers. |
| peek_slice = { "PEEK" ~ opening_brack ~ integer? ~ range_operator ~ integer? ~ closing_brack } |
| |
| /// An identifier: an ASCII letter or `_`, followed by ASCII letters, digits, |
| /// or `_`. The bare keyword `PUSH` is not an identifier, but names that merely |
| /// start with it (e.g. `PUSHED`, `PUSH_BACK`) are accepted. |
| identifier = @{ !("PUSH" ~ !("_" | alpha_num)) ~ ("_" | alpha) ~ ("_" | alpha_num)* } |
| |
| /// An ASCII letter (`a`-`z` or `A`-`Z`). |
| alpha = _{ 'a'..'z' | 'A'..'Z' } |
| |
| /// An ASCII letter or decimal digit. |
| alpha_num = _{ alpha | '0'..'9' } |
| |
| /// A string literal: contents enclosed in double quotes. |
| string = ${ quote ~ inner_str ~ quote } |
| |
| /// An insensitive string: `^` followed by a string literal. |
| insensitive_string = { "^" ~ string } |
| |
| /// A character range: two character literals joined by `..`, e.g. `'a'..'z'`. |
| range = { character ~ range_operator ~ character } |
| |
| /// A character literal: one character or escape enclosed in single quotes. |
| character = ${ single_quote ~ inner_chr ~ single_quote } |
| |
| /// The contents of a string literal (between the quotes): any run of |
| /// escape sequences and characters other than `"` and `\`. |
| inner_str = @{ (!("\"" | "\\") ~ ANY | escape)* } |
| |
| /// The contents of a character literal: an escape sequence, or else any |
| /// single character. |
| inner_chr = @{ escape | ANY } |
| |
| /// An escape sequence: a backslash followed by `"`, `\`, `r`, `n`, `t`, `0`, |
| /// `'`, a `x..` hexadecimal code, or a `u{...}` unicode code. |
| escape = @{ "\\" ~ ("\"" | "\\" | "r" | "n" | "t" | "0" | "'" | code | unicode) } |
| |
| /// A hexadecimal code: `x` followed by exactly two hexadecimal digits. |
| code = @{ "x" ~ hex_digit{2} } |
| |
| /// A unicode code: `u{...}` containing two to six hexadecimal digits. |
| unicode = @{ "u" ~ opening_brace ~ hex_digit{2, 6} ~ closing_brace } |
| |
| /// A hexadecimal digit (`0`-`9`, `a`-`f`, or `A`-`F`). |
| hex_digit = @{ '0'..'9' | 'a'..'f' | 'A'..'F' } |
| |
| /// A double quote (`"`), delimiting a string literal. |
| quote = { "\"" } |
| |
| /// A single quote (`'`), delimiting a character literal. |
| single_quote = { "'" } |
| |
| /// A range operator (`..`), used in character ranges and `PEEK[...]` slices. |
| range_operator = { ".." } |
| |
| /// A line break: LF or CRLF. |
| newline = _{ "\n" | "\r\n" } |
| |
| /// A whitespace character: space, tab, or line break. |
| WHITESPACE = _{ " " | "\t" | newline } |
| |
| /// A single line comment: `//` up to the end of the line. Lines starting |
| /// with `///` or `//!` are excluded here; they are matched by the |
| /// doc-comment rules instead. |
| line_comment = _{ ("//" ~ !("/" | "!") ~ (!newline ~ ANY)*) } |
| |
| /// A multi-line comment: `/* ... */`, which may contain nested block comments. |
| block_comment = _{ "/*" ~ (block_comment | !"*/" ~ ANY)* ~ "*/" } |
| |
| /// A grammar comment: a block comment or a single line comment. |
| COMMENT = _{ block_comment | line_comment } |
| |
| // ref: https://doc.rust-lang.org/reference/comments.html |
| /// A space character: a space or a tab (no line breaks). |
| space = _{ " " | "\t" } |
| |
| /// A top-level (file) doc comment line: `//!`, at most one space or tab, |
| /// then the comment text. |
| grammar_doc = ${ "//!" ~ space? ~ inner_doc } |
| |
| /// A rule doc comment line: `///`, at most one space or tab, then the |
| /// comment text. |
| line_doc = ${ "///" ~ space? ~ inner_doc } |
| |
| /// The text of a doc comment: everything up to, but not including, the |
| /// end of the line. |
| inner_doc = @{ (!newline ~ ANY)* } |