| #!/usr/bin/ruby |
| # encoding: utf-8 |
| |
| =begin LICENSE |
| |
| [The "BSD licence"] |
| Copyright (c) 2009-2010 Kyle Yetter |
| All rights reserved. |
| |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions |
| are met: |
| |
| 1. Redistributions of source code must retain the above copyright |
| notice, this list of conditions and the following disclaimer. |
| 2. Redistributions in binary form must reproduce the above copyright |
| notice, this list of conditions and the following disclaimer in the |
| documentation and/or other materials provided with the distribution. |
| 3. The name of the author may not be used to endorse or promote products |
| derived from this software without specific prior written permission. |
| |
| THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| =end |
| |
| require 'antlr3' |
| |
| module ANTLR3 |
| module AST |
| |
| =begin rdoc ANTLR3::AST::Wizard |
| |
| AST::Wizard is an extra utility class that allows quick creation of AST objects |
| using definitions writing in ANTLR-style tree definition syntax. It can also |
| define <i>tree patterns</i>, objects that are conceptually similar to regular |
| expressions. Patterns allow a simple method for recursively searching through an |
| AST for a particular node structure. These features make tree wizards useful |
| while testing and debugging AST constructing parsers and tree parsers. This |
| library has been ported to Ruby directly from the ANTLR Python runtime API. |
| |
| See |
| http://www.antlr.org/wiki/display/~admin/2007/07/02/Exploring+Concept+of+TreeWizard |
| for more background on the concept of a tree wizard. |
| |
| == Usage |
| |
| # setting up and creating a tree wizard |
| token_names = Array.new(4, '') + %w(VAR NUMBER EQ PLUS MINUS MULT DIV) |
| adaptor = ANTLR3::AST::CommonTreeAdaptor.new |
| wizard = ANTLR3::AST::Wizard.new(adaptor, token_names) |
| |
| # building trees |
| lone_node = wizard.create "VAR[x]" # => x |
| lone_node.type # => 4 # = VAR |
| lone_node.text # => "x" |
| |
| expression_node = wizard.create "(MINUS VAR NUMBER)" |
| # => (MINUS VAR NUMBER) |
| statement_node = wizard.create "(EQ[=] VAR[x] (PLUS[+] NUMBER[1] NUMBER[2]))" |
| # => (= x (+ 1 2)) |
| deep_node = wizard.create(<<-TREE) |
| (MULT[*] NUMBER[1] |
| (MINUS[-] |
| (MULT[*] NUMBER[3] VAR[x]) |
| (DIV[/] VAR[y] NUMBER[3.14]) |
| (MULT[*] NUMBER[4] VAR[z]) |
| ) |
| ) |
| TREE |
| # => (* 1 (- (* 3 x) (/ y 3.14) (* 4 z)) |
| |
| bad_tree_syntax = wizard.create "(+ 1 2)" |
| # => nil - invalid node names |
| |
| # test whether a tree matches a pattern |
| wizard.match(expression_node, '(MINUS VAR .)') # => true |
| wizard.match(lone_node, 'NUMBER NUMBER') # => false |
| |
| # extract nodes matching a pattern |
| wizard.find(statement_node, '(PLUS . .)') |
| # => [(+ 1 2)] |
| wizard.find(deep_node, 4) # where 4 is the value of type VAR |
| # => [x, y, z] |
| |
| # iterate through the tree and extract nodes with pattern labels |
| wizard.visit(deep_node, '(MULT %n:NUMBER %v:.)') do |node, parent, local_index, labels| |
| printf "n = %p\n, v = %p\n", labels['n'], labels['v'] |
| end |
| # => prints out: |
| # n = 3, v = x |
| # n = 4, v = z |
| |
| == Tree Construction Syntax |
| |
| Simple Token Node: TK |
| Token Node With Text: TK[text] |
| Flat Node List: (nil TK1 TK2) |
| General Node: (RT TK1 TK2) |
| Complex Nested Node: (RT (SUB1[sometext] TK1) TK2 (SUB2 TK3 TK4[moretext])) |
| |
| === Additional Syntax for Tree Matching Patterns |
| |
| Match Any Token Node: . |
| Label a Node: %name:TK |
| |
| =end |
| |
| class Wizard |
| |
| include Constants |
| include Util |
| |
| =begin rdoc ANTLR3::AST::Wizard::PatternLexer |
| |
| A class that is used internally by AST::Wizard to tokenize tree patterns |
| |
| =end |
| |
| class PatternLexer |
| include ANTLR3::Constants |
| |
| autoload :StringScanner, 'strscan' |
| |
| PATTERNS = [ |
| [ :space, /\s+/ ], |
| [ :identifier, /[a-z_]\w*/i ], |
| [ :open, /\(/ ], |
| [ :close, /\)/ ], |
| [ :percent, /%/ ], |
| [ :colon, /:/ ], |
| [ :dot, /\./ ], |
| [ :argument, /\[((?:[^\[\]\\]|\\\[|\\\]|\\.)*?)\]/ ] |
| ] |
| |
| attr_reader :text, :error, :pattern |
| def initialize( pattern ) |
| @pattern = pattern.to_s |
| @scanner = StringScanner.new( pattern ) |
| @text = '' |
| @error = false |
| end |
| |
| def next_token |
| begin |
| @scanner.eos? and return EOF |
| |
| type, = PATTERNS.find do |type, pattern| |
| @scanner.scan( pattern ) |
| end |
| |
| case type |
| when nil |
| type, @text, @error = EOF, '', true |
| break |
| when :identifier then @text = @scanner.matched |
| when :argument |
| # remove escapes from \] sequences in the text argument |
| ( @text = @scanner[ 1 ] ).gsub!( /\\(?=[\[\]])/, '' ) |
| end |
| end while type == :space |
| |
| return type |
| end |
| |
| alias error? error |
| end |
| |
| |
| =begin rdoc ANTLR3::AST::Wizard::Pattern |
| |
| A class that is used internally by AST::Wizard to construct AST tree objects |
| from a tokenized tree pattern |
| |
| =end |
| |
| class PatternParser |
| def self.parse( pattern, token_scheme, adaptor ) |
| lexer = PatternLexer.new( pattern ) |
| new( lexer, token_scheme, adaptor ).pattern |
| end |
| |
| include ANTLR3::Constants |
| |
| def initialize( tokenizer, token_scheme, adaptor ) |
| @tokenizer = tokenizer |
| @token_scheme = token_scheme |
| @adaptor = adaptor |
| @token_type = tokenizer.next_token |
| end |
| |
| def pattern |
| case @token_type |
| when :open then return parse_tree |
| when :identifier |
| node = parse_node |
| @token_type == EOF and return node |
| return nil |
| end |
| return nil |
| end |
| |
| CONTINUE_TYPES = [ :open, :identifier, :percent, :dot ] |
| |
| def parse_tree |
| @token_type != :open and return nil |
| @token_type = @tokenizer.next_token |
| root = parse_node or return nil |
| |
| loop do |
| case @token_type |
| when :open |
| subtree = parse_tree |
| @adaptor.add_child( root, subtree ) |
| when :identifier, :percent, :dot |
| child = parse_node or return nil |
| @adaptor.add_child( root, child ) |
| else break |
| end |
| end |
| @token_type == :close or return nil |
| @token_type = @tokenizer.next_token |
| return root |
| end |
| |
| def parse_node |
| label = nil |
| if @token_type == :percent |
| ( @token_type = @tokenizer.next_token ) == :identifier or return nil |
| label = @tokenizer.text |
| ( @token_type = @tokenizer.next_token ) == :colon or return nil |
| @token_type = @tokenizer.next_token |
| end |
| |
| if @token_type == :dot |
| @token_type = @tokenizer.next_token |
| wildcard_payload = CommonToken.create( :type => 0, :text => '.' ) |
| node = WildcardPattern.new( wildcard_payload ) |
| label and node.label = label |
| return node |
| end |
| |
| @token_type == :identifier or return nil |
| token_name = @tokenizer.text |
| @token_type = @tokenizer.next_token |
| token_name == 'nil' and return @adaptor.create_flat_list |
| |
| text = token_name |
| arg = nil |
| if @token_type == :argument |
| arg = @tokenizer.text |
| text = arg |
| @token_type = @tokenizer.next_token |
| end |
| |
| node_type = @token_scheme[ token_name ] || INVALID_TOKEN_TYPE |
| node = @adaptor.create_from_type( node_type, text ) |
| |
| if Pattern === node |
| node.label, node.has_text_arg = label, arg |
| end |
| return node |
| end |
| end |
| |
| |
| =begin rdoc ANTLR3::AST::Wizard::Pattern |
| |
| A simple tree class that represents the skeletal structure of tree. It is used |
| to validate tree structures as well as to extract nodes that match the pattern. |
| |
| =end |
| |
| class Pattern < CommonTree |
| def self.parse( pattern_str, scheme ) |
| PatternParser.parse( |
| pattern_str, scheme, PatternAdaptor.new( scheme.token_class ) |
| ) |
| end |
| |
| attr_accessor :label, :has_text_arg |
| alias :has_text_arg? :has_text_arg |
| |
| def initialize( payload ) |
| super( payload ) |
| @label = nil |
| @has_text_arg = nil |
| end |
| |
| def to_s |
| prefix = @label ? '%' << @label << ':' : '' |
| return( prefix << super ) |
| end |
| end |
| |
| =begin rdoc ANTLR3::AST::Wizard::WildcardPattern |
| |
| A simple tree node used to represent the operation "match any tree node type" in |
| a tree pattern. They are represented by '.' in tree pattern specifications. |
| |
| =end |
| |
| class WildcardPattern < Pattern; end |
| |
| |
| =begin rdoc ANTLR3::AST::Wizard::PatternAdaptor |
| |
| A customized TreeAdaptor used by AST::Wizards to build tree patterns. |
| |
| =end |
| |
| class PatternAdaptor < CommonTreeAdaptor |
| def create_with_payload( payload ) |
| return Pattern.new( payload ) |
| end |
| end |
| |
| attr_accessor :token_scheme, :adaptor |
| |
| def initialize( options = {} ) |
| @token_scheme = options.fetch( :token_scheme ) do |
| TokenScheme.build( options[ :token_class ], options[ :tokens ] ) |
| end |
| @adaptor = options.fetch( :adaptor ) do |
| CommonTreeAdaptor.new( @token_scheme.token_class ) |
| end |
| end |
| |
| def create( pattern ) |
| PatternParser.parse( pattern, @token_scheme, @adaptor ) |
| end |
| |
| def index( tree, map = {} ) |
| tree or return( map ) |
| type = @adaptor.type_of( tree ) |
| elements = map[ type ] ||= [] |
| elements << tree |
| @adaptor.each_child( tree ) { | child | index( child, map ) } |
| return( map ) |
| end |
| |
| def find( tree, what ) |
| case what |
| when Integer then find_token_type( tree, what ) |
| when String then find_pattern( tree, what ) |
| when Symbol then find_token_type( tree, @token_scheme[ what ] ) |
| else raise ArgumentError, "search subject must be a token type (integer) or a string" |
| end |
| end |
| |
| def find_token_type( tree, type ) |
| nodes = [] |
| visit( tree, type ) { | t, | nodes << t } |
| return nodes |
| end |
| |
| def find_pattern( tree, pattern ) |
| subtrees = [] |
| visit_pattern( tree, pattern ) { | t, | subtrees << t } |
| return( subtrees ) |
| end |
| |
| def visit( tree, what = nil, &block ) |
| block_given? or return enum_for( :visit, tree, what ) |
| Symbol === what and what = @token_scheme[ what ] |
| case what |
| when nil then visit_all( tree, &block ) |
| when Integer then visit_type( tree, nil, what, &block ) |
| when String then visit_pattern( tree, what, &block ) |
| else raise( ArgumentError, tidy( <<-'END', true ) ) |
| | The 'what' filter argument must be a tree |
| | pattern (String) or a token type (Integer) |
| | -- got #{ what.inspect } |
| END |
| end |
| end |
| |
| def visit_all( tree, parent = nil, &block ) |
| index = @adaptor.child_index( tree ) |
| yield( tree, parent, index, nil ) |
| @adaptor.each_child( tree ) do | child | |
| visit_all( child, tree, &block ) |
| end |
| end |
| |
| def visit_type( tree, parent, type, &block ) |
| tree.nil? and return( nil ) |
| index = @adaptor.child_index( tree ) |
| @adaptor.type_of( tree ) == type and yield( tree, parent, index, nil ) |
| @adaptor.each_child( tree ) do | child | |
| visit_type( child, tree, type, &block ) |
| end |
| end |
| |
| def visit_pattern( tree, pattern, &block ) |
| pattern = Pattern.parse( pattern, @token_scheme ) |
| |
| if pattern.nil? or pattern.flat_list? or pattern.is_a?( WildcardPattern ) |
| return( nil ) |
| end |
| |
| visit( tree, pattern.type ) do | tree, parent, child_index, labels | |
| labels = match!( tree, pattern ) and |
| yield( tree, parent, child_index, labels ) |
| end |
| end |
| |
| def match( tree, pattern ) |
| pattern = Pattern.parse( pattern, @token_scheme ) |
| |
| return( match!( tree, pattern ) ) |
| end |
| |
| def match!( tree, pattern, labels = {} ) |
| tree.nil? || pattern.nil? and return false |
| unless pattern.is_a? WildcardPattern |
| @adaptor.type_of( tree ) == pattern.type or return false |
| pattern.has_text_arg && ( @adaptor.text_of( tree ) != pattern.text ) and |
| return false |
| end |
| labels[ pattern.label ] = tree if labels && pattern.label |
| |
| number_of_children = @adaptor.child_count( tree ) |
| return false unless number_of_children == pattern.child_count |
| |
| number_of_children.times do |index| |
| actual_child = @adaptor.child_of( tree, index ) |
| pattern_child = pattern.child( index ) |
| |
| return( false ) unless match!( actual_child, pattern_child, labels ) |
| end |
| |
| return labels |
| end |
| |
| def equals( tree_a, tree_b, adaptor = @adaptor ) |
| tree_a && tree_b or return( false ) |
| |
| adaptor.type_of( tree_a ) == adaptor.type_of( tree_b ) or return false |
| adaptor.text_of( tree_a ) == adaptor.text_of( tree_b ) or return false |
| |
| child_count_a = adaptor.child_count( tree_a ) |
| child_count_b = adaptor.child_count( tree_b ) |
| child_count_a == child_count_b or return false |
| |
| child_count_a.times do | i | |
| child_a = adaptor.child_of( tree_a, i ) |
| child_b = adaptor.child_of( tree_b, i ) |
| equals( child_a, child_b, adaptor ) or return false |
| end |
| return true |
| end |
| |
| |
| DOT_DOT_PATTERN = /.*[^\.]\\.{2}[^\.].*/ |
| DOUBLE_ETC_PATTERN = /.*\.{3}\s+\.{3}.*/ |
| |
| def in_context?( tree, context ) |
| case context |
| when DOT_DOT_PATTERN then raise ArgumentError, "invalid syntax: .." |
| when DOUBLE_ETC_PATTERN then raise ArgumentError, "invalid syntax: ... ..." |
| end |
| |
| context = context.gsub( /([^\.\s])\.{3}([^\.])/, '\1 ... \2' ) |
| context.strip! |
| nodes = context.split( /\s+/ ) |
| |
| while tree = @adaptor.parent( tree ) and node = nodes.pop |
| if node == '...' |
| node = nodes.pop or return( true ) |
| tree = @adaptor.each_ancestor( tree ).find do | t | |
| @adaptor.type_name( t ) == node |
| end or return( false ) |
| end |
| @adaptor.type_name( tree ) == node or return( false ) |
| end |
| |
| return( false ) if tree.nil? and not nodes.empty? |
| return true |
| end |
| |
| private :match! |
| end |
| end |
| end |