| # Sources (c) 2002, 2003, 2004, 2006, 2007, 2008, 2009 |
| # David Turner <david@freetype.org> |
| # |
| # |
| # this file contains definitions of classes needed to decompose |
| # C source files into a series of multi-line "blocks". There are |
| # two kinds of blocks: |
| # |
| # - normal blocks, which contain source code or ordinary comments |
| # |
| # - documentation blocks, which have restricted formatting, and |
| # whose text always starts with a documentation markup tag like |
| # "<Function>", "<Type>", etc. |
| # |
| # the routines used to process the content of documentation blocks |
| # are not contained here, but in "content.py" |
| # |
| # the classes and methods found here only deal with text parsing |
| # and basic documentation block extraction |
| # |
| |
| import fileinput, re, sys, os, string |
| |
| |
| |
| ################################################################ |
| ## |
| ## BLOCK FORMAT PATTERN |
| ## |
| ## A simple class containing compiled regular expressions used |
| ## to detect potential documentation format block comments within |
| ## C source code |
| ## |
| ## note that the 'column' pattern must contain a group that will |
| ## be used to "unbox" the content of documentation comment blocks |
| ## |
class SourceBlockFormat:
    """Compiled regular expressions describing one comment block layout.

    The three patterns are given as strings and compiled with `re.VERBOSE',
    so they may contain insignificant whitespace and `#' comments:

      start  -- recognizes the first line of a block
      column -- recognizes a line inside a block; it must contain a group
                that is used to "unbox" the text of the line
      end    -- recognizes the last line of a block
    """

    def __init__(self, id, start, column, end):
        """create a block pattern, used to recognize special documentation blocks"""

        def verbose(pattern):
            # every pattern of a format is written in verbose syntax
            return re.compile(pattern, re.VERBOSE)

        self.id = id
        self.start = verbose(start)
        self.column = verbose(column)
        self.end = verbose(end)
| |
| |
| |
| # |
| # format 1 documentation comment blocks look like the following: |
| # |
| # /************************************/ |
| # /* */ |
| # /* */ |
| # /* */ |
| # /************************************/ |
| # |
| # we define a few regular expressions here to detect them |
| # |
| |
# first and last line of a format 1 block: a comment made only of
# asterisks, alone on its line
start = r'''
  \s*      # any number of whitespace
  /\*{2,}/ # followed by '/' and at least two asterisks then '/'
  \s*$     # probably followed by whitespace
'''

# inner line of a format 1 block: a one-line comment whose text (group 1)
# does not begin with an asterisk
column = r'''
  \s*      # any number of whitespace
  /\*{1}   # followed by '/' and precisely one asterisk
  ([^*].*) # followed by anything (group 1)
  \*{1}/   # followed by one asterisk and a '/'
  \s*$     # probably followed by whitespace
'''

# a format 1 block is closed by the same kind of line that opens it, hence
# `start' is passed twice
re_source_block_format1 = SourceBlockFormat(
    id=1,
    start=start,
    column=column,
    end=start,
)
| |
| |
| # |
| # format 2 documentation comment blocks look like the following: |
| # |
| # /************************************ (at least 2 asterisks) |
| # * |
| # * |
| # * |
| # * |
| # **/ (1 or more asterisks at the end) |
| # |
| # we define a few regular expressions here to detect them |
| # |
# first line of a format 2 block: an opening comment mark made of at least
# two asterisks, alone on its line
start = r'''
  \s*      # any number of whitespace
  /\*{2,}  # followed by '/' and at least two asterisks
  \s*$     # probably followed by whitespace
'''

# inner line of a format 2 block: a leading asterisk that does not close
# the comment, then the text of the line (group 1)
column = r'''
  \s*        # any number of whitespace
  \*{1}(?!/) # followed by precisely one asterisk not followed by `/'
  (.*)       # then anything (group1)
'''

# last line of a format 2 block: the closing comment mark
end = r'''
  \s*  # any number of whitespace
  \*+/ # followed by at least one asterisk, then '/'
'''

re_source_block_format2 = SourceBlockFormat(
    id=2,
    start=start,
    column=column,
    end=end,
)
| |
| |
| # |
| # the list of supported documentation block formats, we could add new ones |
| # relatively easily |
| # |
# every line read outside of a block is tried against the `start' pattern
# of each of these formats
re_source_block_formats = [
    re_source_block_format1,
    re_source_block_format2,
]
| |
| |
| # |
| # the following regular expressions correspond to markup tags |
| # within the documentation comment blocks. they're equivalent |
| # despite their different syntax |
| # |
| # notice how each markup tag _must_ begin a new line |
| # |
# both patterns skip leading whitespace and capture the tag name in group 1
re_markup_tag1 = re.compile(r'\s*<(\w*)>')  # <xxxx> format
re_markup_tag2 = re.compile(r'\s*@(\w*):')  # @xxxx: format

#
# the list of supported markup tags, we could add new ones relatively
# easily
#
re_markup_tags = [
    re_markup_tag1,
    re_markup_tag2,
]
| |
| # |
| # used to detect a cross-reference, after markup tags have been stripped |
| # |
# group 1 is the referenced name, group 2 whatever follows it
re_crossref = re.compile(r'@(\w*)(.*)')

#
# used to detect italic and bold styles in paragraph text
#
# group 1 is the styled word (apostrophes are allowed after its first
# character), group 3 whatever follows the closing mark
#
re_italic = re.compile(r"_(\w(\w|')*)_(.*)")    # _italic_
re_bold = re.compile(r"\*(\w(\w|')*)\*(.*)")    # *bold*

#
# used to detect the end of commented source lines
#
# matches an empty C comment, possibly preceded by whitespace
#
re_source_sep = re.compile(r'\s*/\*\s*\*/')

#
# used to perform cross-reference within source output
#
# group 1 is a run of non-word characters, group 2 the word that follows
#
re_source_crossref = re.compile(r'(\W*)(\w*)')
| |
| # |
| # a list of reserved source keywords |
| # |
# Group 1 is the keyword that was recognized.
#
# The pattern is anchored on the left with a negative look-behind instead
# of `\b': a word boundary can never be found between a non-word character
# (or the start of the text) and `#', so with `\b' the preprocessor
# directives were only recognized when glued to a preceding word
# character.  In front of the plain word keywords the look-behind behaves
# exactly like `\b'.
#
# A raw string is used so that `\#' (a literal `#', which would otherwise
# start a comment in verbose mode) is not an invalid string escape.
re_source_keywords = re.compile( r'''(?<!\w) ( typedef   |
                                               struct    |
                                               enum      |
                                               union     |
                                               const     |
                                               char      |
                                               int       |
                                               short     |
                                               long      |
                                               void      |
                                               signed    |
                                               unsigned  |
                                               \#include |
                                               \#define  |
                                               \#undef   |
                                               \#if      |
                                               \#ifdef   |
                                               \#ifndef  |
                                               \#else    |
                                               \#endif   ) \b''', re.VERBOSE )
| |
| |
| ################################################################ |
| ## |
| ## SOURCE BLOCK CLASS |
| ## |
| ## A SourceProcessor is in charge of reading a C source file |
| ## and decomposing it into a series of different "SourceBlocks". |
| ## each one of these blocks can be made of the following data: |
| ## |
| ## - A documentation comment block that starts with "/**" and |
| ## whose exact format will be discussed later |
| ## |
| ## - normal sources lines, including comments |
| ## |
| ## the important fields in a text block are the following ones: |
| ## |
| ## self.lines : a list of text lines for the corresponding block |
| ## |
| ## self.content : for documentation comment blocks only, this is the |
| ## block content that has been "unboxed" from its |
| ## decoration. This is an empty list for all other blocks |
| ## (i.e. sources or ordinary comments with no starting |
| ## markup tag) |
| ## |
class SourceBlock:
    """A run of consecutive source lines, as cut out by a source processor.

    Attributes:

      processor -- the object that created the block
      filename  -- the `filename' argument given to the constructor
      lineno    -- the `lineno' argument given to the constructor
      lines     -- a private copy of the raw text lines of the block
      format    -- the value of `processor.format' when the block was
                   created: a block format, or None for normal lines
      content   -- for a documentation block, the lines "unboxed" from the
                   comment decoration; an empty list for any other block
    """

    def __init__( self, processor, filename, lineno, lines ):
        """create a block from `lines' and detect whether it is a
           documentation block

           `processor.format' must be None or provide a compiled `column'
           pattern whose group 1 is the undecorated text of a line
        """
        self.processor = processor
        self.filename  = filename
        self.lineno    = lineno
        self.lines     = lines[:]
        self.format    = processor.format
        self.content   = []

        # normal source lines never carry documentation
        if self.format is None:
            return

        # extract comment lines: keep group 1 of each line that fits the
        # `column' pattern, lines that do not fit are ignored
        unboxed = []
        for raw in self.lines:
            m = self.format.column.match( raw )
            if m:
                unboxed.append( m.group( 1 ) )

        # now, look for a markup tag: the block is a documentation block
        # as soon as one non-empty line starts with one
        for text in unboxed:
            text = text.strip()
            if text and any( tag.match( text ) for tag in re_markup_tags ):
                self.content = unboxed
                return

    def location( self ):
        """return the position of the block as the string "(filename:lineno)" """
        return "(" + self.filename + ":" + repr( self.lineno ) + ")"

    # debugging only - not used in normal operations
    def dump( self ):
        """print the unboxed content of a documentation block between two
           marker lines, or the raw lines of any other block
        """
        # calling print with a single parenthesized argument behaves the
        # same as a Python 2 statement and as a Python 3 function
        if self.content:
            print( "{{{content start---" )
            for l in self.content:
                print( l )
            print( "---content end}}}" )
            return

        for line in self.lines:
            print( line )
| |
| |
| |
| ################################################################ |
| ## |
| ## SOURCE PROCESSOR CLASS |
| ## |
| ## The SourceProcessor is in charge of reading a C source file |
| ## and decomposing it into a series of different "SourceBlock" |
| ## objects. |
| ## |
| ## each one of these blocks can be made of the following data: |
| ## |
| ## - A documentation comment block that starts with "/**" and |
| ## whose exact format will be discussed later |
| ## |
| ## - normal source lines, including comments |
| ## |
| ## |
class SourceProcessor:
    """Reads a C source file and cuts it into a list of SourceBlock objects.

    Attributes:

      blocks   -- the blocks created so far, in file order
      filename -- name of the file given to the last parse_file() call
      format   -- block format of the comment block being read, or None
                  while reading normal lines
      lineno   -- line number recorded when the last block start was seen
      lines    -- lines accumulated for the block being read
    """

    def __init__( self ):
        """initialize a source processor"""
        self.blocks   = []
        self.filename = None
        self.format   = None
        # set here too, so that add_block_lines() can be used before any
        # call to parse_file()
        self.lineno   = 0
        self.lines    = []

    def reset( self ):
        """reset a block processor, clean all its blocks"""
        self.blocks = []
        self.format = None

    def parse_file( self, filename ):
        """parse a C source file, and add its blocks to the processor's list"""
        self.reset()

        self.filename = filename

        # `fileinput' keeps a single module-wide stream, drop any stream
        # left open by someone else before starting ours
        fileinput.close()
        self.format = None
        self.lineno = 0
        self.lines  = []

        try:
            for line in fileinput.input( filename ):
                # strip trailing newlines, important on Windows machines!
                if line.endswith( '\n' ):
                    line = line[:-1]

                if self.format is None:
                    self.process_normal_line( line )
                elif self.format.end.match( line ):
                    # that's a normal block end, add it to 'lines' and
                    # create a new block
                    self.lines.append( line )
                    self.add_block_lines()
                elif self.format.column.match( line ):
                    # that's a normal column line, add it to 'lines'
                    self.lines.append( line )
                else:
                    # humm.. this is an unexpected block end,
                    # create a new block, but don't process the line
                    self.add_block_lines()

                    # we need to process the line again
                    self.process_normal_line( line )
        finally:
            # release the module-wide stream, even when a line fails
            fileinput.close()

        # record the last lines
        self.add_block_lines()

    def process_normal_line( self, line ):
        """process a normal line and check whether it is the start of a new block"""
        # must run while parse_file() is iterating: the line number is
        # read from the module-wide `fileinput' stream
        for f in re_source_block_formats:
            if f.start.match( line ):
                # flush the lines read so far, then enter the new block
                self.add_block_lines()
                self.format = f
                self.lineno = fileinput.filelineno()

        self.lines.append( line )

    def add_block_lines( self ):
        """add the current accumulated lines and create a new block"""
        if self.lines:
            block = SourceBlock( self, self.filename, self.lineno, self.lines )
            self.blocks.append( block )

        # always return to the "normal lines" state, even when there was
        # nothing to flush
        self.format = None
        self.lines  = []

    # debugging only, not used in normal operations
    def dump( self ):
        """print all blocks in a processor"""
        for b in self.blocks:
            b.dump()
| |
| # eof |