Rewrite API doc generator tool to handle more cases.

Change-Id: I8875a42ee30d374fde388256f11ff319012e9393
diff --git a/Docs/generate_api_reference_md.pl b/Docs/generate_api_reference_md.pl
deleted file mode 100755
index 546fe28..0000000
--- a/Docs/generate_api_reference_md.pl
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/usr/bin/perl
-#
-#  Copyright (C) 2015 Google, Inc.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at:
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-
-use strict;
-use warnings;
-use Cwd 'abs_path';
-use JSON;
-use File::Find;
-
-my $sl4a_path = $ARGV[0];
-my $md = "";
-my $md_end = "";
-
-if (not defined $sl4a_path) {
-    $sl4a_path = abs_path($0);
-    $sl4a_path =~ s/\/Docs\/generate_api_reference_md\.pl//g;
-}
-
-sub eachFile {
-    my $filename = $_;
-    my $fullpath = $File::Find::name;
-    if (-e $filename && $filename =~ m/Facade\.java/) {
-        open(FILE, $filename);
-        my @lines = <FILE>;
-        close(FILE);
-
-        my $title = $filename;
-        $title =~ s/\.java//;
-        $title = '**' . $title . '**' . "\n";
-        $md = $md . "\n$title";
-        my $description = "";
-        for (my $i = 0; $i < scalar(@lines); $i++) {
-            my $line = $lines[$i];
-            $line =~ s/\n//;
-            $line =~ s/^\s+|\s+$//g;
-
-            if ($line =~ m /^\@Rpc\(description/) {
-                $description = "";
-                for (my $j = $i; $j < scalar(@lines); $j++) {
-                    my $l = $lines[$j];
-                    $l =~ s/^\s+|\s+$//g;
-                    $description = $description . $l;
-                    if ($l =~ m/\)$/) {
-                        $i = $j;
-                        last;
-                    }
-                }
-                $description = _format_description($description);
-
-            }
-            if ($line =~ m /^public/ && $description ne "") {
-                my @words = split(/\s/, $line);
-                my $func_name = $words[2];
-                my $func_names_and_params = "";
-                if ($func_name =~ /void/) {
-                    $func_name = $words[3];
-                    if ($func_name =~ /void/) {
-                        $description = "";
-                        $func_names_and_params = "";
-                        next;
-                    }
-                }
-                if ($func_name =~ /\(/) {
-                    $func_name =~ s/\(.*//;
-                }
-                $func_name =~ s/\(//g;
-                $func_name =~ s/\)//g;
-                for (my $j = $i; $j < scalar(@lines); $j++) {
-                    $func_names_and_params = $func_names_and_params . $lines[$j];
-                    if ($lines[$j] =~ m/{$/) {
-                        last;
-                    }
-                }
-                $func_names_and_params = _format_func_names_and_params($func_names_and_params);
-                if ($func_names_and_params eq "") {
-                    $func_names_and_params = ")\n";
-                } else {
-                    $func_names_and_params = "\n" . $func_names_and_params;
-                }
-                $md_end = $md_end . "# $func_name\n```\n" .
-                    "$func_name(" . $func_names_and_params . "\n$description\n```\n\n" ;
-                $description = "";
-                $func_names_and_params = "";
-                my $lc_name = lc $func_name;
-                $md = $md . "  * [$func_name](\#$lc_name)\n";
-            }
-        }
-
-    }
-}
-
-sub _format_func_names_and_params {
-    my $fn = shift;
-    $fn =~ s/^\s+|\s+$//g;
-    my @words = split(/\n/,$fn);
-    my $format = "";
-    my $description = "";
-    my $name = "";
-    my $params = "";
-    for my $w (@words) {
-        if ($w =~ /\@RpcParameter\(name = "(.+?)", description = "(.+?)"/) {
-           $name = $1;
-           $description = $2;
-        }
-        elsif ($w =~ /\@RpcParameter\(name = "(.+?)"/) {
-           $name = $1;
-        }
-        if ($w =~ m/,$/) {
-            my @split = split(/\s/, $w);
-            $params = "$split[$#split-1] $split[$#split]";
-            if ($description eq "") {
-                $format = $params;
-            } elsif ($description ne "") {
-                $params =~ s/,//;
-                $format = $format . "  $params: $description,\n"
-            }
-            $description = "";
-            $name = "";
-            $params = "";
-        }
-    }
-    $format =~ s/,$/)/;
-    return $format;
-}
-
-sub _format_description {
-    my $description = shift;
-    $description =~ s/\@Rpc\(//;
-    $description =~ s/^\s+|\s+$//g;
-    $description =~ s/\n//g;
-    $description =~ s/description = \"//g;
-    $description =~ s/\"\)//g;
-    if ($description =~ m/returns(\s*)=/) {
-        $description =~ s/\",//;
-        my @words = split(/returns(\s*)=/, $description);
-        my $des = $words[0];
-        my $ret = $words[1];
-        $ret =~ s/^\s+|\s+$//g;
-        $ret =~ s/^"//;
-        $description = $des . "\n\n" . "Returns:\n" . "  $ret";
-    }
-    return $description;
-}
-
-find (\&eachFile, $sl4a_path);
-open(FILE, ">$sl4a_path/Docs/ApiReference.md");
-print FILE $md . "\n";
-print FILE $md_end . "\n";
-close(FILE);
diff --git a/Docs/generate_api_reference_md.py b/Docs/generate_api_reference_md.py
new file mode 100755
index 0000000..8c77669
--- /dev/null
+++ b/Docs/generate_api_reference_md.py
@@ -0,0 +1,222 @@
+#!/usr/bin/env python
+#
+#  Copyright (C) 2016 Google, Inc.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at:
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import collections
+import itertools
+import os
+import re
+import subprocess
+
+# Parsing states:
+# STATE_INITIAL: looking for rpc or function definition
+# STATE_RPC_DECORATOR: in the middle of a multi-line rpc definition
+# STATE_FUNCTION_DEFINITION: in the middle of a multi-line function definition
+# STATE_COMPLETE: done parsing a function
+STATE_INITIAL = 1
+STATE_RPC_DECORATOR = 2
+STATE_FUNCTION_DEFINITION = 3
+STATE_COMPLETE = 4
+
+# RE to match key=value pairs; a quoted value must end with its opening quote.
+KEY_VAL_RE = re.compile(r'''
+        (?P<key>\w+)\s*=\s* # Key consists of only alphanumerics
+        (?P<quote>["']?)    # Optional quote character.
+        (?P<value>.*?)      # Value is a non greedy match
+        (?P=quote)          # Closing quote equals the first.
+        ($|,)               # Entry ends with comma or end of string
+    ''', re.VERBOSE)
+
+# RE for a function definition; group 1 is the last word followed by "(".
+FUNC_RE = re.compile(r'.+\s+(\w+)\s*\(.*')
+
+
+class Function(object):
+    """An RPC-exported Java method parsed from its @Rpc and signature text."""
+
+    def __init__(self, rpc_def, func_def):
+        """Parses rpc_def (the @Rpc arguments) and func_def (the signature)."""
+        self._function = ''
+        self._signature = ''
+        self._description = ''
+        self._returns = ''
+
+        self._ParseRpcDefinition(rpc_def)
+        self._ParseFunctionDefinition(func_def)
+
+    def _ParseRpcDefinition(self, s):
+        """Extracts 'description' and 'returns' from the @Rpc arguments."""
+        # Collapse Java string concatenation: '"a" + "b"' becomes '"ab"'.
+        s = s.replace('" + "', '')
+        s = s.strip('()')
+        for m in KEY_VAL_RE.finditer(s):
+            if m.group('key') == 'description':
+                self._description = m.group('value')
+            elif m.group('key') == 'returns':
+                self._returns = m.group('value')
+
+    def _ParseFunctionDefinition(self, s):
+        """Strips modifiers and annotations, then stores name and signature."""
+        # Remove some keywords we don't care about.
+        s = s.replace('public ', '')
+        s = s.replace('synchronized ', '')
+        # Remove any throw specifications (raw strings keep \s a regex escape).
+        s = re.sub(r'\s+throws.*', '', s)
+        s = s.strip('{')
+        # Remove all the RPC parameter annotations.
+        s = s.replace('@RpcOptional ', '')
+        s = s.replace('@RpcOptional() ', '')
+        s = re.sub(r'@RpcParameter\s*\(.+?\)\s+', '', s)
+        s = re.sub(r'@RpcDefault\s*\(.+?\)\s+', '', s)
+        m = FUNC_RE.match(s)
+        if m:
+            self._function = m.group(1)
+        self._signature = s.strip()
+
+    @property
+    def function(self):
+        return self._function  # '' when FUNC_RE did not match the signature
+
+    @property
+    def signature(self):
+        return self._signature  # signature text with annotations removed
+
+    @property
+    def description(self):
+        return self._description  # '' when @Rpc had no description key
+
+    @property
+    def returns(self):
+        return self._returns  # '' when @Rpc had no returns key
+
+
+class DocGenerator(object):
+    """Generates Markdown API documentation from *Facade.java files."""
+
+    def __init__(self, basepath):
+        """Construct based on all the *Facade.java files in the given basepath."""
+        self._functions = collections.defaultdict(list)
+
+        for path, dirs, files in os.walk(basepath):
+            for f in files:
+                if f.endswith('Facade.java'):
+                    self._Parse(os.path.join(path, f))
+
+    def _Parse(self, filename):
+        """Parser state machine for a single file."""
+        state = STATE_INITIAL
+        self._current_rpc = ''
+        self._current_function = ''
+
+        with open(filename, 'r') as f:
+            for line in f:
+                line = line.strip()
+                if state == STATE_INITIAL:
+                    state = self._ParseLineInitial(line)
+                elif state == STATE_RPC_DECORATOR:
+                    state = self._ParseLineRpcDecorator(line)
+                elif state == STATE_FUNCTION_DEFINITION:
+                    state = self._ParseLineFunctionDefinition(line)
+
+                if state == STATE_COMPLETE:
+                    self._EmitFunction(filename)
+                    state = STATE_INITIAL
+
+    def _ParseLineInitial(self, line):
+        """Parse a line while in STATE_INITIAL."""
+        if line.startswith('@Rpc('):
+            self._current_rpc = line[4:]
+            if not line.endswith(')'):
+                # Multi-line RPC definition
+                return STATE_RPC_DECORATOR
+        elif line.startswith('public'):
+            self._current_function = line
+            if not line.endswith(('{', ';')):
+                # Multi-line function definition
+                return STATE_FUNCTION_DEFINITION
+            # '{' opens a body; ';' ends a field or bodiless declaration.
+            return STATE_COMPLETE
+        return STATE_INITIAL
+
+    def _ParseLineRpcDecorator(self, line):
+        """Parse a line while in STATE_RPC_DECORATOR."""
+        self._current_rpc += ' ' + line
+        if line.endswith(')'):
+            # Done with RPC definition
+            return STATE_INITIAL
+        else:
+            # Multi-line RPC definition
+            return STATE_RPC_DECORATOR
+
+    def _ParseLineFunctionDefinition(self, line):
+        """Parse a line while in STATE_FUNCTION_DEFINITION."""
+        self._current_function += ' ' + line
+        if line.endswith(('{', ';')):
+            # Done; ';' ends a field or bodiless declaration, so stop scanning.
+            return STATE_COMPLETE
+        # Multi-line function definition
+        return STATE_FUNCTION_DEFINITION
+
+    def _EmitFunction(self, filename):
+        """Store a function definition from the current parse state."""
+        if self._current_rpc and self._current_function:
+            module = os.path.basename(filename)[0:-5]
+            f = Function(self._current_rpc, self._current_function)
+            if f.function:
+                self._functions[module].append(f)
+
+        self._current_rpc = None
+        self._current_function = None
+
+    def WriteOutput(self, filename):
+        """Writes the table of contents and method descriptions to filename."""
+        git_rev = None
+        try:
+            git_rev = subprocess.check_output(
+                ['git', 'rev-parse', 'HEAD']).decode('utf-8').strip()
+        except (subprocess.CalledProcessError, OSError):
+            # Getting the commit ID is optional; we continue if we cannot get it
+            pass
+
+        with open(filename, 'w') as f:
+            if git_rev:
+                f.write('Generated at commit `%s`\n\n' % git_rev)
+            # Write table of contents
+            for module in sorted(self._functions.keys()):
+                f.write('**%s**\n\n' % module)
+                for func in self._functions[module]:
+                    f.write('  * [%s](#%s)\n' %
+                            (func.function, func.function.lower()))
+                f.write('\n')
+
+            f.write('# Method descriptions\n\n')
+            for func in itertools.chain.from_iterable(
+                    self._functions[m] for m in sorted(self._functions)):
+                f.write('## %s\n\n' % func.function)
+                f.write('```\n')
+                f.write('%s\n\n' % func.signature)
+                f.write('%s\n' % func.description)
+                if func.returns:
+                    if func.returns.lower().startswith('return'):
+                        f.write('\n%s\n' % func.returns)
+                    else:
+                        f.write('\nReturns %s\n' % func.returns)
+                f.write('```\n\n')
+
+if __name__ == '__main__':  # Generate the docs only when run as a script.
+    basepath = os.path.abspath(os.path.join(os.path.dirname(
+        os.path.realpath(__file__)), '..'))
+    g = DocGenerator(basepath)
+    g.WriteOutput(os.path.join(basepath, 'Docs/ApiReference.md'))
diff --git a/README.md b/README.md
index c5a9ef1..425f623 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,7 @@
 ### Generate the API Documentation
 From SL4A source directory run this command:
 
-        perl Docs/generate_api_reference_md.pl
+        python Docs/generate_api_reference_md.py
 
 In the Docs directory there should now be an ApiReference.md file that
 contains which RPC functions are available in SL4A as well as documentation