| # Copyright 2013 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Utility function for stripping comments out of JavaScript source code.""" |
| |
| import re |
| |
| |
| def _TokenizeJS(text): |
| """Splits source code text into segments in preparation for comment stripping. |
| |
| Note that this doesn't tokenize for parsing. There is no notion of statements, |
| variables, etc. The only tokens of interest are comment-related tokens. |
| |
| Args: |
| text: The contents of a JavaScript file. |
| |
| Yields: |
| A succession of strings in the file, including all comment-related symbols. |
| """ |
| rest = text |
| tokens = ["//", "/*", "*/", "\n"] |
| next_tok = re.compile('|'.join([re.escape(x) for x in tokens])) |
| while len(rest): |
| m = next_tok.search(rest) |
| if not m: |
| # end of string |
| yield rest |
| return |
| min_index = m.start() |
| end_index = m.end() |
| |
| if min_index > 0: |
| yield rest[:min_index] |
| |
| yield rest[min_index:end_index] |
| rest = rest[end_index:] |
| |
| |
def StripJSComments(text):
    """Strips comments out of JavaScript source code.

    Line comments ("// ...") are removed up to and including the newline that
    ends them. Block comments ("/* ... */") are removed with nesting taken
    into account: every "/*" seen inside a block comment requires its own
    matching "*/". A comment that is still open at the end of the input
    swallows the rest of the text.

    The underlying tokenizer has no notion of string literals, so comment
    markers appearing inside JavaScript strings are treated as comments too.

    Args:
      text: JavaScript source text.

    Returns:
      JavaScript source text with comments stripped out.
    """
    result_tokens = []
    # A single shared iterator: the inner loops below consume the tokens that
    # belong to a comment, and the outer loop resumes right after them.
    # Using iter()/for instead of the Python 2-only `.next()` method keeps
    # this working on both Python 2 and Python 3.
    token_stream = iter(_TokenizeJS(text))
    for t in token_stream:
        if t == "//":
            # Discard everything through the end of the line; the newline
            # itself is dropped as well.
            for t2 in token_stream:
                if t2 == "\n":
                    break
        elif t == "/*":
            # Discard tokens until the matching "*/", counting nested "/*".
            nesting = 1
            for t2 in token_stream:
                if t2 == "/*":
                    nesting += 1
                elif t2 == "*/":
                    nesting -= 1
                    if nesting == 0:
                        break
        else:
            result_tokens.append(t)
    return "".join(result_tokens)