blob: e313e963afb10b39e804e826b22a9b1afda8a802 [file] [log] [blame]
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utility function for stripping comments out of JavaScript source code."""
import re
def _TokenizeJS(text):
"""Splits source code text into segments in preparation for comment stripping.
Note that this doesn't tokenize for parsing. There is no notion of statements,
variables, etc. The only tokens of interest are comment-related tokens.
Args:
text: The contents of a JavaScript file.
Yields:
A succession of strings in the file, including all comment-related symbols.
"""
rest = text
tokens = ["//", "/*", "*/", "\n"]
next_tok = re.compile('|'.join([re.escape(x) for x in tokens]))
while len(rest):
m = next_tok.search(rest)
if not m:
# end of string
yield rest
return
min_index = m.start()
end_index = m.end()
if min_index > 0:
yield rest[:min_index]
yield rest[min_index:end_index]
rest = rest[end_index:]
def StripJSComments(text):
    """Strips comments out of JavaScript source code.

    Line comments ("// ...") are removed together with the newline that ends
    them. Block comments ("/* ... */") are removed and may nest: each inner
    "/*" must be closed by its own "*/". A comment that is never terminated
    extends to the end of the input.

    The underlying tokenizer does not understand string or regex literals, so
    a comment delimiter appearing inside one is still treated as a comment.

    Args:
        text: JavaScript source text.

    Returns:
        JavaScript source text with comments stripped out.
    """
    kept = []
    # One shared iterator: the inner loops below consume the tokens belonging
    # to a comment, and the outer loop resumes right after them. Plain `for`
    # iteration is used rather than calling `.next()` on the iterator, since
    # that method only exists on Python 2.
    token_stream = iter(_TokenizeJS(text))
    for token in token_stream:
        if token == "//":
            # Discard everything up to and including the end of the line.
            for skipped in token_stream:
                if skipped == "\n":
                    break
        elif token == "/*":
            # Discard until the matching close, tracking nested openers.
            depth = 1
            for skipped in token_stream:
                if skipped == "/*":
                    depth += 1
                elif skipped == "*/":
                    depth -= 1
                    if depth == 0:
                        break
        else:
            kept.append(token)
    return "".join(kept)