Version 3.30.33.12 (cherry-pick)
Merged d287f225a3630e71a6d14009e7baf71f465be861
Limit code size generated for very large regexps
R=machenbach@chromium.org
BUG=chromium:440913
LOG=N
Review URL: https://codereview.chromium.org/825673002
Cr-Commit-Position: refs/branch-heads/3.30@{#25266}
diff --git a/src/jsregexp.cc b/src/jsregexp.cc
index d078d07..8111a12 100644
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -1027,6 +1027,8 @@
inline bool ignore_case() { return ignore_case_; }
inline bool one_byte() { return one_byte_; }
+ inline bool optimize() { return optimize_; }
+ inline void set_optimize(bool value) { optimize_ = value; }
FrequencyCollator* frequency_collator() { return &frequency_collator_; }
int current_expansion_factor() { return current_expansion_factor_; }
@@ -1047,6 +1049,7 @@
bool ignore_case_;
bool one_byte_;
bool reg_exp_too_big_;
+ bool optimize_;
int current_expansion_factor_;
FrequencyCollator frequency_collator_;
Zone* zone_;
@@ -1079,6 +1082,7 @@
ignore_case_(ignore_case),
one_byte_(one_byte),
reg_exp_too_big_(false),
+ optimize_(FLAG_regexp_optimization),
current_expansion_factor_(1),
frequency_collator_(),
zone_(zone) {
@@ -1094,16 +1098,6 @@
Handle<String> pattern) {
Heap* heap = pattern->GetHeap();
- bool use_slow_safe_regexp_compiler = false;
- if (heap->total_regexp_code_generated() >
- RegExpImpl::kRegWxpCompiledLimit &&
- heap->isolate()->memory_allocator()->SizeExecutable() >
- RegExpImpl::kRegExpExecutableMemoryLimit) {
- use_slow_safe_regexp_compiler = true;
- }
-
- macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler);
-
#ifdef DEBUG
if (FLAG_trace_regexp_assembler)
macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
@@ -2257,8 +2251,7 @@
// We are being asked to make a non-generic version. Keep track of how many
// non-generic versions we generate so as not to overdo it.
trace_count_++;
- if (FLAG_regexp_optimization &&
- trace_count_ < kMaxCopiesCodeGenerated &&
+ if (compiler->optimize() && trace_count_ < kMaxCopiesCodeGenerated &&
compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {
return CONTINUE;
}
@@ -4137,15 +4130,12 @@
}
alt_gen->expects_preload = preload->preload_is_current_;
bool generate_full_check_inline = false;
- if (FLAG_regexp_optimization &&
+ if (compiler->optimize() &&
try_to_emit_quick_check_for_alternative(i == 0) &&
- alternative.node()->EmitQuickCheck(compiler,
- trace,
- &new_trace,
- preload->preload_has_checked_bounds_,
- &alt_gen->possible_success,
- &alt_gen->quick_check_details,
- fall_through_on_failure)) {
+ alternative.node()->EmitQuickCheck(
+ compiler, trace, &new_trace, preload->preload_has_checked_bounds_,
+ &alt_gen->possible_success, &alt_gen->quick_check_details,
+ fall_through_on_failure)) {
// Quick check was generated for this choice.
preload->preload_is_current_ = true;
preload->preload_has_checked_bounds_ = true;
@@ -4943,7 +4933,7 @@
if (body_can_be_empty) {
body_start_reg = compiler->AllocateRegister();
- } else if (FLAG_regexp_optimization && !needs_capture_clearing) {
+ } else if (compiler->optimize() && !needs_capture_clearing) {
// Only unroll if there are no captures and the body can't be
// empty.
{
@@ -6041,6 +6031,8 @@
}
RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte, zone);
+ compiler.set_optimize(!TooMuchRegExpCode(pattern));
+
// Sample some characters from the middle of the string.
static const int kSampleSize = 128;
@@ -6143,6 +6135,8 @@
RegExpMacroAssemblerIrregexp macro_assembler(codes, zone);
#endif // V8_INTERPRETED_REGEXP
+ macro_assembler.set_slow_safe(TooMuchRegExpCode(pattern));
+
// Inserted here, instead of in Assembler, because it depends on information
// in the AST that isn't replicated in the Node structure.
static const int kMaxBacksearchLimit = 1024;
@@ -6166,4 +6160,14 @@
}
+bool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) {
+ Heap* heap = pattern->GetHeap();
+ bool too_much = pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize;
+ if (heap->total_regexp_code_generated() > RegExpImpl::kRegExpCompiledLimit &&
+ heap->isolate()->memory_allocator()->SizeExecutable() >
+ RegExpImpl::kRegExpExecutableMemoryLimit) {
+ too_much = true;
+ }
+ return too_much;
+}
}} // namespace v8::internal
diff --git a/src/jsregexp.h b/src/jsregexp.h
index c65adea..c81ccb2 100644
--- a/src/jsregexp.h
+++ b/src/jsregexp.h
@@ -213,7 +213,8 @@
// total regexp code compiled including code that has subsequently been freed
// and the total executable memory at any point.
static const int kRegExpExecutableMemoryLimit = 16 * MB;
- static const int kRegWxpCompiledLimit = 1 * MB;
+ static const int kRegExpCompiledLimit = 1 * MB;
+ static const int kRegExpTooLargeToOptimize = 10 * KB;
private:
static bool CompileIrregexp(Handle<JSRegExp> re,
@@ -1666,6 +1667,8 @@
Handle<String> sample_subject,
bool is_one_byte, Zone* zone);
+ static bool TooMuchRegExpCode(Handle<String> pattern);
+
static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
};
diff --git a/src/version.cc b/src/version.cc
index 057c7e4..63f7b62 100644
--- a/src/version.cc
+++ b/src/version.cc
@@ -35,7 +35,7 @@
#define MAJOR_VERSION 3
#define MINOR_VERSION 30
#define BUILD_NUMBER 33
-#define PATCH_LEVEL 11
+#define PATCH_LEVEL 12
// Use 1 for candidates and 0 otherwise.
// (Boolean macro values are not supported by all preprocessors.)
#define IS_CANDIDATE_VERSION 0
diff --git a/test/cctest/test-heap.cc b/test/cctest/test-heap.cc
index 0d43c06..cc44c39 100644
--- a/test/cctest/test-heap.cc
+++ b/test/cctest/test-heap.cc
@@ -1694,6 +1694,64 @@
}
+TEST(TestSizeOfRegExpCode) {
+ if (!FLAG_regexp_optimization) return;
+
+ v8::V8::Initialize();
+
+ Isolate* isolate = CcTest::i_isolate();
+ HandleScope scope(isolate);
+
+ LocalContext context;
+
+ // Adjust source below and this check to match
+ // RegExpImple::kRegExpTooLargeToOptimize.
+ DCHECK_EQ(i::RegExpImpl::kRegExpTooLargeToOptimize, 10 * KB);
+
+ // Compile a regexp that is much larger if we are using regexp optimizations.
+ CompileRun(
+ "var reg_exp_source = '(?:a|bc|def|ghij|klmno|pqrstu)';"
+ "var half_size_reg_exp;"
+ "while (reg_exp_source.length < 10 * 1024) {"
+ " half_size_reg_exp = reg_exp_source;"
+ " reg_exp_source = reg_exp_source + reg_exp_source;"
+ "}"
+ // Flatten string.
+ "reg_exp_source.match(/f/);");
+
+ // Get initial heap size after several full GCs, which will stabilize
+ // the heap size and return with sweeping finished completely.
+ CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
+ CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
+ CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
+ CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
+ CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
+ MarkCompactCollector* collector = CcTest::heap()->mark_compact_collector();
+ if (collector->sweeping_in_progress()) {
+ collector->EnsureSweepingCompleted();
+ }
+ int initial_size = static_cast<int>(CcTest::heap()->SizeOfObjects());
+
+ CompileRun("'foo'.match(reg_exp_source);");
+ CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
+ int size_with_regexp = static_cast<int>(CcTest::heap()->SizeOfObjects());
+
+ CompileRun("'foo'.match(half_size_reg_exp);");
+ CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
+ int size_with_optimized_regexp =
+ static_cast<int>(CcTest::heap()->SizeOfObjects());
+
+ int size_of_regexp_code = size_with_regexp - initial_size;
+
+ CHECK_LE(size_of_regexp_code, 500 * KB);
+
+ // Small regexp is half the size, but compiles to more than twice the code
+ // due to the optimization steps.
+ CHECK_GE(size_with_optimized_regexp,
+ size_with_regexp + size_of_regexp_code * 2);
+}
+
+
TEST(TestSizeOfObjects) {
v8::V8::Initialize();