Version 3.30.33.12 (cherry-pick)

Merged d287f225a3630e71a6d14009e7baf71f465be861

Limit code size generated for very large regexps

R=machenbach@chromium.org
BUG=chromium:440913
LOG=N

Review URL: https://codereview.chromium.org/825673002

Cr-Commit-Position: refs/branch-heads/3.30@{#25266}
diff --git a/src/jsregexp.cc b/src/jsregexp.cc
index d078d07..8111a12 100644
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -1027,6 +1027,8 @@
 
   inline bool ignore_case() { return ignore_case_; }
   inline bool one_byte() { return one_byte_; }
+  inline bool optimize() { return optimize_; }
+  inline void set_optimize(bool value) { optimize_ = value; }
   FrequencyCollator* frequency_collator() { return &frequency_collator_; }
 
   int current_expansion_factor() { return current_expansion_factor_; }
@@ -1047,6 +1049,7 @@
   bool ignore_case_;
   bool one_byte_;
   bool reg_exp_too_big_;
+  bool optimize_;
   int current_expansion_factor_;
   FrequencyCollator frequency_collator_;
   Zone* zone_;
@@ -1079,6 +1082,7 @@
       ignore_case_(ignore_case),
       one_byte_(one_byte),
       reg_exp_too_big_(false),
+      optimize_(FLAG_regexp_optimization),
       current_expansion_factor_(1),
       frequency_collator_(),
       zone_(zone) {
@@ -1094,16 +1098,6 @@
     Handle<String> pattern) {
   Heap* heap = pattern->GetHeap();
 
-  bool use_slow_safe_regexp_compiler = false;
-  if (heap->total_regexp_code_generated() >
-          RegExpImpl::kRegWxpCompiledLimit &&
-      heap->isolate()->memory_allocator()->SizeExecutable() >
-          RegExpImpl::kRegExpExecutableMemoryLimit) {
-    use_slow_safe_regexp_compiler = true;
-  }
-
-  macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler);
-
 #ifdef DEBUG
   if (FLAG_trace_regexp_assembler)
     macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
@@ -2257,8 +2251,7 @@
   // We are being asked to make a non-generic version.  Keep track of how many
   // non-generic versions we generate so as not to overdo it.
   trace_count_++;
-  if (FLAG_regexp_optimization &&
-      trace_count_ < kMaxCopiesCodeGenerated &&
+  if (compiler->optimize() && trace_count_ < kMaxCopiesCodeGenerated &&
       compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {
     return CONTINUE;
   }
@@ -4137,15 +4130,12 @@
     }
     alt_gen->expects_preload = preload->preload_is_current_;
     bool generate_full_check_inline = false;
-    if (FLAG_regexp_optimization &&
+    if (compiler->optimize() &&
         try_to_emit_quick_check_for_alternative(i == 0) &&
-        alternative.node()->EmitQuickCheck(compiler,
-                                           trace,
-                                           &new_trace,
-                                           preload->preload_has_checked_bounds_,
-                                           &alt_gen->possible_success,
-                                           &alt_gen->quick_check_details,
-                                           fall_through_on_failure)) {
+        alternative.node()->EmitQuickCheck(
+            compiler, trace, &new_trace, preload->preload_has_checked_bounds_,
+            &alt_gen->possible_success, &alt_gen->quick_check_details,
+            fall_through_on_failure)) {
       // Quick check was generated for this choice.
       preload->preload_is_current_ = true;
       preload->preload_has_checked_bounds_ = true;
@@ -4943,7 +4933,7 @@
 
   if (body_can_be_empty) {
     body_start_reg = compiler->AllocateRegister();
-  } else if (FLAG_regexp_optimization && !needs_capture_clearing) {
+  } else if (compiler->optimize() && !needs_capture_clearing) {
     // Only unroll if there are no captures and the body can't be
     // empty.
     {
@@ -6041,6 +6031,8 @@
   }
   RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte, zone);
 
+  compiler.set_optimize(!TooMuchRegExpCode(pattern));
+
   // Sample some characters from the middle of the string.
   static const int kSampleSize = 128;
 
@@ -6143,6 +6135,8 @@
   RegExpMacroAssemblerIrregexp macro_assembler(codes, zone);
 #endif  // V8_INTERPRETED_REGEXP
 
+  macro_assembler.set_slow_safe(TooMuchRegExpCode(pattern));
+
   // Inserted here, instead of in Assembler, because it depends on information
   // in the AST that isn't replicated in the Node structure.
   static const int kMaxBacksearchLimit = 1024;
@@ -6166,4 +6160,14 @@
 }
 
 
+bool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) {
+  Heap* heap = pattern->GetHeap();
+  bool too_much = pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize;
+  if (heap->total_regexp_code_generated() > RegExpImpl::kRegExpCompiledLimit &&
+      heap->isolate()->memory_allocator()->SizeExecutable() >
+          RegExpImpl::kRegExpExecutableMemoryLimit) {
+    too_much = true;
+  }
+  return too_much;
+}
 }}  // namespace v8::internal
diff --git a/src/jsregexp.h b/src/jsregexp.h
index c65adea..c81ccb2 100644
--- a/src/jsregexp.h
+++ b/src/jsregexp.h
@@ -213,7 +213,8 @@
   // total regexp code compiled including code that has subsequently been freed
   // and the total executable memory at any point.
   static const int kRegExpExecutableMemoryLimit = 16 * MB;
-  static const int kRegWxpCompiledLimit = 1 * MB;
+  static const int kRegExpCompiledLimit = 1 * MB;
+  static const int kRegExpTooLargeToOptimize = 10 * KB;
 
  private:
   static bool CompileIrregexp(Handle<JSRegExp> re,
@@ -1666,6 +1667,8 @@
                                    Handle<String> sample_subject,
                                    bool is_one_byte, Zone* zone);
 
+  static bool TooMuchRegExpCode(Handle<String> pattern);
+
   static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
 };
 
diff --git a/src/version.cc b/src/version.cc
index 057c7e4..63f7b62 100644
--- a/src/version.cc
+++ b/src/version.cc
@@ -35,7 +35,7 @@
 #define MAJOR_VERSION     3
 #define MINOR_VERSION     30
 #define BUILD_NUMBER      33
-#define PATCH_LEVEL       11
+#define PATCH_LEVEL       12
 // Use 1 for candidates and 0 otherwise.
 // (Boolean macro values are not supported by all preprocessors.)
 #define IS_CANDIDATE_VERSION 0
diff --git a/test/cctest/test-heap.cc b/test/cctest/test-heap.cc
index 0d43c06..cc44c39 100644
--- a/test/cctest/test-heap.cc
+++ b/test/cctest/test-heap.cc
@@ -1694,6 +1694,64 @@
 }
 
 
+TEST(TestSizeOfRegExpCode) {
+  if (!FLAG_regexp_optimization) return;
+
+  v8::V8::Initialize();
+
+  Isolate* isolate = CcTest::i_isolate();
+  HandleScope scope(isolate);
+
+  LocalContext context;
+
+  // Adjust source below and this check to match
+  // RegExpImple::kRegExpTooLargeToOptimize.
+  DCHECK_EQ(i::RegExpImpl::kRegExpTooLargeToOptimize, 10 * KB);
+
+  // Compile a regexp that is much larger if we are using regexp optimizations.
+  CompileRun(
+      "var reg_exp_source = '(?:a|bc|def|ghij|klmno|pqrstu)';"
+      "var half_size_reg_exp;"
+      "while (reg_exp_source.length < 10 * 1024) {"
+      "  half_size_reg_exp = reg_exp_source;"
+      "  reg_exp_source = reg_exp_source + reg_exp_source;"
+      "}"
+      // Flatten string.
+      "reg_exp_source.match(/f/);");
+
+  // Get initial heap size after several full GCs, which will stabilize
+  // the heap size and return with sweeping finished completely.
+  CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
+  CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
+  CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
+  CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
+  CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
+  MarkCompactCollector* collector = CcTest::heap()->mark_compact_collector();
+  if (collector->sweeping_in_progress()) {
+    collector->EnsureSweepingCompleted();
+  }
+  int initial_size = static_cast<int>(CcTest::heap()->SizeOfObjects());
+
+  CompileRun("'foo'.match(reg_exp_source);");
+  CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
+  int size_with_regexp = static_cast<int>(CcTest::heap()->SizeOfObjects());
+
+  CompileRun("'foo'.match(half_size_reg_exp);");
+  CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
+  int size_with_optimized_regexp =
+      static_cast<int>(CcTest::heap()->SizeOfObjects());
+
+  int size_of_regexp_code = size_with_regexp - initial_size;
+
+  CHECK_LE(size_of_regexp_code, 500 * KB);
+
+  // Small regexp is half the size, but compiles to more than twice the code
+  // due to the optimization steps.
+  CHECK_GE(size_with_optimized_regexp,
+           size_with_regexp + size_of_regexp_code * 2);
+}
+
+
 TEST(TestSizeOfObjects) {
   v8::V8::Initialize();