Pool allocate ImplicitConversionSequences. To avoid malloc thrashing give OverloadCandidateSet an inline capacity for conversion sequences. We use the fact that OverloadCandidates never outlive the OverloadCandidateSet and have a fixed amount of conversion sequences. This eliminates the oversized SmallVector from OverloadCandidate shrinking it from 752 to 208 bytes. On the test case from the "Why is CLANG++ so freaking slow" thread on llvmdev this avoids one gig of vector reallocation (including memcpy) which translates into 5-10% speedup on Lion/x86_64. Overload candidate computation is still the biggest malloc contributor when compiling templated c++ code. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@148186 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 09dd3798e100ace40defdc5541277502346213f2 [log] [tgz]
author: Benjamin Kramer <benny.kra@googlemail.com> Sat Jan 14 16:32:05 2012 +0000
committer: Benjamin Kramer <benny.kra@googlemail.com> Sat Jan 14 16:32:05 2012 +0000
tree: 69decdebd982bb39d1f87d022ca45ddf23e9c704
parent: 0e6a16f3e8c8d80a61e89848d216972aa8daea27 [diff]
diff --git a/include/clang/Sema/Overload.h b/include/clang/Sema/Overload.h
index e2d8fdc..6865b85 100644
--- a/include/clang/Sema/Overload.h
+++ b/include/clang/Sema/Overload.h

@@ -24,6 +24,7 @@
 #include "clang/Sema/SemaFixItUtils.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Allocator.h"
 
 namespace clang {
   class ASTContext;
@@ -582,12 +583,17 @@
     CXXConversionDecl *Surrogate;
 
     /// Conversions - The conversion sequences used to convert the
-    /// function arguments to the function parameters.
-    SmallVector<ImplicitConversionSequence, 4> Conversions;
+    /// function arguments to the function parameters, the pointer points to a
+    /// fixed size array with NumConversions elements. The memory is owned by
+    /// the OverloadCandidateSet.
+    ImplicitConversionSequence *Conversions;
 
     /// The FixIt hints which can be used to fix the Bad candidate.
     ConversionFixItGenerator Fix;
 
+    /// NumConversions - The number of elements in the Conversions array.
+    unsigned NumConversions;
+
     /// Viable - True to indicate that this overload candidate is viable.
     bool Viable;
 
@@ -653,13 +659,17 @@
       StandardConversionSequence FinalConversion;
     };
 
+    ~OverloadCandidate() {
+      for (unsigned i = 0, e = NumConversions; i != e; ++i)
+        Conversions[i].~ImplicitConversionSequence();
+    }
+
     /// hasAmbiguousConversion - Returns whether this overload
     /// candidate requires an ambiguous conversion or not.
     bool hasAmbiguousConversion() const {
-      for (SmallVectorImpl<ImplicitConversionSequence>::const_iterator
-             I = Conversions.begin(), E = Conversions.end(); I != E; ++I) {
-        if (!I->isInitialized()) return false;
-        if (I->isAmbiguous()) return true;
+      for (unsigned i = 0, e = NumConversions; i != e; ++i) {
+        if (!Conversions[i].isInitialized()) return false;
+        if (Conversions[i].isAmbiguous()) return true;
       }
       return false;
     }
@@ -684,13 +694,19 @@
     SmallVector<OverloadCandidate, 16> Candidates;
     llvm::SmallPtrSet<Decl *, 16> Functions;
 
+    // Allocator for OverloadCandidate::Conversions. We store the first few
+    // elements inline to avoid allocation for small sets.
+    llvm::BumpPtrAllocator ConversionSequenceAllocator;
+    size_t InlineSpace[16*sizeof(ImplicitConversionSequence) / sizeof(size_t)];
+    unsigned NumInlineSequences;
+
     SourceLocation Loc;    
     
     OverloadCandidateSet(const OverloadCandidateSet &);
     OverloadCandidateSet &operator=(const OverloadCandidateSet &);
     
   public:
-    OverloadCandidateSet(SourceLocation Loc) : Loc(Loc) {}
+    OverloadCandidateSet(SourceLocation Loc) : NumInlineSequences(0), Loc(Loc){}
 
     SourceLocation getLocation() const { return Loc; }
 
@@ -714,8 +730,27 @@
     /// to the overload set.
     OverloadCandidate &addCandidate(unsigned NumConversions = 0) {
       Candidates.push_back(OverloadCandidate());
-      Candidates.back().Conversions.resize(NumConversions);
-      return Candidates.back();
+      OverloadCandidate &C = Candidates.back();
+
+      // Assign space from the inline array if there are enough free slots
+      // available.
+      if (NumConversions + NumInlineSequences < 16) {
+        ImplicitConversionSequence *I =
+          (ImplicitConversionSequence*)InlineSpace;
+        C.Conversions = &I[NumInlineSequences];
+        NumInlineSequences += NumConversions;
+      } else {
+        // Otherwise get memory from the allocator.
+        C.Conversions = ConversionSequenceAllocator
+                          .Allocate<ImplicitConversionSequence>(NumConversions);
+      }
+
+      // Construct the new objects.
+      for (unsigned i = 0; i != NumConversions; ++i)
+        new (&C.Conversions[i]) ImplicitConversionSequence();
+
+      C.NumConversions = NumConversions;
+      return C;
     }
 
     /// Find the best viable function on this overload set, if it exists.

diff --git a/lib/Sema/SemaOverload.cpp b/lib/Sema/SemaOverload.cpp
index 2f1daf2..7f1c86c 100644
--- a/lib/Sema/SemaOverload.cpp
+++ b/lib/Sema/SemaOverload.cpp

@@ -543,6 +543,7 @@
 void OverloadCandidateSet::clear() {
   Candidates.clear();
   Functions.clear();
+  NumInlineSequences = 0;
 }
 
 namespace {
@@ -7018,8 +7019,8 @@
   //   A viable function F1 is defined to be a better function than another
   //   viable function F2 if for all arguments i, ICSi(F1) is not a worse
   //   conversion sequence than ICSi(F2), and then...
-  unsigned NumArgs = Cand1.Conversions.size();
-  assert(Cand2.Conversions.size() == NumArgs && "Overload candidate mismatch");
+  unsigned NumArgs = Cand1.NumConversions;
+  assert(Cand2.NumConversions == NumArgs && "Overload candidate mismatch");
   bool HasBetterConversion = false;
   for (unsigned ArgIdx = StartArg; ArgIdx < NumArgs; ++ArgIdx) {
     switch (CompareImplicitConversionSequences(S,
@@ -7718,7 +7719,7 @@
 
   case ovl_fail_bad_conversion: {
     unsigned I = (Cand->IgnoreObjectArgument ? 1 : 0);
-    for (unsigned N = Cand->Conversions.size(); I != N; ++I)
+    for (unsigned N = Cand->NumConversions; I != N; ++I)
       if (Cand->Conversions[I].isBad())
         return DiagnoseBadConversion(S, Cand, I);
 
@@ -7770,12 +7771,12 @@
                                   const char *Opc,
                                   SourceLocation OpLoc,
                                   OverloadCandidate *Cand) {
-  assert(Cand->Conversions.size() <= 2 && "builtin operator is not binary");
+  assert(Cand->NumConversions <= 2 && "builtin operator is not binary");
   std::string TypeStr("operator");
   TypeStr += Opc;
   TypeStr += "(";
   TypeStr += Cand->BuiltinTypes.ParamTypes[0].getAsString();
-  if (Cand->Conversions.size() == 1) {
+  if (Cand->NumConversions == 1) {
     TypeStr += ")";
     S.Diag(OpLoc, diag::note_ovl_builtin_unary_candidate) << TypeStr;
   } else {
@@ -7788,7 +7789,7 @@
 
 void NoteAmbiguousUserConversions(Sema &S, SourceLocation OpLoc,
                                   OverloadCandidate *Cand) {
-  unsigned NoOperands = Cand->Conversions.size();
+  unsigned NoOperands = Cand->NumConversions;
   for (unsigned ArgIdx = 0; ArgIdx < NoOperands; ++ArgIdx) {
     const ImplicitConversionSequence &ICS = Cand->Conversions[ArgIdx];
     if (ICS.isBad()) break; // all meaningless after first invalid
@@ -7892,11 +7893,11 @@
 
         // If there's any ordering between the defined conversions...
         // FIXME: this might not be transitive.
-        assert(L->Conversions.size() == R->Conversions.size());
+        assert(L->NumConversions == R->NumConversions);
 
         int leftBetter = 0;
         unsigned I = (L->IgnoreObjectArgument || R->IgnoreObjectArgument);
-        for (unsigned E = L->Conversions.size(); I != E; ++I) {
+        for (unsigned E = L->NumConversions; I != E; ++I) {
           switch (CompareImplicitConversionSequences(S,
                                                      L->Conversions[I],
                                                      R->Conversions[I])) {
@@ -7959,7 +7960,7 @@
 
   // Skip forward to the first bad conversion.
   unsigned ConvIdx = (Cand->IgnoreObjectArgument ? 1 : 0);
-  unsigned ConvCount = Cand->Conversions.size();
+  unsigned ConvCount = Cand->NumConversions;
   while (true) {
     assert(ConvIdx != ConvCount && "no bad conversion in candidate");
     ConvIdx++;
commit	09dd3798e100ace40defdc5541277502346213f2	[log] [tgz]
author	Benjamin Kramer <benny.kra@googlemail.com>	Sat Jan 14 16:32:05 2012 +0000
committer	Benjamin Kramer <benny.kra@googlemail.com>	Sat Jan 14 16:32:05 2012 +0000
tree	69decdebd982bb39d1f87d022ca45ddf23e9c704
parent	0e6a16f3e8c8d80a61e89848d216972aa8daea27 [diff]