Make clang mark static stack allocations with lifetime markers to enable more aggressive stack coloring.
Patch by John McCall with help from Shuxin Yang.
rdar://13115369



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@177819 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index a99fe29..c095ff9 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -452,6 +452,22 @@
       CGF.EmitCall(FnInfo, CleanupFn, ReturnValueSlot(), Args);
     }
   };
+
+  /// A cleanup that emits a call to @llvm.lifetime.end for an allocation.
+  class CallLifetimeEnd : public EHScopeStack::Cleanup {
+    llvm::Value *Addr; // Address operand; bitcast to Int8PtrTy when emitted.
+    llvm::Value *Size; // Size operand passed through to the intrinsic.
+  public:
+    CallLifetimeEnd(llvm::Value *addr, llvm::Value *size)
+      : Addr(addr), Size(size) {}
+
+    void Emit(CodeGenFunction &CGF, Flags flags) {
+      llvm::Value *castAddr = CGF.Builder.CreateBitCast(Addr, CGF.Int8PtrTy);
+      CGF.Builder.CreateCall2(CGF.CGM.getLLVMLifetimeEndFn(),
+                              Size, castAddr)
+        ->setDoesNotThrow(); // The emitted call is marked non-throwing.
+    }
+  };
 }
 
 /// EmitAutoVarWithLifetime - Does the setup required for an automatic
@@ -756,7 +772,6 @@
   // If a global is all zeros, always use a memset.
   if (isa<llvm::ConstantAggregateZero>(Init)) return true;
 
-
   // If a non-zero global is <= 32 bytes, always use a memcpy.  If it is large,
   // do it if it will require 6 or fewer scalar stores.
   // TODO: Should budget depends on the size?  Avoiding a large global warrants
@@ -768,6 +783,20 @@
          canEmitInitWithFewStoresAfterMemset(Init, StoreBudget);
 }
 
+/// Should we use the LLVM lifetime intrinsics for a local of \p Size bytes?
+static bool shouldUseLifetimeMarkers(CodeGenFunction &CGF, const VarDecl &D,
+                                     uint64_t Size) {
+  // For now, only in optimized builds.
+  if (CGF.CGM.getCodeGenOpts().OptimizationLevel == 0)
+    return false;
+
+  // Only mark objects larger than 32 bytes. We don't want to increase
+  // compile time by marking tiny objects.
+  const uint64_t SizeThreshold = 32;
+
+  return Size > SizeThreshold;
+}
+
 
 /// EmitAutoVarDecl - Emit code and set up an entry in LocalDeclMap for a
 /// variable declaration with auto, register, or no storage class specifier.
@@ -870,6 +899,20 @@
               getContext().toCharUnitsFromBits(Target.getPointerAlign(0)));
         Alloc->setAlignment(allocaAlignment.getQuantity());
         DeclPtr = Alloc;
+
+        // Emit a lifetime intrinsic if meaningful.  There's no point
+        // in doing this if we don't have a valid insertion point (?).
+        uint64_t size = CGM.getDataLayout().getTypeAllocSize(LTy);
+        if (HaveInsertPoint() && shouldUseLifetimeMarkers(*this, D, size)) {
+          llvm::Value *sizeV = llvm::ConstantInt::get(Int64Ty, size);
+
+          emission.SizeForLifetimeMarkers = sizeV;
+          llvm::Value *castAddr = Builder.CreateBitCast(Alloc, Int8PtrTy);
+          Builder.CreateCall2(CGM.getLLVMLifetimeStartFn(), sizeV, castAddr)
+            ->setDoesNotThrow();
+        } else {
+          assert(!emission.useLifetimeMarkers());
+        }
       }
     } else {
       // Targets that don't support recursion emit locals as globals.
@@ -1215,6 +1258,14 @@
 
   const VarDecl &D = *emission.Variable;
 
+  // Make sure we call @llvm.lifetime.end.  This needs to happen
+  // *last*, so the cleanup needs to be pushed *first*.
+  if (emission.useLifetimeMarkers()) {
+    EHStack.pushCleanup<CallLifetimeEnd>(NormalCleanup,
+                                         emission.getAllocatedAddress(),
+                                         emission.getSizeForLifetimeMarkers());
+  }
+
   // Check the type for a cleanup.
   if (QualType::DestructionKind dtorKind = D.getType().isDestructedType())
     emitAutoVarTypeCleanup(emission, dtorKind);
@@ -1485,6 +1536,22 @@
                                                   elementType, destroyer);
 }
 
+/// Return the @llvm.lifetime.start intrinsic, declaring it on first use.
+llvm::Constant *CodeGenModule::getLLVMLifetimeStartFn() {
+  if (LifetimeStartFn) return LifetimeStartFn; // Cached by an earlier call.
+  LifetimeStartFn = llvm::Intrinsic::getDeclaration(&getModule(),
+                                            llvm::Intrinsic::lifetime_start);
+  return LifetimeStartFn;
+}
+
+/// Return the @llvm.lifetime.end intrinsic, declaring it on first use.
+llvm::Constant *CodeGenModule::getLLVMLifetimeEndFn() {
+  if (LifetimeEndFn) return LifetimeEndFn; // Cached by an earlier call.
+  LifetimeEndFn = llvm::Intrinsic::getDeclaration(&getModule(),
+                                              llvm::Intrinsic::lifetime_end);
+  return LifetimeEndFn;
+}
+
 namespace {
   /// A cleanup to perform a release of an object at the end of a
   /// function.  This is used to balance out the incoming +1 of a
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index 9e7ddfb..06f7460 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -319,6 +319,12 @@
 }
 
 void CodeGenFunction::EmitLabel(const LabelDecl *D) {
+  // Add this label to the current lexical scope if we're within any
+  // normal cleanups.  Jumps "in" to this label --- when permitted by
+  // the language --- may need to be routed around such cleanups.
+  if (EHStack.hasNormalCleanups() && CurLexicalScope)
+    CurLexicalScope->addLabel(D);
+
   JumpDest &Dest = LabelMap[D];
 
   // If we didn't need a forward reference to this label, just go
@@ -330,16 +336,36 @@
   // it from the branch-fixups list.
   } else {
     assert(!Dest.getScopeDepth().isValid() && "already emitted label!");
-    Dest = JumpDest(Dest.getBlock(),
-                    EHStack.stable_begin(),
-                    Dest.getDestIndex());
-
+    Dest.setScopeDepth(EHStack.stable_begin());
     ResolveBranchFixups(Dest.getBlock());
   }
 
   EmitBlock(Dest.getBlock());
 }
 
+/// Change the cleanup scope of the labels recorded in this lexical scope
+/// to the innermost normal cleanup scope on the EH stack.
+void CodeGenFunction::LexicalScope::rescopeLabels() {
+  assert(!Labels.empty());
+  EHScopeStack::stable_iterator innermostScope
+    = CGF.EHStack.getInnermostNormalCleanup();
+
+  // Retarget the JumpDest of every recorded label to that scope depth.
+  for (SmallVectorImpl<const LabelDecl*>::const_iterator
+         i = Labels.begin(), e = Labels.end(); i != e; ++i) {
+    assert(CGF.LabelMap.count(*i));
+    JumpDest &dest = CGF.LabelMap.find(*i)->second;
+    assert(dest.getScopeDepth().isValid());
+    assert(innermostScope.encloses(dest.getScopeDepth()));
+    dest.setScopeDepth(innermostScope);
+  }
+
+  // Reparent the labels if the new scope also has cleanups and a parent exists.
+  if (innermostScope != EHScopeStack::stable_end() && ParentScope) {
+    ParentScope->Labels.append(Labels.begin(), Labels.end());
+  }
+}
+
 
 void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) {
   EmitLabel(S.getDecl());
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index bd24435..04de55d 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -45,7 +45,7 @@
     IndirectBranch(0), SwitchInsn(0), CaseRangeBlock(0), UnreachableBlock(0),
     CXXABIThisDecl(0), CXXABIThisValue(0), CXXThisValue(0),
     CXXStructorImplicitParamDecl(0), CXXStructorImplicitParamValue(0),
-    OutermostConditional(0), TerminateLandingPad(0),
+    OutermostConditional(0), CurLexicalScope(0), TerminateLandingPad(0),
     TerminateHandler(0), TrapBB(0) {
   if (!suppressNewContext)
     CGM.getCXXABI().getMangleContext().startNewFunction();
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 46848ae..e40a731 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -562,6 +562,11 @@
     EHScopeStack::stable_iterator getScopeDepth() const { return ScopeDepth; }
     unsigned getDestIndex() const { return Index; }
 
+    // Overwrite the recorded scope depth.  This should be used cautiously.
+    void setScopeDepth(EHScopeStack::stable_iterator depth) {
+      ScopeDepth = depth;
+    }
+
   private:
     llvm::BasicBlock *Block;
     EHScopeStack::stable_iterator ScopeDepth;
@@ -853,6 +858,8 @@
 
   class LexicalScope: protected RunCleanupsScope {
     SourceRange Range;
+    SmallVector<const LabelDecl*, 4> Labels;
+    LexicalScope *ParentScope;
 
     LexicalScope(const LexicalScope &) LLVM_DELETED_FUNCTION;
     void operator=(const LexicalScope &) LLVM_DELETED_FUNCTION;
@@ -860,15 +867,23 @@
   public:
     /// \brief Enter a new cleanup scope.
     explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range)
-      : RunCleanupsScope(CGF), Range(Range) {
+      : RunCleanupsScope(CGF), Range(Range), ParentScope(CGF.CurLexicalScope) {
+      CGF.CurLexicalScope = this;
       if (CGDebugInfo *DI = CGF.getDebugInfo())
         DI->EmitLexicalBlockStart(CGF.Builder, Range.getBegin());
     }
 
+    void addLabel(const LabelDecl *label) {
+      assert(PerformCleanup && "adding label to dead scope?");
+      Labels.push_back(label); // Consumed by rescopeLabels() at scope end.
+    }
+
     /// \brief Exit this cleanup scope, emitting any accumulated
     /// cleanups.
     ~LexicalScope() {
-      if (PerformCleanup) endLexicalScope();
+      // If we should perform a cleanup, force them now.  Note that
+      // this ends the cleanup scope before rescoping any labels.
+      if (PerformCleanup) ForceCleanup();
     }
 
     /// \brief Force the emission of cleanups now, instead of waiting
@@ -880,9 +895,14 @@
 
   private:
     void endLexicalScope() {
+      CGF.CurLexicalScope = ParentScope;
       if (CGDebugInfo *DI = CGF.getDebugInfo())
         DI->EmitLexicalBlockEnd(CGF.Builder, Range.getEnd());
+      if (!Labels.empty())
+        rescopeLabels();
     }
+
+    void rescopeLabels();
   };
 
 
@@ -1205,6 +1225,8 @@
   /// temporary should be destroyed conditionally.
   ConditionalEvaluation *OutermostConditional;
 
+  /// The current lexical scope.
+  LexicalScope *CurLexicalScope;
 
   /// ByrefValueInfoMap - For each __block variable, contains a pair of the LLVM
   /// type as well as the field number that contains the actual data.
@@ -2001,18 +2023,34 @@
     /// initializer.
     bool IsConstantAggregate;
 
+    /// Non-null if we should use lifetime annotations.
+    llvm::Value *SizeForLifetimeMarkers;
+
     struct Invalid {};
     AutoVarEmission(Invalid) : Variable(0) {}
 
     AutoVarEmission(const VarDecl &variable)
       : Variable(&variable), Address(0), NRVOFlag(0),
-        IsByRef(false), IsConstantAggregate(false) {}
+        IsByRef(false), IsConstantAggregate(false),
+        SizeForLifetimeMarkers(0) {}
 
     bool wasEmittedAsGlobal() const { return Address == 0; }
 
   public:
     static AutoVarEmission invalid() { return AutoVarEmission(Invalid()); }
 
+    bool useLifetimeMarkers() const { return SizeForLifetimeMarkers != 0; }
+    llvm::Value *getSizeForLifetimeMarkers() const {
+      assert(useLifetimeMarkers());
+      return SizeForLifetimeMarkers;
+    }
+
+    /// Returns the raw, allocated address, which is not necessarily
+    /// the address of the object itself.
+    llvm::Value *getAllocatedAddress() const {
+      return Address;
+    }
+
     /// Returns the address of the object within this declaration.
     /// Note that this does not chase the forwarding pointer for
     /// __block decls.
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 1737744..9f604e8 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -86,6 +86,7 @@
     NSConcreteGlobalBlock(0), NSConcreteStackBlock(0),
     BlockObjectAssign(0), BlockObjectDispose(0),
     BlockDescriptorType(0), GenericBlockLiteralType(0),
+    LifetimeStartFn(0), LifetimeEndFn(0),
     SanitizerBlacklist(CGO.SanitizerBlacklistFile),
     SanOpts(SanitizerBlacklist.isIn(M) ?
             SanitizerOptions::Disabled : LangOpts.Sanitize) {
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index a5f0689..fcd4830 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -381,6 +381,12 @@
     int GlobalUniqueCount;
   } Block;
 
+  /// void @llvm.lifetime.start(i64 %size, i8* nocapture <ptr>)
+  llvm::Constant *LifetimeStartFn;
+
+  /// void @llvm.lifetime.end(i64 %size, i8* nocapture <ptr>)
+  llvm::Constant *LifetimeEndFn;
+
   GlobalDecl initializedGlobalDecl;
 
   llvm::BlackList SanitizerBlacklist;
@@ -757,6 +763,9 @@
 
   ///@}
 
+  llvm::Constant *getLLVMLifetimeStartFn();
+  llvm::Constant *getLLVMLifetimeEndFn();
+
   // UpdateCompleteType - Make sure that this type is translated.
   void UpdateCompletedType(const TagDecl *TD);
 
diff --git a/test/CodeGen/lifetime2.c b/test/CodeGen/lifetime2.c
new file mode 100644
index 0000000..ffff5cc
--- /dev/null
+++ b/test/CodeGen/lifetime2.c
@@ -0,0 +1,17 @@
+// RUN: %clang -S -emit-llvm -o - -O2 %s | FileCheck %s -check-prefix=O2
+// RUN: %clang -S -emit-llvm -o - -O0 %s | FileCheck %s -check-prefix=O0
+
+extern int bar(char *A, int n);
+
+// O0-NOT: @llvm.lifetime.start
+int foo (int n) {
+  if (n) {
+// O2: @llvm.lifetime.start
+    char A[100];
+    return bar(A, 1);
+  } else {
+// O2: @llvm.lifetime.start
+    char A[100];
+    return bar(A, 2);
+  }
+}
diff --git a/test/CodeGenObjC/arc-blocks.m b/test/CodeGenObjC/arc-blocks.m
index c7bc502..3281b2a 100644
--- a/test/CodeGenObjC/arc-blocks.m
+++ b/test/CodeGenObjC/arc-blocks.m
@@ -128,7 +128,7 @@
   // CHECK-NEXT: call void @_Block_object_dispose(i8* [[T0]], i32 8)
   // CHECK-NEXT: [[T0:%.*]] = load i8** [[SLOT]]
   // CHECK-NEXT: call void @objc_release(i8* [[T0]])
-  // CHECK-NEXT: ret void
+  // CHECK: ret void
 
   // CHECK:    define internal void @__Block_byref_object_copy_
   // CHECK:      [[T0:%.*]] = getelementptr inbounds [[BYREF_T]]* {{%.*}}, i32 0, i32 6
@@ -207,7 +207,7 @@
   // CHECK:      [[T0:%.*]] = bitcast [[BYREF_T]]* [[VAR]] to i8*
   // CHECK-NEXT: call void @_Block_object_dispose(i8* [[T0]], i32 8)
   // CHECK-NEXT: call void @objc_destroyWeak(i8** [[SLOT]])
-  // CHECK-NEXT: ret void
+  // CHECK: ret void
 
   // CHECK:    define internal void @__Block_byref_object_copy_
   // CHECK:      [[T0:%.*]] = getelementptr inbounds [[BYREF_T]]* {{%.*}}, i32 0, i32 6
@@ -256,14 +256,14 @@
   // CHECK:      call void @test7_helper(
   // CHECK-NEXT: call void @objc_destroyWeak(i8** {{%.*}})
   // CHECK-NEXT: call void @objc_destroyWeak(i8** [[VAR]])
-  // CHECK-NEXT: ret void
+  // CHECK: ret void
 
   // CHECK:    define internal void @__test7_block_invoke
   // CHECK:      [[SLOT:%.*]] = getelementptr inbounds [[BLOCK_T]]* {{%.*}}, i32 0, i32 5
   // CHECK-NEXT: [[T0:%.*]] = call i8* @objc_loadWeakRetained(i8** [[SLOT]])
   // CHECK-NEXT: call void @test7_consume(i8* [[T0]])
   // CHECK-NEXT: call void @objc_release(i8* [[T0]])
-  // CHECK-NEXT: ret void
+  // CHECK: ret void
 
   // CHECK:    define internal void @__copy_helper_block_
   // CHECK:      getelementptr
@@ -296,7 +296,7 @@
 // CHECK-NEXT: [[T1:%.*]] = load [[TEST8]]** [[D0]]
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[TEST8]]* [[T1]] to i8*
 // CHECK-NEXT: call void @objc_release(i8* [[T2]])
-// CHECK-NEXT: ret void
+// CHECK: ret void
 
   extern void test8_helper(void (^)(void));
   test8_helper(^{ (void) self; });
@@ -354,7 +354,7 @@
   // CHECK-NEXT: [[T1:%.*]] = load void ()** [[SLOT]]
   // CHECK-NEXT: [[T2:%.*]] = bitcast void ()* [[T1]] to i8*
   // CHECK-NEXT: call void @objc_release(i8* [[T2]])
-  // CHECK-NEXT: ret void
+  // CHECK: ret void
 }
 
 // <rdar://problem/10402698>: do this copy and dispose with
@@ -374,7 +374,7 @@
 // CHECK-NEXT: [[T2:%.*]] = call i8* @objc_retainBlock(i8* [[T1]])
 // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to void ()*
 // CHECK-NEXT: store void ()* [[T3]], void ()** [[D2]], align 8
-// CHECK-NEXT: ret void
+// CHECK: ret void
 
 // CHECK: define internal void @__Block_byref_object_dispose
 // CHECK:      [[T0:%.*]] = load i8** {{%.*}}
@@ -418,7 +418,7 @@
   // CHECK-NEXT: [[T1:%.*]] = load void ()** [[SLOT]]
   // CHECK-NEXT: [[T2:%.*]] = bitcast void ()* [[T1]] to i8*
   // CHECK-NEXT: call void @objc_release(i8* [[T2]])
-  // CHECK-NEXT: ret void
+  // CHECK: ret void
 }
 
 // rdar://problem/10088932
@@ -438,7 +438,7 @@
   // CHECK-NEXT: call void @test11_helper(i8* [[T4]])
   // CHECK-NEXT: [[T5:%.*]] = bitcast void ()* [[T3]] to i8*
   // CHECK-NEXT: call void @objc_release(i8* [[T5]])
-  // CHECK-NEXT: ret void
+  // CHECK: ret void
 }
 void test11b(void) {
   int x;
@@ -456,7 +456,7 @@
   // CHECK-NEXT: store i8* [[T4]], i8** [[B]], align 8
   // CHECK-NEXT: [[T5:%.*]] = load i8** [[B]]
   // CHECK-NEXT: call void @objc_release(i8* [[T5]])
-  // CHECK-NEXT: ret void
+  // CHECK: ret void
 }
 
 // rdar://problem/9979150
diff --git a/test/CodeGenObjC/arc.m b/test/CodeGenObjC/arc.m
index 45cb5d7..7262dc8 100644
--- a/test/CodeGenObjC/arc.m
+++ b/test/CodeGenObjC/arc.m
@@ -353,7 +353,7 @@
   // CHECK-NEXT: [[T4:%.*]] = load i8** [[Y]]
   // CHECK-NEXT: call void @objc_release(i8* [[T4]]) [[NUW]], !clang.imprecise_release
   // CHECK-NEXT: call void @objc_destroyWeak(i8** [[X]])
-  // CHECK-NEXT: ret void
+  // CHECK: ret void
 }
 
 // Indirect consuming calls.
@@ -460,8 +460,9 @@
 void test19() {
   // CHECK: define void @test19()
   // CHECK:      [[X:%.*]] = alloca [5 x i8*], align 16
+  // CHECK: call void @llvm.lifetime.start
   // CHECK-NEXT: [[T0:%.*]] = bitcast [5 x i8*]* [[X]] to i8*
-  // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 40, i32 16, i1 false)
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 40, i32 16, i1 false)
   id x[5];
 
   extern id test19_helper(void);