PR12226: don't generate wrong code if a braced string literal is used to
initialize an array of unsigned char. Outside C++11 mode, this bug was benign,
and just resulted in us emitting a constant which was double the required
length, padded with 0s. In C++11, it resulted in us generating an array whose
first element was something like  i8 ptrtoint ([n x i8]* @str to i8).


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@154756 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h
index 558bd00..a7822fa 100644
--- a/include/clang/AST/Expr.h
+++ b/include/clang/AST/Expr.h
@@ -3608,6 +3608,10 @@
     return LBraceLoc.isValid() && RBraceLoc.isValid();
   }
 
+  // Is this an initializer for an array of characters, initialized by a string
+  // literal or an @encode?
+  bool isStringLiteralInit() const;
+
   SourceLocation getLBraceLoc() const { return LBraceLoc; }
   void setLBraceLoc(SourceLocation Loc) { LBraceLoc = Loc; }
   SourceLocation getRBraceLoc() const { return RBraceLoc; }
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
index 868109e..9556b1a 100644
--- a/lib/AST/Expr.cpp
+++ b/lib/AST/Expr.cpp
@@ -1590,6 +1590,16 @@
       inits[i] = filler;
 }
 
+bool InitListExpr::isStringLiteralInit() const {
+  if (getNumInits() != 1)
+    return false;
+  const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(getType());
+  if (!CAT || !CAT->getElementType()->isIntegerType())
+    return false;
+  const Expr *Init = getInit(0)->IgnoreParenImpCasts();
+  return isa<StringLiteral>(Init) || isa<ObjCEncodeExpr>(Init);
+}
+
 SourceRange InitListExpr::getSourceRange() const {
   if (SyntacticForm)
     return SyntacticForm->getSourceRange();
diff --git a/lib/AST/ExprConstant.cpp b/lib/AST/ExprConstant.cpp
index 01c9fe7..98fc0e5 100644
--- a/lib/AST/ExprConstant.cpp
+++ b/lib/AST/ExprConstant.cpp
@@ -1491,15 +1491,19 @@
   llvm_unreachable("base class missing from derived class's bases list");
 }
 
-/// Extract the value of a character from a string literal.
+/// Extract the value of a character from a string literal. CharType is used to
+/// determine the expected signedness of the result -- a string literal used to
+/// initialize an array of 'signed char' or 'unsigned char' might contain chars
+/// of the wrong signedness.
 static APSInt ExtractStringLiteralCharacter(EvalInfo &Info, const Expr *Lit,
-                                            uint64_t Index) {
+                                            uint64_t Index, QualType CharType) {
   // FIXME: Support PredefinedExpr, ObjCEncodeExpr, MakeStringConstant
   const StringLiteral *S = dyn_cast<StringLiteral>(Lit);
   assert(S && "unexpected string literal expression kind");
+  assert(CharType->isIntegerType() && "unexpected character type");
 
   APSInt Value(S->getCharByteWidth() * Info.Ctx.getCharWidth(),
-    Lit->getType()->getArrayElementTypeNoTypeQual()->isUnsignedIntegerType());
+               CharType->isUnsignedIntegerType());
   if (Index < S->getLength())
     Value = S->getCodeUnit(Index);
   return Value;
@@ -1546,7 +1550,7 @@
         assert(I == N - 1 && "extracting subobject of character?");
         assert(!O->hasLValuePath() || O->getLValuePath().empty());
         Obj = APValue(ExtractStringLiteralCharacter(
-          Info, O->getLValueBase().get<const Expr*>(), Index));
+          Info, O->getLValueBase().get<const Expr*>(), Index, SubType));
         return true;
       } else if (O->getArrayInitializedElts() > Index)
         O = &O->getArrayInitializedElt(Index);
@@ -3849,8 +3853,7 @@
 
   // C++11 [dcl.init.string]p1: A char array [...] can be initialized by [...]
   // an appropriately-typed string literal enclosed in braces.
-  if (E->getNumInits() == 1 && E->getInit(0)->isGLValue() &&
-      Info.Ctx.hasSameUnqualifiedType(E->getType(), E->getInit(0)->getType())) {
+  if (E->isStringLiteralInit()) {
     LValue LV;
     if (!EvaluateLValue(E->getInit(0), LV, Info))
       return false;
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index b6efc1c..975f572 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -916,14 +916,8 @@
 
   // Handle initialization of an array.
   if (E->getType()->isArrayType()) {
-    if (E->getNumInits() > 0) {
-      QualType T1 = E->getType();
-      QualType T2 = E->getInit(0)->getType();
-      if (CGF.getContext().hasSameUnqualifiedType(T1, T2)) {
-        EmitAggLoadOfLValue(E->getInit(0));
-        return;
-      }
-    }
+    if (E->isStringLiteralInit())
+      return Visit(E->getInit(0));
 
     QualType elementType =
         CGF.getContext().getAsArrayType(E->getType())->getElementType();
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index d528e0c..bc9f9ef 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -758,17 +758,13 @@
   }
 
   llvm::Constant *EmitArrayInitialization(InitListExpr *ILE) {
-    unsigned NumInitElements = ILE->getNumInits();
-    if (NumInitElements == 1 &&
-        CGM.getContext().hasSameUnqualifiedType(ILE->getType(),
-                                                ILE->getInit(0)->getType()) &&
-        (isa<StringLiteral>(ILE->getInit(0)) ||
-         isa<ObjCEncodeExpr>(ILE->getInit(0))))
+    if (ILE->isStringLiteralInit())
       return Visit(ILE->getInit(0));
 
     llvm::ArrayType *AType =
         cast<llvm::ArrayType>(ConvertType(ILE->getType()));
     llvm::Type *ElemTy = AType->getElementType();
+    unsigned NumInitElements = ILE->getNumInits();
     unsigned NumElements = AType->getNumElements();
 
     // Initialising an array requires us to automatically
diff --git a/test/CodeGenCXX/const-init-cxx11.cpp b/test/CodeGenCXX/const-init-cxx11.cpp
index c745dee..62a345a 100644
--- a/test/CodeGenCXX/const-init-cxx11.cpp
+++ b/test/CodeGenCXX/const-init-cxx11.cpp
@@ -92,6 +92,9 @@
   // CHECK: @_ZN5Array1cE = constant [6 x [4 x i8]] [{{.*}} c"foo\00", [4 x i8] c"a\00\00\00", [4 x i8] c"bar\00", [4 x i8] c"xyz\00", [4 x i8] c"b\00\00\00", [4 x i8] c"123\00"]
   extern constexpr char c[6][4] = { "foo", "a", { "bar" }, { 'x', 'y', 'z' }, { "b" }, '1', '2', '3' };
 
+  // CHECK: @_ZN5Array2ucE = constant [4 x i8] c"foo\00"
+  extern constexpr unsigned char uc[] = { "foo" };
+
   struct C { constexpr C() : n(5) {} int n, m = 3 * n + 1; };
   // CHECK: @_ZN5Array5ctorsE = constant [3 x {{.*}}] [{{.*}} { i32 5, i32 16 }, {{.*}} { i32 5, i32 16 }, {{.*}} { i32 5, i32 16 }]
   extern const C ctors[3];
diff --git a/test/CodeGenCXX/static-init.cpp b/test/CodeGenCXX/static-init.cpp
index ed659de..74278f7 100644
--- a/test/CodeGenCXX/static-init.cpp
+++ b/test/CodeGenCXX/static-init.cpp
@@ -2,6 +2,7 @@
 
 // CHECK: @_ZZ1hvE1i = internal global i32 0, align 4
 // CHECK: @base_req = global [4 x i8] c"foo\00", align 1
+// CHECK: @base_req_uchar = global [4 x i8] c"bar\00", align 1
 
 // CHECK: @_ZZN5test31BC1EvE1u = internal global { i8, [3 x i8] } { i8 97, [3 x i8] undef }, align 4
 // CHECK: @_ZZN5test1L6getvarEiE3var = internal constant [4 x i32] [i32 1, i32 0, i32 2, i32 4], align 16
@@ -64,6 +65,7 @@
 
 // Make sure we emit the initializer correctly for the following:
 char base_req[] = { "foo" };
+unsigned char base_req_uchar[] = { "bar" };
 
 namespace union_static_local {
   // CHECK: define internal void @_ZZN18union_static_local4testEvEN1c4mainEv
diff --git a/test/SemaCXX/constant-expression-cxx11.cpp b/test/SemaCXX/constant-expression-cxx11.cpp
index 41d214a..9f80e71 100644
--- a/test/SemaCXX/constant-expression-cxx11.cpp
+++ b/test/SemaCXX/constant-expression-cxx11.cpp
@@ -615,6 +615,10 @@
 static_assert(agg1.arr[5] == 0, ""); // expected-error {{constant expression}} expected-note {{read of dereferenced one-past-the-end}}
 static_assert(agg1.p == nullptr, "");
 
+static constexpr const unsigned char uc[] = { "foo" };
+static_assert(uc[0] == 'f', "");
+static_assert(uc[3] == 0, "");
+
 namespace SimpleDerivedClass {
 
 struct B {