Better diagnostics for string initialization.

This commit improves Clang's diagnostics for string initialization.
Where it would previously say:

  /tmp/a.c:3:9: error: array initializer must be an initializer list
  wchar_t s[] = "Hi";
          ^
  /tmp/a.c:4:6: error: array initializer must be an initializer list or string literal
  char t[] = L"Hi";
       ^

It will now say:

  /tmp/a.c:3:9: error: initializing wide char array with non-wide string literal
  wchar_t s[] = "Hi";
          ^
  /tmp/a.c:4:6: error: initializing char array with wide string literal
  char t[] = L"Hi";
       ^

As a bonus, it also fixes the fact that Clang would previously reject
this valid C11 code:

  char16_t s[] = u"hi";
  char32_t t[] = U"hi";

because it would only recognize the built-in types for char16_t and
char32_t, which do not exist in C.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@181880 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td
index db46967..bac681e 100644
--- a/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/include/clang/Basic/DiagnosticSemaKinds.td
@@ -4282,7 +4282,13 @@
   "or an initializer">;
 def err_array_init_not_init_list : Error<
   "array initializer must be an initializer "
-  "list%select{| or string literal}0">;
+  "list%select{| or string literal| or wide string literal}0">;
+def err_array_init_narrow_string_into_wchar : Error<
+  "initializing wide char array with non-wide string literal">;
+def err_array_init_wide_string_into_char : Error<
+  "initializing char array with wide string literal">;
+def err_array_init_incompat_wide_string_into_wchar : Error<
+  "initializing wide char array with incompatible wide string literal">;
 def err_array_init_different_type : Error<
   "cannot initialize array %diff{of type $ with array of type $|"
   "with different type of array}0,1">;
diff --git a/include/clang/Sema/Initialization.h b/include/clang/Sema/Initialization.h
index 3f70672..727ceeb 100644
--- a/include/clang/Sema/Initialization.h
+++ b/include/clang/Sema/Initialization.h
@@ -706,6 +706,16 @@
     /// \brief Array must be initialized with an initializer list or a 
     /// string literal.
     FK_ArrayNeedsInitListOrStringLiteral,
+    /// \brief Array must be initialized with an initializer list or a
+    /// wide string literal.
+    FK_ArrayNeedsInitListOrWideStringLiteral,
+    /// \brief Initializing a wide char array with narrow string literal.
+    FK_NarrowStringIntoWideCharArray,
+    /// \brief Initializing char array with wide string literal.
+    FK_WideStringIntoCharArray,
+    /// \brief Initializing wide char array with incompatible wide string
+    /// literal.
+    FK_IncompatWideStringIntoWideChar,
     /// \brief Array type mismatch.
     FK_ArrayTypeMismatch,
     /// \brief Non-constant array initializer
diff --git a/lib/Sema/SemaInit.cpp b/lib/Sema/SemaInit.cpp
index 7016e56..a3b7878 100644
--- a/lib/Sema/SemaInit.cpp
+++ b/lib/Sema/SemaInit.cpp
@@ -32,54 +32,99 @@
 // Sema Initialization Checking
 //===----------------------------------------------------------------------===//
 
-static Expr *IsStringInit(Expr *Init, const ArrayType *AT,
-                          ASTContext &Context) {
+/// \brief Check whether T is compatible with a wide character type (wchar_t,
+/// char16_t or char32_t).
+static bool IsWideCharCompatible(QualType T, ASTContext &Context) {
+  if (Context.typesAreCompatible(Context.getWideCharType(), T))
+    return true;
+  if (Context.getLangOpts().CPlusPlus || Context.getLangOpts().C11) {
+    return Context.typesAreCompatible(Context.Char16Ty, T) ||
+           Context.typesAreCompatible(Context.Char32Ty, T);
+  }
+  return false;
+}
+
+enum StringInitFailureKind {
+  SIF_None,
+  SIF_NarrowStringIntoWideChar,
+  SIF_WideStringIntoChar,
+  SIF_IncompatWideStringIntoWideChar,
+  SIF_Other
+};
+
+/// \brief Check whether the array of type AT can be initialized by the Init
+/// expression by means of string initialization. Returns SIF_None if so,
+/// otherwise returns a StringInitFailureKind that describes why the
+/// initialization would not work.
+static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT,
+                                          ASTContext &Context) {
   if (!isa<ConstantArrayType>(AT) && !isa<IncompleteArrayType>(AT))
-    return 0;
+    return SIF_Other;
 
   // See if this is a string literal or @encode.
   Init = Init->IgnoreParens();
 
   // Handle @encode, which is a narrow string.
   if (isa<ObjCEncodeExpr>(Init) && AT->getElementType()->isCharType())
-    return Init;
+    return SIF_None;
 
   // Otherwise we can only handle string literals.
   StringLiteral *SL = dyn_cast<StringLiteral>(Init);
-  if (SL == 0) return 0;
+  if (SL == 0)
+    return SIF_Other;
 
-  QualType ElemTy = Context.getCanonicalType(AT->getElementType());
+  const QualType ElemTy =
+      Context.getCanonicalType(AT->getElementType()).getUnqualifiedType();
 
   switch (SL->getKind()) {
   case StringLiteral::Ascii:
   case StringLiteral::UTF8:
     // char array can be initialized with a narrow string.
     // Only allow char x[] = "foo";  not char x[] = L"foo";
-    return ElemTy->isCharType() ? Init : 0;
+    if (ElemTy->isCharType())
+      return SIF_None;
+    if (IsWideCharCompatible(ElemTy, Context))
+      return SIF_NarrowStringIntoWideChar;
+    return SIF_Other;
+  // C99 6.7.8p15 (with correction from DR343), or C11 6.7.9p15:
+  // "An array with element type compatible with a qualified or unqualified
+  // version of wchar_t, char16_t, or char32_t may be initialized by a wide
+  // string literal with the corresponding encoding prefix (L, u, or U,
+  // respectively), optionally enclosed in braces."
   case StringLiteral::UTF16:
-    return ElemTy->isChar16Type() ? Init : 0;
+    if (Context.typesAreCompatible(Context.Char16Ty, ElemTy))
+      return SIF_None;
+    if (ElemTy->isCharType())
+      return SIF_WideStringIntoChar;
+    if (IsWideCharCompatible(ElemTy, Context))
+      return SIF_IncompatWideStringIntoWideChar;
+    return SIF_Other;
   case StringLiteral::UTF32:
-    return ElemTy->isChar32Type() ? Init : 0;
+    if (Context.typesAreCompatible(Context.Char32Ty, ElemTy))
+      return SIF_None;
+    if (ElemTy->isCharType())
+      return SIF_WideStringIntoChar;
+    if (IsWideCharCompatible(ElemTy, Context))
+      return SIF_IncompatWideStringIntoWideChar;
+    return SIF_Other;
   case StringLiteral::Wide:
-    // wchar_t array can be initialized with a wide string: C99 6.7.8p15 (with
-    // correction from DR343): "An array with element type compatible with a
-    // qualified or unqualified version of wchar_t may be initialized by a wide
-    // string literal, optionally enclosed in braces."
-    if (Context.typesAreCompatible(Context.getWideCharType(),
-                                   ElemTy.getUnqualifiedType()))
-      return Init;
-
-    return 0;
+    if (Context.typesAreCompatible(Context.getWideCharType(), ElemTy))
+      return SIF_None;
+    if (ElemTy->isCharType())
+      return SIF_WideStringIntoChar;
+    if (IsWideCharCompatible(ElemTy, Context))
+      return SIF_IncompatWideStringIntoWideChar;
+    return SIF_Other;
   }
 
   llvm_unreachable("missed a StringLiteral kind?");
 }
 
-static Expr *IsStringInit(Expr *init, QualType declType, ASTContext &Context) {
+static bool IsStringInit(Expr* init, QualType declType, ASTContext& Context) {
   const ArrayType *arrayType = Context.getAsArrayType(declType);
-  if (!arrayType) return 0;
-
-  return IsStringInit(init, arrayType, Context);
+  if (!arrayType)
+    return false;
+  return IsStringInit(init, arrayType, Context) == SIF_None;
 }
 
 /// Update the type of a string literal, including any surrounding parentheses,
@@ -806,10 +851,10 @@
     // array member.  There's nothing we can do with the completed
     // type here, though.
 
-    if (Expr *Str = IsStringInit(expr, arrayType, SemaRef.Context)) {
+    if (IsStringInit(expr, arrayType, SemaRef.Context) == SIF_None) {
       if (!VerifyOnly) {
-        CheckStringInit(Str, ElemType, arrayType, SemaRef);
-        UpdateStructuredListElement(StructuredList, StructuredIndex, Str);
+        CheckStringInit(expr, ElemType, arrayType, SemaRef);
+        UpdateStructuredListElement(StructuredList, StructuredIndex, expr);
       }
       ++Index;
       return;
@@ -1189,16 +1234,17 @@
 
   // Check for the special-case of initializing an array with a string.
   if (Index < IList->getNumInits()) {
-    if (Expr *Str = IsStringInit(IList->getInit(Index), arrayType,
-                                 SemaRef.Context)) {
+    if (IsStringInit(IList->getInit(Index), arrayType, SemaRef.Context) ==
+        SIF_None) {
       // We place the string literal directly into the resulting
       // initializer list. This is the only place where the structure
       // of the structured initializer list doesn't match exactly,
       // because doing so would involve allocating one character
       // constant for each string.
       if (!VerifyOnly) {
-        CheckStringInit(Str, DeclType, arrayType, SemaRef);
-        UpdateStructuredListElement(StructuredList, StructuredIndex, Str);
+        CheckStringInit(IList->getInit(Index), DeclType, arrayType, SemaRef);
+        UpdateStructuredListElement(StructuredList, StructuredIndex,
+                                    IList->getInit(Index));
         StructuredList->resizeInits(SemaRef.Context, StructuredIndex);
       }
       ++Index;
@@ -2507,6 +2553,10 @@
   case FK_TooManyInitsForReference:
   case FK_ArrayNeedsInitList:
   case FK_ArrayNeedsInitListOrStringLiteral:
+  case FK_ArrayNeedsInitListOrWideStringLiteral:
+  case FK_NarrowStringIntoWideCharArray:
+  case FK_WideStringIntoCharArray:
+  case FK_IncompatWideStringIntoWideChar:
   case FK_AddressOfOverloadFailed: // FIXME: Could do better
   case FK_NonConstLValueReferenceBindingToTemporary:
   case FK_NonConstLValueReferenceBindingToUnrelated:
@@ -4278,9 +4328,23 @@
       return;
     }
 
-    if (Initializer && IsStringInit(Initializer, DestAT, Context)) {
-      TryStringLiteralInitialization(S, Entity, Kind, Initializer, *this);
-      return;
+    if (Initializer) {
+      switch (IsStringInit(Initializer, DestAT, Context)) {
+      case SIF_None:
+        TryStringLiteralInitialization(S, Entity, Kind, Initializer, *this);
+        return;
+      case SIF_NarrowStringIntoWideChar:
+        SetFailed(FK_NarrowStringIntoWideCharArray);
+        return;
+      case SIF_WideStringIntoChar:
+        SetFailed(FK_WideStringIntoCharArray);
+        return;
+      case SIF_IncompatWideStringIntoWideChar:
+        SetFailed(FK_IncompatWideStringIntoWideChar);
+        return;
+      case SIF_Other:
+        break;
+      }
     }
 
     // Note: as an GNU C extension, we allow initialization of an
@@ -4307,8 +4371,10 @@
       TryListInitialization(S, Entity, Kind, cast<InitListExpr>(Initializer),
                             *this);
       AddParenthesizedArrayInitStep(DestType);
-    } else if (DestAT->getElementType()->isAnyCharacterType())
+    } else if (DestAT->getElementType()->isCharType())
       SetFailed(FK_ArrayNeedsInitListOrStringLiteral);
+    else if (IsWideCharCompatible(DestAT->getElementType(), Context))
+      SetFailed(FK_ArrayNeedsInitListOrWideStringLiteral);
     else
       SetFailed(FK_ArrayNeedsInitList);
 
@@ -5816,11 +5882,24 @@
     break;
 
   case FK_ArrayNeedsInitList:
-  case FK_ArrayNeedsInitListOrStringLiteral:
-    S.Diag(Kind.getLocation(), diag::err_array_init_not_init_list)
-      << (Failure == FK_ArrayNeedsInitListOrStringLiteral);
+    S.Diag(Kind.getLocation(), diag::err_array_init_not_init_list) << 0;
     break;
-
+  case FK_ArrayNeedsInitListOrStringLiteral:
+    S.Diag(Kind.getLocation(), diag::err_array_init_not_init_list) << 1;
+    break;
+  case FK_ArrayNeedsInitListOrWideStringLiteral:
+    S.Diag(Kind.getLocation(), diag::err_array_init_not_init_list) << 2;
+    break;
+  case FK_NarrowStringIntoWideCharArray:
+    S.Diag(Kind.getLocation(), diag::err_array_init_narrow_string_into_wchar);
+    break;
+  case FK_WideStringIntoCharArray:
+    S.Diag(Kind.getLocation(), diag::err_array_init_wide_string_into_char);
+    break;
+  case FK_IncompatWideStringIntoWideChar:
+    S.Diag(Kind.getLocation(),
+           diag::err_array_init_incompat_wide_string_into_wchar);
+    break;
   case FK_ArrayTypeMismatch:
   case FK_NonConstantArrayInit:
     S.Diag(Kind.getLocation(), 
@@ -6192,6 +6271,22 @@
       OS << "array requires initializer list or string literal";
       break;
 
+    case FK_ArrayNeedsInitListOrWideStringLiteral:
+      OS << "array requires initializer list or wide string literal";
+      break;
+
+    case FK_NarrowStringIntoWideCharArray:
+      OS << "narrow string into wide char array";
+      break;
+
+    case FK_WideStringIntoCharArray:
+      OS << "wide string into char array";
+      break;
+
+    case FK_IncompatWideStringIntoWideChar:
+      OS << "incompatible wide string into wide char array";
+      break;
+
     case FK_ArrayTypeMismatch:
       OS << "array type mismatch";
       break;
diff --git a/test/Lexer/char-literal.cpp b/test/Lexer/char-literal.cpp
index b2fab34..1cd14a9 100644
--- a/test/Lexer/char-literal.cpp
+++ b/test/Lexer/char-literal.cpp
@@ -36,8 +36,4 @@
 char16_t q[2] = u"\U00010000";
 #ifdef __cplusplus
 // expected-error@-2 {{too long}}
-#else
-// FIXME: The above should be accepted in C11 mode.
-// expected-error@-6 {{must be an initializer list}}
-// expected-error@-6 {{must be an initializer list}}
 #endif
diff --git a/test/Sema/ms-wchar.c b/test/Sema/ms-wchar.c
index 52a736c..febaf28 100644
--- a/test/Sema/ms-wchar.c
+++ b/test/Sema/ms-wchar.c
@@ -12,4 +12,7 @@
 unsigned short g; // expected-error {{redefinition of 'g' with a different type: 'unsigned short' vs '__wchar_t'}}
 
 // The type of a wide string literal is actually not __wchar_t.
-__wchar_t s[] = L"Hello world!"; // expected-error {{array initializer must be an initializer list}}
+__wchar_t s[] = L"Hello world!"; // expected-error-re {{array initializer must be an initializer list$}}
+
+// Do not suggest initializing with a string here, because it would not work.
+__wchar_t t[] = 1; // expected-error-re {{array initializer must be an initializer list$}}
diff --git a/test/Sema/string-init.c b/test/Sema/string-init.c
new file mode 100644
index 0000000..96ee360
--- /dev/null
+++ b/test/Sema/string-init.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -std=c11 -fsyntax-only -triple x86_64-pc-linux -verify %s
+
+// Note: these match the types specified by the target above.
+typedef int wchar_t;
+typedef unsigned short char16_t;
+typedef unsigned int char32_t;
+
+void f() {
+  char a1[] = "a"; // No error.
+  char a2[] = u8"a"; // No error.
+  char a3[] = u"a"; // expected-error{{initializing char array with wide string literal}}
+  char a4[] = U"a"; // expected-error{{initializing char array with wide string literal}}
+  char a5[] = L"a"; // expected-error{{initializing char array with wide string literal}}
+
+  wchar_t b1[] = "a"; // expected-error{{initializing wide char array with non-wide string literal}}
+  wchar_t b2[] = u8"a"; // expected-error{{initializing wide char array with non-wide string literal}}
+  wchar_t b3[] = u"a"; // expected-error{{initializing wide char array with incompatible wide string literal}}
+  wchar_t b4[] = U"a"; // expected-error{{initializing wide char array with incompatible wide string literal}}
+  wchar_t b5[] = L"a"; // No error.
+
+  char16_t c1[] = "a"; // expected-error{{initializing wide char array with non-wide string literal}}
+  char16_t c2[] = u8"a"; // expected-error{{initializing wide char array with non-wide string literal}}
+  char16_t c3[] = u"a"; // No error.
+  char16_t c4[] = U"a"; // expected-error{{initializing wide char array with incompatible wide string literal}}
+  char16_t c5[] = L"a"; // expected-error{{initializing wide char array with incompatible wide string literal}}
+
+  char32_t d1[] = "a"; // expected-error{{initializing wide char array with non-wide string literal}}
+  char32_t d2[] = u8"a"; // expected-error{{initializing wide char array with non-wide string literal}}
+  char32_t d3[] = u"a"; // expected-error{{initializing wide char array with incompatible wide string literal}}
+  char32_t d4[] = U"a"; // No error.
+  char32_t d5[] = L"a"; // expected-error{{initializing wide char array with incompatible wide string literal}}
+
+  int e1[] = "a"; // expected-error{{initializing wide char array with non-wide string literal}}
+  int e2[] = u8"a"; // expected-error{{initializing wide char array with non-wide string literal}}
+  int e3[] = u"a"; // expected-error{{initializing wide char array with incompatible wide string literal}}
+  int e4[] = U"a"; // expected-error{{initializing wide char array with incompatible wide string literal}}
+  int e5[] = L"a"; // No error.
+
+  long f1[] = "a"; // expected-error{{array initializer must be an initializer list}}
+  long f2[] = u8"a"; // expected-error{{array initializer must be an initializer list}}
+  long f3[] = u"a"; // expected-error{{array initializer must be an initializer list}}
+  long f4[] = U"a"; // expected-error{{array initializer must be an initializer list}}
+  long f5[] = L"a"; // expected-error{{array initializer must be an initializer list}}
+}
+
+void g() {
+  char a[] = 1; // expected-error{{array initializer must be an initializer list or string literal}}
+  wchar_t b[] = 1; // expected-error{{array initializer must be an initializer list or wide string literal}}
+  char16_t c[] = 1; // expected-error{{array initializer must be an initializer list or wide string literal}}
+  char32_t d[] = 1; // expected-error{{array initializer must be an initializer list or wide string literal}}
+}
diff --git a/test/Sema/wchar.c b/test/Sema/wchar.c
index 816245f..13c2f58 100644
--- a/test/Sema/wchar.c
+++ b/test/Sema/wchar.c
@@ -19,6 +19,6 @@
 void foo() {
   WCHAR_T_TYPE t1[] = L"x";
   wchar_t tab[] = L"x";
-  WCHAR_T_TYPE t2[] = "x";     // expected-error {{initializer}}
-  char t3[] = L"x";   // expected-error {{initializer}}
+  WCHAR_T_TYPE t2[] = "x";     // expected-error {{initializing wide char array with non-wide string literal}}
+  char t3[] = L"x";   // expected-error {{initializing char array with wide string literal}}
 }
diff --git a/test/SemaCXX/ms-wchar.cpp b/test/SemaCXX/ms-wchar.cpp
index 2cbf745..878d8ca 100644
--- a/test/SemaCXX/ms-wchar.cpp
+++ b/test/SemaCXX/ms-wchar.cpp
@@ -7,3 +7,6 @@
 __wchar_t s[] = L"Hello world!";
 
 unsigned short t[] = L"Hello world!"; // expected-error{{array initializer must be an initializer list}}
+
+wchar_t u[] = 1; // expected-error{{array initializer must be an initializer list or wide string literal}}
+__wchar_t v[] = 1; // expected-error{{array initializer must be an initializer list or wide string literal}}
diff --git a/test/SemaCXX/string-init.cpp b/test/SemaCXX/string-init.cpp
new file mode 100644
index 0000000..7e62d18
--- /dev/null
+++ b/test/SemaCXX/string-init.cpp
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s
+
+void f() {
+  char a1[] = "a"; // No error.
+  char a2[] = u8"a"; // No error.
+  char a3[] = u"a"; // expected-error{{initializing char array with wide string literal}}
+  char a4[] = U"a"; // expected-error{{initializing char array with wide string literal}}
+  char a5[] = L"a"; // expected-error{{initializing char array with wide string literal}}
+
+  wchar_t b1[] = "a"; // expected-error{{initializing wide char array with non-wide string literal}}
+  wchar_t b2[] = u8"a"; // expected-error{{initializing wide char array with non-wide string literal}}
+  wchar_t b3[] = u"a"; // expected-error{{initializing wide char array with incompatible wide string literal}}
+  wchar_t b4[] = U"a"; // expected-error{{initializing wide char array with incompatible wide string literal}}
+  wchar_t b5[] = L"a"; // No error.
+
+  char16_t c1[] = "a"; // expected-error{{initializing wide char array with non-wide string literal}}
+  char16_t c2[] = u8"a"; // expected-error{{initializing wide char array with non-wide string literal}}
+  char16_t c3[] = u"a"; // No error.
+  char16_t c4[] = U"a"; // expected-error{{initializing wide char array with incompatible wide string literal}}
+  char16_t c5[] = L"a"; // expected-error{{initializing wide char array with incompatible wide string literal}}
+
+  char32_t d1[] = "a"; // expected-error{{initializing wide char array with non-wide string literal}}
+  char32_t d2[] = u8"a"; // expected-error{{initializing wide char array with non-wide string literal}}
+  char32_t d3[] = u"a"; // expected-error{{initializing wide char array with incompatible wide string literal}}
+  char32_t d4[] = U"a"; // No error.
+  char32_t d5[] = L"a"; // expected-error{{initializing wide char array with incompatible wide string literal}}
+
+  int e1[] = "a"; // expected-error{{array initializer must be an initializer list}}
+  int e2[] = u8"a"; // expected-error{{array initializer must be an initializer list}}
+  int e3[] = u"a"; // expected-error{{array initializer must be an initializer list}}
+  int e4[] = U"a"; // expected-error{{array initializer must be an initializer list}}
+  int e5[] = L"a"; // expected-error{{array initializer must be an initializer list}}
+}
+
+void g() {
+  char a[] = 1; // expected-error{{array initializer must be an initializer list or string literal}}
+  wchar_t b[] = 1; // expected-error{{array initializer must be an initializer list or wide string literal}}
+  char16_t c[] = 1; // expected-error{{array initializer must be an initializer list or wide string literal}}
+  char32_t d[] = 1; // expected-error{{array initializer must be an initializer list or wide string literal}}
+}