The UTF16 string referenced by a CFString should go into the __TEXT,__ustring
section. A 'normal' string will go into the __TEXT,__const section, but this
isn't good for UTF16 strings. The __ustring section allows for coalescing, among
other niceties (such as allowing the linker to easily split up strings).

Instead of outputting the UTF16 string as a series of bytes, output it as a
series of shorts. The back-end will then nicely place the UTF16 string into the
correct section, because it's a mensch.
<rdar://problem/10655949>


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@153710 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 6fa33b6..c9f1066 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -1903,8 +1903,10 @@
     return Map.GetOrCreateValue(String);
   }
 
-  // Otherwise, convert the UTF8 literals into a byte string.
-  SmallVector<UTF16, 128> ToBuf(NumBytes);
+  // Otherwise, convert the UTF8 literals into a string of shorts.
+  IsUTF16 = true;
+
+  SmallVector<UTF16, 128> ToBuf(NumBytes + 1); // +1 for ending nulls.
   const UTF8 *FromPtr = (UTF8 *)String.data();
   UTF16 *ToPtr = &ToBuf[0];
 
@@ -1915,28 +1917,11 @@
   // ConvertUTF8toUTF16 returns the length in ToPtr.
   StringLength = ToPtr - &ToBuf[0];
 
-  // Render the UTF-16 string into a byte array and convert to the target byte
-  // order.
-  //
-  // FIXME: This isn't something we should need to do here.
-  SmallString<128> AsBytes;
-  AsBytes.reserve(StringLength * 2);
-  for (unsigned i = 0; i != StringLength; ++i) {
-    unsigned short Val = ToBuf[i];
-    if (TargetIsLSB) {
-      AsBytes.push_back(Val & 0xFF);
-      AsBytes.push_back(Val >> 8);
-    } else {
-      AsBytes.push_back(Val >> 8);
-      AsBytes.push_back(Val & 0xFF);
-    }
-  }
-  // Append one extra null character, the second is automatically added by our
-  // caller.
-  AsBytes.push_back(0);
-
-  IsUTF16 = true;
-  return Map.GetOrCreateValue(StringRef(AsBytes.data(), AsBytes.size()));
+  // Add an explicit null.
+  *ToPtr = 0;
+  return Map.
+    GetOrCreateValue(StringRef(reinterpret_cast<const char *>(ToBuf.data()),
+                               (StringLength + 1) * 2));
 }
 
 static llvm::StringMapEntry<llvm::Constant*> &
@@ -1990,8 +1975,15 @@
     llvm::ConstantInt::get(Ty, 0x07C8);
 
   // String pointer.
-  llvm::Constant *C = llvm::ConstantDataArray::getString(VMContext,
-                                                         Entry.getKey());
+  llvm::Constant *C = 0;
+  if (isUTF16) {
+    ArrayRef<uint16_t> Arr =
+      llvm::makeArrayRef<uint16_t>((uint16_t*)Entry.getKey().data(),
+                                   Entry.getKey().size() / 2);
+    C = llvm::ConstantDataArray::get(VMContext, Arr);
+  } else {
+    C = llvm::ConstantDataArray::getString(VMContext, Entry.getKey());
+  }
 
   llvm::GlobalValue::LinkageTypes Linkage;
   if (isUTF16)
@@ -2016,8 +2008,14 @@
     CharUnits Align = getContext().getTypeAlignInChars(getContext().CharTy);
     GV->setAlignment(Align.getQuantity());
   }
+
+  // String.
   Fields[2] = llvm::ConstantExpr::getGetElementPtr(GV, Zeros);
 
+  if (isUTF16)
+    // Cast the UTF16 string to the correct type.
+    Fields[2] = llvm::ConstantExpr::getBitCast(Fields[2], Int8PtrTy);
+
   // String length.
   Ty = getTypes().ConvertType(getContext().LongTy);
   Fields[3] = llvm::ConstantInt::get(Ty, StringLength);
diff --git a/test/CodeGen/darwin-string-literals.c b/test/CodeGen/darwin-string-literals.c
index 6f9e0d2..968386a 100644
--- a/test/CodeGen/darwin-string-literals.c
+++ b/test/CodeGen/darwin-string-literals.c
@@ -2,13 +2,16 @@
 
 // CHECK-LSB: @.str = private unnamed_addr constant [8 x i8] c"string0\00"
 // CHECK-LSB: @.str1 = linker_private unnamed_addr constant [8 x i8] c"string1\00"
-// CHECK-LSB: @.str2 = internal unnamed_addr constant [36 x i8] c"h\00e\00l\00l\00o\00 \00\92! \00\03& \00\90! \00w\00o\00r\00l\00d\00\00\00", align 2
+// CHECK-LSB: @.str2 = internal unnamed_addr constant [18 x i16] [i16 104, i16 101, i16 108, i16 108, i16 111, i16 32, i16 8594, i16 32, i16 9731, i16 32, i16 8592, i16 32, i16 119, i16 111, i16 114, i16 108, i16 100, i16 0], align 2
+// CHECK-LSB: @.str4 = internal unnamed_addr constant [6 x i16] [i16 116, i16 101, i16 115, i16 116, i16 8482, i16 0], align 2
+
 
 // RUN: %clang_cc1 -triple powerpc-apple-darwin9 -emit-llvm %s -o - | FileCheck -check-prefix MSB %s
 
 // CHECK-MSB: @.str = private unnamed_addr constant [8 x i8] c"string0\00"
 // CHECK-MSB: @.str1 = linker_private unnamed_addr constant [8 x i8] c"string1\00"
-// CHECK-MSB: @.str2 = internal unnamed_addr constant [36 x i8] c"\00h\00e\00l\00l\00o\00 !\92\00 &\03\00 !\90\00 \00w\00o\00r\00l\00d\00\00", align 2
+// CHECK-MSB: @.str2 = internal unnamed_addr constant [18 x i16] [i16 104, i16 101, i16 108, i16 108, i16 111, i16 32, i16 8594, i16 32, i16 9731, i16 32, i16 8592, i16 32, i16 119, i16 111, i16 114, i16 108, i16 100, i16 0], align 2
+// CHECK-MSB: @.str4 = internal unnamed_addr constant [6 x i16] [i16 116, i16 101, i16 115, i16 116, i16 8482, i16 0], align 2
 
 const char *g0 = "string0";
 const void *g1 = __builtin___CFStringMakeConstantString("string1");
diff --git a/test/CodeGen/utf16-cfstrings.c b/test/CodeGen/utf16-cfstrings.c
new file mode 100644
index 0000000..d4f214b
--- /dev/null
+++ b/test/CodeGen/utf16-cfstrings.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - | FileCheck %s
+// <rdar://problem/10655949>
+
+// CHECK: @.str = internal unnamed_addr constant [9 x i16] [i16 252, i16 98, i16 101, i16 114, i16 104, i16 117, i16 110, i16 100, i16 0], align 2
+
+#define CFSTR __builtin___CFStringMakeConstantString
+
+void foo() {
+  CFSTR("überhund");
+}
diff --git a/test/CodeGenObjC/2009-08-05-utf16.m b/test/CodeGenObjC/2009-08-05-utf16.m
index 38c9c82..06458e7 100644
--- a/test/CodeGenObjC/2009-08-05-utf16.m
+++ b/test/CodeGenObjC/2009-08-05-utf16.m
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -emit-llvm -w -x objective-c %s -o - | FileCheck %s
 // rdar://7095855 rdar://7115749
 
-// CHECK: internal unnamed_addr constant [12 x i8]
+// CHECK: internal unnamed_addr constant [6 x i16] [i16 105, i16 80, i16 111, i16 100, i16 8482, i16 0], align 2
 void *P = @"iPod™";