Lexer: Don't warn about Unicode in preprocessor directives.

This allows people to use Unicode in their #pragma mark and in macros that exist only to be string-ized.

<rdar://problem/13107323&13121362>

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@174081 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 0ed439487491e09faffdbabfacb1d050292c7723 [log] [tgz]
author: Jordan Rose <jordan_rose@apple.com> Thu Jan 31 19:48:48 2013 +0000
committer: Jordan Rose <jordan_rose@apple.com> Thu Jan 31 19:48:48 2013 +0000
tree: 22ef0ede82481c9a777dbd563adfd2869f0d3750
parent: 98b21b9fcef25fc6953ce36b4c71ca2c02999f1d [diff]
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 4b5a313..1b064c8 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp

@@ -2832,7 +2832,8 @@
     return LexIdentifier(Result, CurPtr);
   }
 
-  if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
+  if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
+      !PP->isPreprocessedOutput() &&
       !isASCII(*BufferPtr) && !isAllowedIDChar(C)) {
     // Non-ASCII characters tend to creep into source code unintentionally.
     // Instead of letting the parser complain about the unknown token,
@@ -3537,7 +3538,8 @@
     if (Status == conversionOK)
       return LexUnicode(Result, CodePoint, CurPtr);
     
-    if (isLexingRawMode() || PP->isPreprocessedOutput()) {
+    if (isLexingRawMode() || ParsingPreprocessorDirective ||
+        PP->isPreprocessedOutput()) {
       ++CurPtr;
       Kind = tok::unknown;
       break;

diff --git a/test/Lexer/unicode.c b/test/Lexer/unicode.c
index 26e77f6..de758f1 100644
--- a/test/Lexer/unicode.c
+++ b/test/Lexer/unicode.c

@@ -10,6 +10,17 @@
 // CHECK: extern int {{x}}
 // CHECK: extern int　{{x}}
 
+#pragma mark ¡Unicode!
+
+#define COPYRIGHT Copyright © 2012
+#define XSTR(X) #X
+#define STR(X) XSTR(X)
+
+static const char *copyright = STR(COPYRIGHT); // no-warning
+// CHECK: static const char *copyright = "Copyright © {{2012}}";
+
 #if PP_ONLY
+COPYRIGHT
+// CHECK: Copyright © {{2012}}
 CHECK: The preprocessor should not complain about Unicode characters like ©.
 #endif

diff --git a/test/Lexer/utf8-invalid.c b/test/Lexer/utf8-invalid.c
index a387ff7..2657b54 100644
--- a/test/Lexer/utf8-invalid.c
+++ b/test/Lexer/utf8-invalid.c

@@ -9,3 +9,7 @@
 // Don't warn about bad UTF-8 in raw lexing mode.
 extern int x;
 #endif
+
+// Don't warn about bad UTF-8 in preprocessor directives.
+#define x82 
+#pragma mark
commit	0ed439487491e09faffdbabfacb1d050292c7723	[log] [tgz]
author	Jordan Rose <jordan_rose@apple.com>	Thu Jan 31 19:48:48 2013 +0000
committer	Jordan Rose <jordan_rose@apple.com>	Thu Jan 31 19:48:48 2013 +0000
tree	22ef0ede82481c9a777dbd563adfd2869f0d3750
parent	98b21b9fcef25fc6953ce36b4c71ca2c02999f1d [diff]