Lexer: Don't warn about Unicode in preprocessor directives.
This allows people to use Unicode characters in #pragma mark directives and
in macros that exist only to be stringized.
<rdar://problem/13107323&13121362>
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@174081 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 4b5a313..1b064c8 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -2832,7 +2832,8 @@
return LexIdentifier(Result, CurPtr);
}
- if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
+ if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
+ !PP->isPreprocessedOutput() &&
!isASCII(*BufferPtr) && !isAllowedIDChar(C)) {
// Non-ASCII characters tend to creep into source code unintentionally.
// Instead of letting the parser complain about the unknown token,
@@ -3537,7 +3538,8 @@
if (Status == conversionOK)
return LexUnicode(Result, CodePoint, CurPtr);
- if (isLexingRawMode() || PP->isPreprocessedOutput()) {
+ if (isLexingRawMode() || ParsingPreprocessorDirective ||
+ PP->isPreprocessedOutput()) {
++CurPtr;
Kind = tok::unknown;
break;
diff --git a/test/Lexer/unicode.c b/test/Lexer/unicode.c
index 26e77f6..de758f1 100644
--- a/test/Lexer/unicode.c
+++ b/test/Lexer/unicode.c
@@ -10,6 +10,17 @@
// CHECK: extern int {{x}}
// CHECK: extern int {{x}}
+#pragma mark ¡Unicode!
+
+#define COPYRIGHT Copyright © 2012
+#define XSTR(X) #X
+#define STR(X) XSTR(X)
+
+static const char *copyright = STR(COPYRIGHT); // no-warning
+// CHECK: static const char *copyright = "Copyright © {{2012}}";
+
#if PP_ONLY
+COPYRIGHT
+// CHECK: Copyright © {{2012}}
CHECK: The preprocessor should not complain about Unicode characters like ©.
#endif
diff --git a/test/Lexer/utf8-invalid.c b/test/Lexer/utf8-invalid.c
index a387ff7..2657b54 100644
--- a/test/Lexer/utf8-invalid.c
+++ b/test/Lexer/utf8-invalid.c
@@ -9,3 +9,7 @@
// Don't warn about bad UTF-8 in raw lexing mode.
extern int x;
#endif
+
+// Don't warn about bad UTF-8 in preprocessor directives.
+#define x82
+#pragma mark