Fixes parsing of hash tokens in the middle of a line. To parse # correctly, we need to know whether it is the first token in a line - we can deduce this either from the whitespace or from seeing that the token is the first in the file - we already calculate this information. This patch moves the identification of the first token into the getNextToken method and stores it inside the FormatToken, so the UnwrappedLineParser can stay independent of the SourceManager. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@171640 91177308-0d34-0410-b5e6-96231b3b80d8

commit: f6fd00b12ae7d89436d32851c9bcc8dd3d046ad3 [log] [tgz]
author: Manuel Klimek <klimek@google.com> Sat Jan 05 22:56:06 2013 +0000
committer: Manuel Klimek <klimek@google.com> Sat Jan 05 22:56:06 2013 +0000
tree: c8d17d46d1f754bf515acb33f861ee31fd044b56
parent: c37b4d60f926bf440dfcc312bd6482fed3176e33 [diff]
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index effe0bc..4d401a5 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp

@@ -483,8 +483,7 @@
 
     unsigned Newlines =
         std::min(Token.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
-    unsigned Offset = SourceMgr.getFileOffset(Token.WhiteSpaceStart);
-    if (Newlines == 0 && Offset != 0)
+    if (Newlines == 0 && !Token.IsFirst)
       Newlines = 1;
     unsigned Indent = Line.Level * 2;
     if ((Token.Tok.is(tok::kw_public) || Token.Tok.is(tok::kw_protected) ||
@@ -685,9 +684,10 @@
       next();
       if (Index >= Tokens.size())
         return;
-      // It is the responsibility of the UnwrappedLineParser to make sure
-      // this sequence is not produced inside an unwrapped line.
-      assert(Tokens[Index].Tok.getIdentifierInfo() != NULL);
+      // Hashes in the middle of a line can lead to any strange token
+      // sequence.
+      if (Tokens[Index].Tok.getIdentifierInfo() == NULL)
+        return;
       switch (Tokens[Index].Tok.getIdentifierInfo()->getPPKeywordID()) {
       case tok::pp_include:
       case tok::pp_import:
@@ -1033,6 +1033,8 @@
     Lex.LexFromRawLexer(FormatTok.Tok);
     StringRef Text = tokenText(FormatTok.Tok);
     FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
+    if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
+      FormatTok.IsFirst = true;
 
     // Consume and record whitespace until we find a significant token.
     while (FormatTok.Tok.is(tok::unknown)) {

diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
index 2f3a603..72b4750 100644
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp

@@ -470,7 +470,9 @@
 
 void UnwrappedLineParser::readToken() {
   FormatTok = Tokens->getNextToken();
-  while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash)) {
+  while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash) &&
+         ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
+          FormatTok.IsFirst)) {
     // FIXME: This is incorrect - the correct way is to create a
     // data structure that will construct the parts around the preprocessor
     // directive as a structured \c UnwrappedLine.

diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h
index a8e5b73..287143d 100644
--- a/lib/Format/UnwrappedLineParser.h
+++ b/lib/Format/UnwrappedLineParser.h

@@ -31,7 +31,8 @@
 /// whitespace characters preceeding it.
 struct FormatToken {
   FormatToken()
-      : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0) {
+      : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0),
+        IsFirst(false) {
   }
 
   /// \brief The \c Token.
@@ -56,6 +57,9 @@
   /// \brief The length in characters of the whitespace immediately preceeding
   /// the \c Token.
   unsigned WhiteSpaceLength;
+
+  /// \brief Indicates that this is the first token.
+  bool IsFirst;
 };
 
 /// \brief An unwrapped line is a sequence of \c Token, that we would like to

diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp
index 2165056..8d95538 100644
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp

@@ -470,6 +470,10 @@
   EXPECT_EQ("{\n  {\n#define A\n  }\n}", format("{{\n#define A\n}}"));
 }
 
+TEST_F(FormatTest, FormatHashIfNotAtStartOfLine) {
+  verifyFormat("{\n  {\n    a #c;\n  }\n}");
+}
+
 // FIXME: write test for unbalanced braces in macros...
 // FIXME: test # inside a normal statement (like {#define A b})
commit	f6fd00b12ae7d89436d32851c9bcc8dd3d046ad3	[log] [tgz]
author	Manuel Klimek <klimek@google.com>	Sat Jan 05 22:56:06 2013 +0000
committer	Manuel Klimek <klimek@google.com>	Sat Jan 05 22:56:06 2013 +0000
tree	c8d17d46d1f754bf515acb33f861ee31fd044b56
parent	c37b4d60f926bf440dfcc312bd6482fed3176e33 [diff]