Don't map a file:line:col triplet that is inside the preamble range to
a "loaded" location of the precompiled preamble.

Instead, handle specially locations of preprocessed entities:
-When looking up for preprocessed entities, map main file locations inside the
 preamble range to a preamble loaded location.
-When getting the source range of a preprocessing cursor, map preamble loaded
 locations back to main file locations.

Fixes rdar://10175093 & http://llvm.org/PR10999

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@140519 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/clang/Frontend/ASTUnit.h b/include/clang/Frontend/ASTUnit.h
index 86f9b06..074b417 100644
--- a/include/clang/Frontend/ASTUnit.h
+++ b/include/clang/Frontend/ASTUnit.h
@@ -175,6 +175,7 @@
   /// \brief The file in which the precompiled preamble is stored.
   std::string PreambleFile;
   
+public:
   class PreambleData {
     const FileEntry *File;
     std::vector<char> Buffer;
@@ -203,10 +204,21 @@
       return NumLines;
     }
 
+    SourceRange getSourceRange(const SourceManager &SM) const {
+      SourceLocation FileLoc = SM.getLocForStartOfFile(SM.getPreambleFileID());
+      return SourceRange(FileLoc, FileLoc.getLocWithOffset(size()-1));
+    }
+
   private:
     void countLines() const;
   };
 
+  const PreambleData &getPreambleData() const {
+    return Preamble;
+  }
+
+private:
+
   /// \brief The contents of the preamble that has been precompiled to
   /// \c PreambleFile.
   PreambleData Preamble;
@@ -514,11 +526,29 @@
                              unsigned Line, unsigned Col) const;
 
   /// \brief Get the source location for the given file:offset pair.
-  ///
-  /// The difference with SourceManager::getLocation is that this method checks
-  /// whether the requested location points inside the precompiled preamble
-  /// in which case the returned source location will be a "loaded" one.
   SourceLocation getLocation(const FileEntry *File, unsigned Offset) const;
+
+  /// \brief If \arg Loc is a loaded location from the preamble, returns
+  /// the corresponding local location of the main file, otherwise it returns
+  /// \arg Loc.
+  SourceLocation mapLocationFromPreamble(SourceLocation Loc);
+
+  /// \brief If \arg Loc is a local location of the main file but inside the
+  /// preamble chunk, returns the corresponding loaded location from the
+  /// preamble, otherwise it returns \arg Loc.
+  SourceLocation mapLocationToPreamble(SourceLocation Loc);
+
+  /// \brief \see mapLocationFromPreamble.
+  SourceRange mapRangeFromPreamble(SourceRange R) {
+    return SourceRange(mapLocationFromPreamble(R.getBegin()),
+                       mapLocationFromPreamble(R.getEnd()));
+  }
+
+  /// \brief \see mapLocationToPreamble.
+  SourceRange mapRangeToPreamble(SourceRange R) {
+    return SourceRange(mapLocationToPreamble(R.getBegin()),
+                       mapLocationToPreamble(R.getEnd()));
+  }
   
   // Retrieve the diagnostics associated with this AST
   typedef const StoredDiagnostic *stored_diag_iterator;
diff --git a/lib/Frontend/ASTUnit.cpp b/lib/Frontend/ASTUnit.cpp
index 4cc936a..4e8226d 100644
--- a/lib/Frontend/ASTUnit.cpp
+++ b/lib/Frontend/ASTUnit.cpp
@@ -2342,27 +2342,59 @@
 SourceLocation ASTUnit::getLocation(const FileEntry *File,
                                     unsigned Line, unsigned Col) const {
   const SourceManager &SM = getSourceManager();
-  SourceLocation Loc;
-  if (!Preamble.empty() && Line <= Preamble.getNumLines())
-    Loc = SM.translateLineCol(SM.getPreambleFileID(), Line, Col);
-  else
-    Loc = SM.translateFileLineCol(File, Line, Col);
-
+  SourceLocation Loc = SM.translateFileLineCol(File, Line, Col);
   return SM.getMacroArgExpandedLocation(Loc);
 }
 
 SourceLocation ASTUnit::getLocation(const FileEntry *File,
                                     unsigned Offset) const {
   const SourceManager &SM = getSourceManager();
-  SourceLocation FileLoc;
-  if (!Preamble.empty() && Offset < Preamble.size())
-    FileLoc = SM.getLocForStartOfFile(SM.getPreambleFileID());
-  else
-    FileLoc = SM.translateFileLineCol(File, 1, 1);
-
+  SourceLocation FileLoc = SM.translateFileLineCol(File, 1, 1);
   return SM.getMacroArgExpandedLocation(FileLoc.getLocWithOffset(Offset));
 }
 
+/// \brief If \arg Loc is a loaded location from the preamble, returns
+/// the corresponding local location of the main file, otherwise it returns
+/// \arg Loc.
+SourceLocation ASTUnit::mapLocationFromPreamble(SourceLocation Loc) {
+  FileID PreambleID;
+  if (SourceMgr)
+    PreambleID = SourceMgr->getPreambleFileID();
+
+  if (Loc.isInvalid() || Preamble.empty() || PreambleID.isInvalid())
+    return Loc;
+
+  unsigned Offs;
+  if (SourceMgr->isInFileID(Loc, PreambleID, &Offs) && Offs < Preamble.size()) {
+    SourceLocation FileLoc
+        = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID());
+    return FileLoc.getLocWithOffset(Offs);
+  }
+
+  return Loc;
+}
+
+/// \brief If \arg Loc is a local location of the main file but inside the
+/// preamble chunk, returns the corresponding loaded location from the
+/// preamble, otherwise it returns \arg Loc.
+SourceLocation ASTUnit::mapLocationToPreamble(SourceLocation Loc) {
+  FileID PreambleID;
+  if (SourceMgr)
+    PreambleID = SourceMgr->getPreambleFileID();
+
+  if (Loc.isInvalid() || Preamble.empty() || PreambleID.isInvalid())
+    return Loc;
+
+  unsigned Offs;
+  if (SourceMgr->isInFileID(Loc, SourceMgr->getMainFileID(), &Offs) &&
+      Offs < Preamble.size()) {
+    SourceLocation FileLoc = SourceMgr->getLocForStartOfFile(PreambleID);
+    return FileLoc.getLocWithOffset(Offs);
+  }
+
+  return Loc;
+}
+
 void ASTUnit::PreambleData::countLines() const {
   NumLines = 0;
   if (empty())
diff --git a/test/Index/annotate-tokens-pp.c b/test/Index/annotate-tokens-pp.c
index ecdabb6..a814ac5 100644
--- a/test/Index/annotate-tokens-pp.c
+++ b/test/Index/annotate-tokens-pp.c
@@ -31,6 +31,7 @@
 const char *fname = __FILE__;
 
 // RUN: c-index-test -test-annotate-tokens=%s:2:1:32:1 -I%S/Inputs %s | FileCheck %s
+// RUN: env CINDEXTEST_EDITING=1 c-index-test -test-annotate-tokens=%s:2:1:32:1 -I%S/Inputs %s | FileCheck %s
 // CHECK: Punctuation: "#" [2:1 - 2:2] preprocessing directive=
 // CHECK: Identifier: "define" [2:2 - 2:8] preprocessing directive=
 // CHECK: Identifier: "STILL_NOTHING" [2:9 - 2:22] macro definition=STILL_NOTHING
diff --git a/test/Index/annotate-tokens-preamble.c b/test/Index/annotate-tokens-preamble.c
new file mode 100644
index 0000000..afac87a
--- /dev/null
+++ b/test/Index/annotate-tokens-preamble.c
@@ -0,0 +1,20 @@
+// A comment line.
+
+void f(void *ptr) {
+}
+
+
+// RUN: c-index-test -test-annotate-tokens=%s:1:1:5:1 %s | FileCheck %s
+// RUN: env CINDEXTEST_EDITING=1 c-index-test -test-annotate-tokens=%s:1:1:5:1 %s | FileCheck %s
+// CHECK: Comment: "// A comment line." [1:1 - 1:19]
+// CHECK: Keyword: "void" [3:1 - 3:5] FunctionDecl=f:3:6 (Definition)
+// CHECK: Identifier: "f" [3:6 - 3:7] FunctionDecl=f:3:6 (Definition)
+// CHECK: Punctuation: "(" [3:7 - 3:8] FunctionDecl=f:3:6 (Definition)
+// CHECK: Keyword: "void" [3:8 - 3:12] ParmDecl=ptr:3:14 (Definition)
+// CHECK: Punctuation: "*" [3:13 - 3:14] ParmDecl=ptr:3:14 (Definition)
+// CHECK: Identifier: "ptr" [3:14 - 3:17] ParmDecl=ptr:3:14 (Definition)
+// CHECK: Punctuation: ")" [3:17 - 3:18] FunctionDecl=f:3:6 (Definition)
+// CHECK: Punctuation: "{" [3:19 - 3:20] UnexposedStmt=
+// CHECK: Punctuation: "}" [4:1 - 4:2] UnexposedStmt=
+
+
diff --git a/tools/c-index-test/c-index-test.c b/tools/c-index-test/c-index-test.c
index 6b72413..33044e3 100644
--- a/tools/c-index-test/c-index-test.c
+++ b/tools/c-index-test/c-index-test.c
@@ -1405,6 +1405,17 @@
   }
   errorCode = 0;
 
+  if (getenv("CINDEXTEST_EDITING")) {
+    for (i = 0; i < 5; ++i) {
+      if (clang_reparseTranslationUnit(TU, num_unsaved_files, unsaved_files,
+                                       clang_defaultReparseOptions(TU))) {
+        fprintf(stderr, "Unable to reparse translation unit!\n");
+        errorCode = -1;
+        goto teardown;
+      }
+    }
+  }
+
   file = clang_getFile(TU, filename);
   if (!file) {
     fprintf(stderr, "file %s is not in this translation unit\n", filename);
diff --git a/tools/libclang/CIndex.cpp b/tools/libclang/CIndex.cpp
index d40b5b6..30db131 100644
--- a/tools/libclang/CIndex.cpp
+++ b/tools/libclang/CIndex.cpp
@@ -396,8 +396,9 @@
     = *AU->getPreprocessor().getPreprocessingRecord();
   
   if (RegionOfInterest.isValid()) {
+    SourceRange MappedRange = AU->mapRangeToPreamble(RegionOfInterest);
     std::pair<PreprocessingRecord::iterator, PreprocessingRecord::iterator>
-      Entities = PPRec.getPreprocessedEntitiesInRange(RegionOfInterest);
+      Entities = PPRec.getPreprocessedEntitiesInRange(MappedRange);
     return visitPreprocessedEntities(Entities.first, Entities.second);
   }
 
@@ -3790,14 +3791,23 @@
   if (C.kind == CXCursor_PreprocessingDirective)
     return cxcursor::getCursorPreprocessingDirective(C);
 
-  if (C.kind == CXCursor_MacroExpansion)
-    return cxcursor::getCursorMacroExpansion(C)->getSourceRange();
+  if (C.kind == CXCursor_MacroExpansion) {
+    ASTUnit *TU = getCursorASTUnit(C);
+    SourceRange Range = cxcursor::getCursorMacroExpansion(C)->getSourceRange();
+    return TU->mapRangeFromPreamble(Range);
+  }
 
-  if (C.kind == CXCursor_MacroDefinition)
-    return cxcursor::getCursorMacroDefinition(C)->getSourceRange();
+  if (C.kind == CXCursor_MacroDefinition) {
+    ASTUnit *TU = getCursorASTUnit(C);
+    SourceRange Range = cxcursor::getCursorMacroDefinition(C)->getSourceRange();
+    return TU->mapRangeFromPreamble(Range);
+  }
 
-  if (C.kind == CXCursor_InclusionDirective)
-    return cxcursor::getCursorInclusionDirective(C)->getSourceRange();
+  if (C.kind == CXCursor_InclusionDirective) {
+    ASTUnit *TU = getCursorASTUnit(C);
+    SourceRange Range = cxcursor::getCursorInclusionDirective(C)->getSourceRange();
+    return TU->mapRangeFromPreamble(Range);
+  }
 
   if (C.kind >= CXCursor_FirstDecl && C.kind <= CXCursor_LastDecl) {
     Decl *D = cxcursor::getCursorDecl(C);
@@ -4395,28 +4405,13 @@
                         SourceLocation::getFromRawEncoding(CXTok.int_data[1]));
 }
 
-void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range,
-                    CXToken **Tokens, unsigned *NumTokens) {
-  if (Tokens)
-    *Tokens = 0;
-  if (NumTokens)
-    *NumTokens = 0;
-
-  ASTUnit *CXXUnit = static_cast<ASTUnit *>(TU->TUData);
-  if (!CXXUnit || !Tokens || !NumTokens)
-    return;
-
-  ASTUnit::ConcurrencyCheck Check(*CXXUnit);
-  
-  SourceRange R = cxloc::translateCXSourceRange(Range);
-  if (R.isInvalid())
-    return;
-
+static void getTokens(ASTUnit *CXXUnit, SourceRange Range,
+                      SmallVectorImpl<CXToken> &CXTokens) {
   SourceManager &SourceMgr = CXXUnit->getSourceManager();
   std::pair<FileID, unsigned> BeginLocInfo
-    = SourceMgr.getDecomposedLoc(R.getBegin());
+    = SourceMgr.getDecomposedLoc(Range.getBegin());
   std::pair<FileID, unsigned> EndLocInfo
-    = SourceMgr.getDecomposedLoc(R.getEnd());
+    = SourceMgr.getDecomposedLoc(Range.getEnd());
 
   // Cannot tokenize across files.
   if (BeginLocInfo.first != EndLocInfo.first)
@@ -4436,7 +4431,6 @@
 
   // Lex tokens until we hit the end of the range.
   const char *EffectiveBufferEnd = Buffer.data() + EndLocInfo.second;
-  SmallVector<CXToken, 32> CXTokens;
   Token Tok;
   bool previousWasAt = false;
   do {
@@ -4481,6 +4475,27 @@
     CXTokens.push_back(CXTok);
     previousWasAt = Tok.is(tok::at);
   } while (Lex.getBufferLocation() <= EffectiveBufferEnd);
+}
+
+void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range,
+                    CXToken **Tokens, unsigned *NumTokens) {
+  if (Tokens)
+    *Tokens = 0;
+  if (NumTokens)
+    *NumTokens = 0;
+
+  ASTUnit *CXXUnit = static_cast<ASTUnit *>(TU->TUData);
+  if (!CXXUnit || !Tokens || !NumTokens)
+    return;
+
+  ASTUnit::ConcurrencyCheck Check(*CXXUnit);
+  
+  SourceRange R = cxloc::translateCXSourceRange(Range);
+  if (R.isInvalid())
+    return;
+
+  SmallVector<CXToken, 32> CXTokens;
+  getTokens(CXXUnit, R, CXTokens);
 
   if (CXTokens.empty())
     return;
@@ -4919,6 +4934,73 @@
   };
 }
 
+static void annotatePreprocessorTokens(CXTranslationUnit TU,
+                                       SourceRange RegionOfInterest,
+                                       AnnotateTokensData &Annotated) {
+  ASTUnit *CXXUnit = static_cast<ASTUnit *>(TU->TUData);
+
+  SourceManager &SourceMgr = CXXUnit->getSourceManager();
+  std::pair<FileID, unsigned> BeginLocInfo
+    = SourceMgr.getDecomposedLoc(RegionOfInterest.getBegin());
+  std::pair<FileID, unsigned> EndLocInfo
+    = SourceMgr.getDecomposedLoc(RegionOfInterest.getEnd());
+
+  if (BeginLocInfo.first != EndLocInfo.first)
+    return;
+
+  StringRef Buffer;
+  bool Invalid = false;
+  Buffer = SourceMgr.getBufferData(BeginLocInfo.first, &Invalid);
+  if (Buffer.empty() || Invalid)
+    return;
+
+  Lexer Lex(SourceMgr.getLocForStartOfFile(BeginLocInfo.first),
+            CXXUnit->getASTContext().getLangOptions(),
+            Buffer.begin(), Buffer.data() + BeginLocInfo.second,
+            Buffer.end());
+  Lex.SetCommentRetentionState(true);
+  
+  // Lex tokens in raw mode until we hit the end of the range, to avoid
+  // entering #includes or expanding macros.
+  while (true) {
+    Token Tok;
+    Lex.LexFromRawLexer(Tok);
+    
+  reprocess:
+    if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) {
+      // We have found a preprocessing directive. Gobble it up so that we
+      // don't see it while preprocessing these tokens later, but keep track
+      // of all of the token locations inside this preprocessing directive so
+      // that we can annotate them appropriately.
+      //
+      // FIXME: Some simple tests here could identify macro definitions and
+      // #undefs, to provide specific cursor kinds for those.
+      SmallVector<SourceLocation, 32> Locations;
+      do {
+        Locations.push_back(Tok.getLocation());
+        Lex.LexFromRawLexer(Tok);
+      } while (!Tok.isAtStartOfLine() && !Tok.is(tok::eof));
+      
+      using namespace cxcursor;
+      CXCursor Cursor
+      = MakePreprocessingDirectiveCursor(SourceRange(Locations.front(),
+                                                     Locations.back()),
+                                         TU);
+      for (unsigned I = 0, N = Locations.size(); I != N; ++I) {
+        Annotated[Locations[I].getRawEncoding()] = Cursor;
+      }
+      
+      if (Tok.isAtStartOfLine())
+        goto reprocess;
+      
+      continue;
+    }
+    
+    if (Tok.is(tok::eof))
+      break;
+  }
+}
+
 // This gets run a separate thread to avoid stack blowout.
 static void clang_annotateTokensImpl(void *UserData) {
   CXTranslationUnit TU = ((clang_annotateTokens_Data*)UserData)->TU;
@@ -4938,66 +5020,10 @@
   // A mapping from the source locations found when re-lexing or traversing the
   // region of interest to the corresponding cursors.
   AnnotateTokensData Annotated;
-  
+
   // Relex the tokens within the source range to look for preprocessing
   // directives.
-  SourceManager &SourceMgr = CXXUnit->getSourceManager();
-  std::pair<FileID, unsigned> BeginLocInfo
-    = SourceMgr.getDecomposedLoc(RegionOfInterest.getBegin());
-  std::pair<FileID, unsigned> EndLocInfo
-    = SourceMgr.getDecomposedLoc(RegionOfInterest.getEnd());
-  
-  StringRef Buffer;
-  bool Invalid = false;
-  if (BeginLocInfo.first == EndLocInfo.first &&
-      ((Buffer = SourceMgr.getBufferData(BeginLocInfo.first, &Invalid)),true) &&
-      !Invalid) {
-    Lexer Lex(SourceMgr.getLocForStartOfFile(BeginLocInfo.first),
-              CXXUnit->getASTContext().getLangOptions(),
-              Buffer.begin(), Buffer.data() + BeginLocInfo.second,
-              Buffer.end());
-    Lex.SetCommentRetentionState(true);
-    
-    // Lex tokens in raw mode until we hit the end of the range, to avoid
-    // entering #includes or expanding macros.
-    while (true) {
-      Token Tok;
-      Lex.LexFromRawLexer(Tok);
-      
-    reprocess:
-      if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) {
-        // We have found a preprocessing directive. Gobble it up so that we
-        // don't see it while preprocessing these tokens later, but keep track
-        // of all of the token locations inside this preprocessing directive so
-        // that we can annotate them appropriately.
-        //
-        // FIXME: Some simple tests here could identify macro definitions and
-        // #undefs, to provide specific cursor kinds for those.
-        SmallVector<SourceLocation, 32> Locations;
-        do {
-          Locations.push_back(Tok.getLocation());
-          Lex.LexFromRawLexer(Tok);
-        } while (!Tok.isAtStartOfLine() && !Tok.is(tok::eof));
-        
-        using namespace cxcursor;
-        CXCursor Cursor
-        = MakePreprocessingDirectiveCursor(SourceRange(Locations.front(),
-                                                       Locations.back()),
-                                           TU);
-        for (unsigned I = 0, N = Locations.size(); I != N; ++I) {
-          Annotated[Locations[I].getRawEncoding()] = Cursor;
-        }
-        
-        if (Tok.isAtStartOfLine())
-          goto reprocess;
-        
-        continue;
-      }
-      
-      if (Tok.is(tok::eof))
-        break;
-    }
-  }
+  annotatePreprocessorTokens(TU, RegionOfInterest, Annotated);
   
   if (CXXUnit->getPreprocessor().getPreprocessingRecord()) {
     // Search and mark tokens that are macro argument expansions.
diff --git a/tools/libclang/CXCursor.cpp b/tools/libclang/CXCursor.cpp
index d9e8f4a..eb54ed2 100644
--- a/tools/libclang/CXCursor.cpp
+++ b/tools/libclang/CXCursor.cpp
@@ -364,10 +364,12 @@
 
 SourceRange cxcursor::getCursorPreprocessingDirective(CXCursor C) {
   assert(C.kind == CXCursor_PreprocessingDirective);
-  return SourceRange(SourceLocation::getFromRawEncoding(
+  SourceRange Range = SourceRange(SourceLocation::getFromRawEncoding(
                                       reinterpret_cast<uintptr_t> (C.data[0])),
                      SourceLocation::getFromRawEncoding(
                                       reinterpret_cast<uintptr_t> (C.data[1])));
+  ASTUnit *TU = getCursorASTUnit(C);
+  return TU->mapRangeFromPreamble(Range);
 }
 
 CXCursor cxcursor::MakeMacroDefinitionCursor(MacroDefinition *MI,